Example #1
def absolute_momentum(u_wind, v_wind, index='index'):
    r"""Calculate cross-sectional absolute momentum (also called pseudoangular momentum).

    As given in [Schultz1999]_, absolute momentum (also called pseudoangular momentum) is
    given by

    .. math:: M = v + fx

    where :math:`v` is the along-front component of the wind and :math:`x` is the cross-front
    distance. Applied to a cross-section taken perpendicular to the front, :math:`v` becomes
    the normal component of the wind and :math:`x` the tangential distance.

    If using this calculation in assessing symmetric instability, geostrophic wind should be
    used so that geostrophic absolute momentum :math:`\left(M_g\right)` is obtained, as
    described in [Schultz1999]_.

    Parameters
    ----------
    u_wind : `xarray.DataArray`
        The input DataArray of the x-component (in terms of data projection) of the wind.
    v_wind : `xarray.DataArray`
        The input DataArray of the y-component (in terms of data projection) of the wind.
    index : str, optional
        Name of the cross-section index coordinate, defaults to ``'index'``.

    Returns
    -------
    absolute_momentum: `xarray.DataArray`
        The absolute momentum

    Notes
    -----
    The coordinates of `u_wind` and `v_wind` must match.

    """
    # Get the normal component of the wind
    norm_wind = normal_component(u_wind, v_wind, index=index)
    norm_wind.metpy.convert_units('m/s')

    # Get other pieces of calculation (all as ndarrays matching shape of norm_wind)
    latitude = latitude_from_cross_section(norm_wind)  # in degrees_north
    _, latitude = xr.broadcast(norm_wind, latitude)
    f = coriolis_parameter(np.deg2rad(latitude.values)).magnitude  # in 1/s
    x, y = distances_from_cross_section(norm_wind)
    x.metpy.convert_units('meters')
    y.metpy.convert_units('meters')
    _, x, y = xr.broadcast(norm_wind, x, y)
    distance = np.hypot(x, y).values  # in meters

    m = norm_wind + f * distance
    m.attrs = {'units': norm_wind.attrs['units']}

    return m
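
# --- Illustrative sketch (not from the original source; names and values are invented):
# the broadcast step above expands the cross-section's 1-D latitude to the full 2-D
# (vertical, horizontal) shape so the Coriolis parameter can be evaluated at every point.
import numpy as np
import xarray as xr

norm_wind = xr.DataArray(np.random.rand(3, 5), dims=('isobaric', 'index'),
                         coords={'isobaric': [850.0, 700.0, 500.0], 'index': np.arange(5)})
latitude = xr.DataArray(np.linspace(35.0, 40.0, 5), dims='index',
                        coords={'index': np.arange(5)})

_, latitude_2d = xr.broadcast(norm_wind, latitude)   # latitude tiled along 'isobaric'
f = 2 * 7.292e-5 * np.sin(np.deg2rad(latitude_2d))   # Coriolis parameter in 1/s
print(latitude_2d.shape, f.shape)                    # (3, 5) (3, 5)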
Example #2
def _get_dates_for_extremes(extr_vals: xarray.DataArray, current_data_chunk: xarray.DataArray,
                            extr_dates: xarray.DataArray = None):
    """
    Helper method to determine the times at which the extreme values occur.
    :param extr_vals: current extreme values
    :param current_data_chunk: data chunk being processed
    :param extr_dates: previously determined dates of the extremes, or None for the first chunk
    """
    t3d, _ = xarray.broadcast(current_data_chunk.t, current_data_chunk)

    if extr_dates is None:
        result_dates = t3d[0, :, :].copy()
    else:
        result_dates = extr_dates

    tis, xis, yis = np.where(extr_vals == current_data_chunk)

    npvals = t3d.values
    # for ti, xi, yi in zip(tis, xis, yis):
    #     result_dates[xi, yi] = npvals[ti, xi, yi]

    result_dates.values[xis, yis] = npvals[tis, xis, yis]

    return result_dates
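
# --- Illustrative sketch (toy data, not from the original source): broadcast the 1-D time
# coordinate to the full (t, x, y) shape and look up the time at which each grid point
# attains its maximum; the comparison puts the 3-D chunk first so np.where unpacks (t, x, y).
import numpy as np
import pandas as pd
import xarray as xr

chunk = xr.DataArray(np.random.rand(4, 2, 3), dims=('t', 'x', 'y'),
                     coords={'t': pd.date_range('2000-01-01', periods=4)})
extr_vals = chunk.max(dim='t')                          # per-point maxima

t3d, _ = xr.broadcast(chunk.t, chunk)                   # time tiled over x and y
tis, xis, yis = np.where((chunk == extr_vals).values)

extr_dates = t3d[0, :, :].copy()                        # one datetime per (x, y) point
extr_dates.values[xis, yis] = t3d.values[tis, xis, yis]
print(extr_dates.values)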
Example #3
def data_for_reg_calcs(values_for_reg_arr):
    lat = [-10., 1., 10., 20.]
    lon = [1., 10.]
    sfc_area = [0.5, 1., 0.5, 0.25]
    land_mask = [1., 1., 0., 1.]

    lat = xr.DataArray(lat, dims=[LAT_STR], coords=[lat])
    lon = xr.DataArray(lon, dims=[LON_STR], coords=[lon])
    sfc_area = xr.DataArray(sfc_area, dims=[LAT_STR], coords=[lat])
    land_mask = xr.DataArray(land_mask, dims=[LAT_STR], coords=[lat])

    sfc_area, _ = xr.broadcast(sfc_area, lon)
    land_mask, _ = xr.broadcast(land_mask, lon)

    da = xr.DataArray(values_for_reg_arr, coords=[lat, lon])
    da.coords[SFC_AREA_STR] = sfc_area
    da.coords[LAND_MASK_STR] = land_mask
    return da
Example #4
def test_laplacian_xarray_lonlat(test_da_lonlat):
    """Test laplacian with an xarray.DataArray on a lonlat grid."""
    laplac = laplacian(test_da_lonlat, axes=('lat', 'lon'))

    # Build the xarray of the desired values
    partial = xr.DataArray(
        np.array([1.67155420e-14, 1.67155420e-14, 1.74268211e-14, 1.74268211e-14]),
        coords=(('lat', test_da_lonlat['lat']),)
    )
    _, truth = xr.broadcast(test_da_lonlat, partial)
    truth.coords['crs'] = test_da_lonlat['crs']
    truth.attrs['units'] = 'kelvin / meter^2'

    xr.testing.assert_allclose(laplac, truth)
    assert laplac.metpy.units == truth.metpy.units
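
# --- Illustrative sketch (invented data, not from the MetPy test suite): the "truth-building"
# pattern used above -- a 1-D array of expected values along 'lat' is broadcast against the
# full input grid so it can be compared element-wise with the computed result.
import numpy as np
import xarray as xr

data = xr.DataArray(np.random.rand(4, 6), dims=('lat', 'lon'),
                    coords={'lat': np.linspace(30., 45., 4), 'lon': np.linspace(-100., -90., 6)})
partial = xr.DataArray(np.array([1.0, 2.0, 3.0, 4.0]),
                       coords=(('lat', data['lat'].values),))

_, truth = xr.broadcast(data, partial)   # truth now spans both 'lat' and 'lon'
print(truth.dims, truth.shape)           # ('lat', 'lon') (4, 6)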
Example #5
def test_second_derivative_xarray_lonlat(test_da_lonlat):
    """Test second derivative with an xarray.DataArray on a lonlat grid."""
    deriv = second_derivative(test_da_lonlat, axis='lat')

    # Build the xarray of the desired values
    partial = xr.DataArray(
        np.array([1.67155420e-14, 1.67155420e-14, 1.74268211e-14, 1.74268211e-14]),
        coords=(('lat', test_da_lonlat['lat']),)
    )
    _, truth = xr.broadcast(test_da_lonlat, partial)
    truth.coords['crs'] = test_da_lonlat['crs']
    truth.attrs['units'] = 'kelvin / meter^2'

    xr.testing.assert_allclose(deriv, truth)
    assert deriv.metpy.units == truth.metpy.units
Example #6
def test_first_derivative_xarray_lonlat(test_da_lonlat):
    """Test first derivative with an xarray.DataArray on a lonlat grid in each axis usage."""
    deriv = first_derivative(test_da_lonlat, axis='lon')  # dimension coordinate name
    deriv_alt1 = first_derivative(test_da_lonlat, axis='x')  # axis type
    deriv_alt2 = first_derivative(test_da_lonlat, axis=-1)  # axis number

    # Build the xarray of the desired values
    partial = xr.DataArray(
        np.array([-3.30782978e-06, -3.42816074e-06, -3.57012948e-06, -3.73759364e-06]),
        coords=(('lat', test_da_lonlat['lat']),)
    )
    _, truth = xr.broadcast(test_da_lonlat, partial)
    truth.coords['crs'] = test_da_lonlat['crs']
    truth.attrs['units'] = 'kelvin / meter'

    # Assert result matches expectation
    xr.testing.assert_allclose(deriv, truth)
    assert deriv.metpy.units == truth.metpy.units

    # Assert alternative specifications give same result
    xr.testing.assert_identical(deriv_alt1, deriv)
    xr.testing.assert_identical(deriv_alt2, deriv)
Example #7
def test_gradient_xarray(test_da_xy):
    """Test the 3D gradient calculation with a 4D DataArray in each axis usage."""
    deriv_x, deriv_y, deriv_p = gradient(test_da_xy, axes=('x', 'y', 'isobaric'))
    deriv_x_alt1, deriv_y_alt1, deriv_p_alt1 = gradient(test_da_xy,
                                                        axes=('x', 'y', 'vertical'))
    deriv_x_alt2, deriv_y_alt2, deriv_p_alt2 = gradient(test_da_xy, axes=(3, 2, 1))

    truth_x = xr.full_like(test_da_xy, -6.993007e-07)
    truth_x.attrs['units'] = 'kelvin / meter'

    truth_y = xr.full_like(test_da_xy, -2.797203e-06)
    truth_y.attrs['units'] = 'kelvin / meter'

    partial = xr.DataArray(
        np.array([0.04129204, 0.03330003, 0.02264402]),
        coords=(('isobaric', test_da_xy['isobaric']),)
    )
    _, truth_p = xr.broadcast(test_da_xy, partial)
    truth_p.coords['crs'] = test_da_xy['crs']
    truth_p.attrs['units'] = 'kelvin / hectopascal'

    # Assert results match expectations
    xr.testing.assert_allclose(deriv_x, truth_x)
    assert deriv_x.metpy.units == truth_x.metpy.units
    xr.testing.assert_allclose(deriv_y, truth_y)
    assert deriv_y.metpy.units == truth_y.metpy.units
    xr.testing.assert_allclose(deriv_p, truth_p)
    assert deriv_p.metpy.units == truth_p.metpy.units

    # Assert alternative specifications give same results
    xr.testing.assert_identical(deriv_x_alt1, deriv_x)
    xr.testing.assert_identical(deriv_y_alt1, deriv_y)
    xr.testing.assert_identical(deriv_p_alt1, deriv_p)
    xr.testing.assert_identical(deriv_x_alt2, deriv_x)
    xr.testing.assert_identical(deriv_y_alt2, deriv_y)
    xr.testing.assert_identical(deriv_p_alt2, deriv_p)
Example #8

from glob import glob

import numpy as np
import pandas as pd
import xarray as xr


def noisePower(h, p0):
    # simple quadratic range dependence of the noise power (not called in the loop below)
    return p0 * h**2


filesW = glob(
    '/data/data_hatpro/jue/data/joyrad94/l0/201511/2*/joyrad94_joyce_2015112*.nc'
)
for fw in filesW:
    print(fw)
    ncfile = xr.open_dataset(fw, drop_variables='velocity')
    Ze = ncfile['Ze']
    Ze = Ze.where(Ze != -999.0)
    Zlin = 10.0**(0.1 * Ze)
    t, r = xr.broadcast(Ze.time, Ze.range)
    df = pd.DataFrame()
    df['Hgt'] = r.data.flatten()
    df['Ze'] = Ze.data.flatten()
    spec = ncfile.spec
    spec = spec.where(spec != -999.0)
    speclin = 10.0**(0.1 * spec)
    Zg = speclin.sum(dim='velocity', skipna=True)
    N = 10.0 * np.log10(Zg - Zlin).data.flatten()
    N[~np.isfinite(N)] = np.nan
    df['N'] = N
    df.dropna(inplace=True, subset=['Ze'])
    df.to_hdf('joyrad94snr.h5', key='stat', mode='a', append=True)

print('done')
df = pd.read_hdf('joyrad94snr.h5', key='stat')
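
# --- Illustrative sketch (synthetic radar-like data, not from the original script): the
# coordinate-flattening trick above -- broadcast the 1-D 'time' and 'range' coordinates to
# the 2-D shape of the data so they can be flattened side by side into a tidy DataFrame.
import numpy as np
import pandas as pd
import xarray as xr

Ze = xr.DataArray(np.random.rand(5, 4), dims=('time', 'range'),
                  coords={'time': pd.date_range('2015-11-20', periods=5, freq='min'),
                          'range': np.arange(4) * 30.0})

t, r = xr.broadcast(Ze.time, Ze.range)   # both become (time, range)
df = pd.DataFrame({'time': t.data.flatten(),
                   'Hgt': r.data.flatten(),
                   'Ze': Ze.data.flatten()})
print(df.head())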
Example #9
def rle(da: xr.DataArray,
        dim: str = "time",
        max_chunk: int = 1_000_000) -> xr.DataArray:
    """Generate basic run length function.

    Parameters
    ----------
    da : xr.DataArray
    dim : str
    max_chunk : int

    Returns
    -------
    xr.DataArray
      Values are 0 where da is False (out of runs),
      are N on the first day of a run, where N is the length of that run,
      and are NaN on the other days of the runs.
    """
    use_dask = isinstance(da.data, dsk.Array)
    n = len(da[dim])
    # Need to chunk here to ensure the broadcasting is not made in memory
    i = xr.DataArray(np.arange(da[dim].size), dims=dim)
    if use_dask:
        i = i.chunk({dim: -1})

    ind, da = xr.broadcast(i, da)
    if use_dask:
        # Rechunk, but with broadcasted da
        ind = ind.chunk(da.chunks)

    b = ind.where(~da)  # find indexes where false
    end1 = (da.where(b[dim] == b[dim][-1], drop=True) * 0 + n
            )  # add additional end value index (deal with end cases)
    start1 = (da.where(b[dim] == b[dim][0], drop=True) * 0 - 1
              )  # add additional start index (deal with end cases)
    b = xr.concat([start1, b, end1], dim)

    # Ensure bfill operates on entire (unchunked) time dimension
    # Determine appropriate chunk size for other dims - do not exceed 'max_chunk'
    # total size per chunk (default 1_000_000)
    ndims = len(b.shape)
    if use_dask:
        chunk_dim = b[dim].size
        # divide extra dims into equal size
        # Note : even if calculated chunksize > dim.size result will have chunk==dim.size
        chunksize_ex_dims = None  # TODO: This raises type assignment errors in mypy
        if ndims > 1:
            chunksize_ex_dims = np.round(
                np.power(max_chunk / chunk_dim, 1 / (ndims - 1)))
        chunks = dict()
        chunks[dim] = -1
        for dd in b.dims:
            if dd != dim:
                chunks[dd] = chunksize_ex_dims
        b = b.chunk(chunks)

    # back fill nans with first position after
    z = b.bfill(dim=dim)
    # calculate lengths
    d = z.diff(dim=dim) - 1
    d = d.where(d >= 0)
    d = d.isel({dim: slice(None, -1)}).where(da, 0)
    return d
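
# --- Illustrative sketch (toy series, not from the original module): the index-broadcast
# step at the heart of `rle` -- an integer position index along 'time' is broadcast against
# the boolean series, and positions are kept only where the series is False.
import numpy as np
import xarray as xr

da = xr.DataArray([True, True, False, True, False], dims='time',
                  coords={'time': np.arange(5)})
i = xr.DataArray(np.arange(da['time'].size), dims='time')

ind, da_b = xr.broadcast(i, da)
b = ind.where(~da_b)      # position where the run condition is False, NaN inside runs
print(b.values)           # [nan nan  2. nan  4.]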
Example #10
def r2(a, b, dim=None, weights=None, skipna=False, keep_attrs=False):
    """R^2 (coefficient of determination) score.

    We first take the total sum of squares of our known vector, a.

    .. math::
        SS_{\\mathrm{tot}} = \\sum_{i=1}^{n} (a_{i} - \\bar{a})^{2}

    Next, we take the sum of squares of the error between our known vector
    a and the predicted vector, b.

    .. math::
        SS_{\\mathrm{res}} = \\sum_{i=1}^{n} (a_{i} - b_{i})^{2}

    Lastly we compute the coefficient of determination using these two
    terms.

    .. math::
        R^{2} = 1 - \\frac{SS_{\\mathrm{res}}}{SS_{\\mathrm{tot}}}

    .. note::
        The coefficient of determination is *not* symmetric. In other words,
        ``r2(a, b) != r2(b, a)``. Be careful and note that by our
        convention, ``b`` is the modeled/predicted vector and ``a`` is the
        observed vector.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the correlation along. Note that this dimension will
        be reduced as a result. Defaults to None reducing all dimensions.
    weights : xarray.Dataset or xarray.DataArray or None
        Weights matching dimensions of ``dim`` to apply during the function.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied
        from the first input to the new one.
        If False (default), the new object will
        be returned without attributes.

    Returns
    -------
    xarray.DataArray or xarray.Dataset
        R^2 (coefficient of determination) score.

    See Also
    --------
    sklearn.metrics.r2_score

    References
    ----------
    https://en.wikipedia.org/wiki/Coefficient_of_determination

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import r2
    >>> a = xr.DataArray(np.random.rand(5, 3, 3),
    ...                  dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3),
    ...                  dims=['time', 'x', 'y'])
    >>> r2(a, b, dim='time')
    """
    _fail_if_dim_empty(dim)
    dim, _ = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)
    a, b, new_dim, weights = _stack_input_if_needed(a, b, dim, weights)
    weights = _preprocess_weights(a, dim, new_dim, weights)

    input_core_dims = _determine_input_core_dims(new_dim, weights)

    return xr.apply_ufunc(
        _r2,
        a,
        b,
        weights,
        input_core_dims=input_core_dims,
        kwargs={
            "axis": -1,
            "skipna": skipna
        },
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
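
# --- Illustrative sketch (made-up data, not from xskillscore): what `xr.broadcast(a, b,
# exclude=dim)` does here -- the inputs are aligned on every dimension except the one being
# reduced, so an observation without a 'member' dimension is tiled against an ensemble.
import numpy as np
import xarray as xr

a = xr.DataArray(np.random.rand(10), dims='time')                  # observations
b = xr.DataArray(np.random.rand(10, 3), dims=('time', 'member'))   # ensemble forecast

a2, b2 = xr.broadcast(a, b, exclude=['time'])
print(a2.sizes, b2.sizes)   # both now carry sizes for 'member' and 'time'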
Example #11
def reproject_xy_to_wgs84(
        src_dataset: xr.Dataset,
        src_xy_var_names: Tuple[str, str],
        src_xy_tp_var_names: Tuple[str, str] = None,
        src_xy_crs: str = None,
        src_xy_gcp_step: Union[int, Tuple[int, int]] = 10,
        src_xy_tp_gcp_step: Union[int, Tuple[int, int]] = 1,
        dst_size: Tuple[int, int] = None,
        dst_region: CoordRange = None,
        dst_resampling: Union[str, Dict[str, str]] = DEFAULT_RESAMPLING,
        include_xy_vars: bool = False,
        include_non_spatial_vars: bool = False) -> xr.Dataset:
    """
    Reprojection of xarray datasets with 2D geo-coding, e.g. with variables lon(y,x), lat(y, x) to
    EPSG:4326 (WGS-84) coordinate reference system.

    If *dst_resampling* is a string, it provides the default resampling for all variables.
    If *dst_resampling* is a dictionary, it provides a mapping from variable names to the desired
    resampling for that variable.

    The resampling may be one of the following up-sampling algorithms:

    * ``Nearest``
    * ``Bilinear``
    * ``Cubic``
    * ``CubicSpline``
    * ``Lanczos``

    Or one of the down-sampling algorithms:

    * ``Average``
    * ``Min``
    * ``Max``
    * ``Median``
    * ``Mode``
    * ``Q1``
    * ``Q3``

    :param src_dataset: the source dataset with 2D geo-coding variables
    :param src_xy_var_names: names of the 2D x/y (e.g. lon/lat) coordinate variables
    :param src_xy_tp_var_names: optional names of the tie-point x/y coordinate variables
    :param src_xy_crs: CRS of the x/y coordinates, defaults to EPSG:4326 (WGS-84)
    :param src_xy_gcp_step: pixel step(s) used to extract GCPs from the x/y variables
    :param src_xy_tp_gcp_step: pixel step(s) used to extract GCPs from the tie-point variables
    :param dst_size: target size as (width, height) in pixels
    :param dst_region: target region as a coordinate range (x1, y1, x2, y2)
    :param dst_resampling: The spatial resampling algorithm. Either a string that provides the default resampling
           algorithm name or a dictionary that maps variable names to per-variable resampling algorithm names.
    :param include_non_spatial_vars:
    :param include_xy_vars: Whether to include the variables given by *src_xy_var_names*.
           Useful for projection-validation.
    :return: the reprojected dataset
    """
    x_name, y_name = src_xy_var_names
    tp_x_name, tp_y_name = src_xy_tp_var_names or (None, None)

    # Set defaults
    src_xy_crs = src_xy_crs or CRS_WKT_EPSG_4326
    gcp_i_step, gcp_j_step = (src_xy_gcp_step, src_xy_gcp_step) \
        if isinstance(src_xy_gcp_step, int) else src_xy_gcp_step
    tp_gcp_i_step, tp_gcp_j_step = (src_xy_tp_gcp_step, src_xy_tp_gcp_step) \
        if src_xy_tp_gcp_step is None or isinstance(src_xy_tp_gcp_step, int) \
        else src_xy_tp_gcp_step

    dst_width, dst_height = dst_size

    _assert(src_dataset is not None)
    _assert(dst_width > 1)
    _assert(dst_height > 1)
    _assert(gcp_i_step > 0)
    _assert(gcp_j_step > 0)

    _assert(x_name in src_dataset)
    _assert(y_name in src_dataset)
    x_var = src_dataset[x_name]
    y_var = src_dataset[y_name]
    if len(x_var.dims) == 1 and len(y_var.dims) == 1:
        y_var, x_var = xr.broadcast(y_var, x_var)
    _assert(len(x_var.dims) == 2)
    _assert(y_var.dims == x_var.dims)
    _assert(x_var.shape[-1] >= 2)
    _assert(x_var.shape[-2] >= 2)
    _assert(y_var.shape == x_var.shape)

    src_width = x_var.shape[-1]
    src_height = x_var.shape[-2]

    dst_region = _ensure_valid_region(dst_region, GLOBAL_GEO_EXTENT, x_var,
                                      y_var)
    dst_x1, dst_y1, dst_x2, dst_y2 = dst_region

    dst_res = max((dst_x2 - dst_x1) / dst_width,
                  (dst_y2 - dst_y1) / dst_height)
    _assert(dst_res > 0)

    dst_geo_transform = (dst_x1, dst_res, 0.0, dst_y2, 0.0, -dst_res)

    # Extract GCPs from full-res lon/lat 2D variables
    gcps = _get_gcps(x_var, y_var, gcp_i_step, gcp_j_step)

    if tp_x_name and tp_y_name and tp_x_name in src_dataset and tp_y_name in src_dataset:
        # If there are tie-point variables in the src_dataset
        tp_x_var = src_dataset[tp_x_name]
        tp_y_var = src_dataset[tp_y_name]
        _assert(len(tp_x_var.shape) == 2)
        _assert(tp_x_var.shape == tp_y_var.shape)
        tp_width = tp_x_var.shape[-1]
        tp_height = tp_x_var.shape[-2]
        _assert(tp_gcp_i_step is not None and tp_gcp_i_step > 0)
        _assert(tp_gcp_j_step is not None and tp_gcp_j_step > 0)
        # Extract GCPs also from tie-point lon/lat 2D variables
        tp_gcps = _get_gcps(tp_x_var, tp_y_var, tp_gcp_i_step, tp_gcp_j_step)
    else:
        # No tie-point variables
        tp_x_var = None
        tp_width = None
        tp_height = None
        tp_gcps = None

    mem_driver = gdal.GetDriverByName("MEM")

    dst_x2 = dst_x1 + dst_res * dst_width
    dst_y1 = dst_y2 - dst_res * dst_height

    dst_dataset = _new_dst_dataset(dst_width, dst_height, dst_res, dst_x1,
                                   dst_y1, dst_x2, dst_y2)

    if dst_resampling is None:
        dst_resampling = {}
    if isinstance(dst_resampling, str):
        dst_resampling = {
            var_name: dst_resampling
            for var_name in src_dataset.variables
        }

    for var_name in src_dataset.variables:
        src_var = src_dataset[var_name]

        if src_var.dims == x_var.dims:
            is_tp_var = False
            if var_name == x_name or var_name == y_name:
                if not include_xy_vars:
                    # Don't store lat and lon 2D vars in destination
                    continue
                dst_var_name = 'src_' + var_name
            else:
                dst_var_name = var_name
            # PERF: collect variables of same type and size and set band_count accordingly to speed up reprojection
            band_count = 1
            data_type = numpy_to_gdal_dtype(src_var.dtype)
            src_var_dataset = mem_driver.Create(f'src_{var_name}', src_width,
                                                src_height, band_count,
                                                data_type, [])
            src_var_dataset.SetGCPs(gcps, src_xy_crs)
        elif tp_x_var is not None and src_var.dims == tp_x_var.dims:
            is_tp_var = True
            if var_name == tp_x_name or var_name == tp_y_name:
                if not include_xy_vars:
                    # Don't store lat and lon 2D vars in destination
                    continue
                dst_var_name = 'src_' + var_name
            else:
                dst_var_name = var_name
            # PERF: collect variables of same type and size and set band_count accordingly to speed up reprojection
            band_count = 1
            data_type = numpy_to_gdal_dtype(src_var.dtype)
            src_var_dataset = mem_driver.Create(f'src_{var_name}', tp_width,
                                                tp_height, band_count,
                                                data_type, [])
            src_var_dataset.SetGCPs(tp_gcps, src_xy_crs)
        elif include_non_spatial_vars:
            # Store any variable as-is, that does not have the lat/lon 2D dims, then continue
            dst_dataset[var_name] = src_var
            continue
        else:
            continue

        # We use GDT_Float64 to introduce NaN as no-data-value
        dst_data_type = gdal.GDT_Float64
        dst_var_dataset = mem_driver.Create(f'dst_{var_name}', dst_width,
                                            dst_height, band_count,
                                            dst_data_type, [])
        dst_var_dataset.SetProjection(CRS_WKT_EPSG_4326)
        dst_var_dataset.SetGeoTransform(dst_geo_transform)

        # TODO (forman): PERFORMANCE: stack multiple variables of same src_data_type
        #                to perform the reprojection only once per stack

        # TODO (forman): CODE-DUPLICATION: refactor out common code block in reproject_crs_to_wgs84()

        for band_index in range(1, band_count + 1):
            src_var_dataset.GetRasterBand(band_index).SetNoDataValue(
                float('nan'))
            src_var_dataset.GetRasterBand(band_index).WriteArray(
                src_var.values)
            dst_var_dataset.GetRasterBand(band_index).SetNoDataValue(
                float('nan'))

        resample_alg, resample_alg_name = _get_resample_alg(
            dst_resampling,
            var_name,
            default=DEFAULT_TP_RESAMPLING if is_tp_var else DEFAULT_RESAMPLING)

        warp_mem_limit = 0
        error_threshold = 0
        # See http://www.gdal.org/structGDALWarpOptions.html
        options = ['INIT_DEST=NO_DATA']
        gdal.ReprojectImage(
            src_var_dataset,
            dst_var_dataset,
            None,
            None,
            resample_alg,
            warp_mem_limit,
            error_threshold,
            None,  # callback,
            None,  # callback_data,
            options)  # options

        dst_values = dst_var_dataset.GetRasterBand(1).ReadAsArray()
        # print(var_name, dst_values.shape, np.nanmin(dst_values), np.nanmax(dst_values))

        dst_dataset[dst_var_name] = _new_dst_variable(src_var, dst_values,
                                                      resample_alg_name)

    return dst_dataset
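
# --- Illustrative sketch (invented coordinates, not from the original module): the 1-D to
# 2-D geo-coding step above -- when lon(x) and lat(y) are 1-D, xr.broadcast turns them into
# the 2-D lat(y, x) / lon(y, x) arrays expected by the GCP extraction.
import numpy as np
import xarray as xr

lon = xr.DataArray(np.linspace(10.0, 11.0, 5), dims='x')
lat = xr.DataArray(np.linspace(53.0, 54.0, 4), dims='y')

lat_2d, lon_2d = xr.broadcast(lat, lon)
print(lat_2d.dims, lat_2d.shape)   # ('y', 'x') (4, 5)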
Example #12
    def vertical_averaging_weights(self,
                                   time_slice=slice(None),
                                   ztop=None,
                                   zbottom=None,
                                   dz=None,
                                   face_slice=slice(None)):
        """
        reimplementation of sunreader.Sunreader::averaging_weights
        
        returns: weights as array [faces, Nk] to average over a cell-centered quantity
        for the range specified by ztop, zbottom, and dz.

        The range is specified by two of the three of ztop, zbottom, dz, all non-negative.
        ztop: dimensional distance from the free surface
        zbottom: dimensional distance from the bed
        dz: thickness

        If the result would be an empty region, return nans.

        face_slice: an object which can be used to index into the face dimension;
          defaults to all faces.
          
        this thing is slow! - lots of time in adjusting all_dz

        order of dimensions has been altered to match local suntans netcdf code,
         i.e. face,level,time
        """
        mesh = self.nc[self.mesh_name]

        face_dim = self.face_dim

        if self.face_eta_vname is None:
            self.face_eta_vname = self.find_var(
                standard_name='sea_surface_height_above_geoid')
            assert self.face_eta_vname is not None, "Failed to discern eta variable"
        surface = self.face_eta_vname

        face_select = {face_dim: face_slice}
        h = self.nc[self.face_eta_vname].isel({
            self.time_dim: time_slice,
            face_dim: face_slice
        })

        if self.face_depth_vname is None:
            self.face_depth_vname = self.find_var(
                standard_name=[
                    "sea_floor_depth_below_geoid", "sea_floor_depth"
                ],
                location='face')  # ala 'Mesh_depth'
        if self.face_depth_vname is None:
            # c'mon people -- should be fixed in source now,
            self.face_depth_vname = self.find_var(
                stanford_name=[
                    "sea_floor_depth_below_geoid", "sea_floor_depth"
                ],
                location='face')  # ala 'Mesh_depth'

        depth = self.face_depth_vname
        assert depth is not None, "Failed to find depth variable"

        bed = self.nc[depth].isel(**face_select)
        if self.nc[depth].attrs.get('positive') == 'down':
            log.debug("Cell depth is positive-down")
            bed = -bed
        else:
            log.debug(
                "Cell depth is positive-up, or at least that is the assumption"
            )

        h, bed = xr.broadcast(h, bed)

        # for now, can only handle an array of cells - i.e. if you want
        # a single face, it's still going to process an array, just with
        # length 1.
        Ncells = len(bed)

        layers = self.nc[self.layer_var_name()]
        layer_vals = layers.values
        if layers.attrs.get('positive') == 'down':
            layer_vals = -layer_vals

        if 'bounds' in layers.attrs:
            layer_bounds = self.nc[layers.attrs['bounds']].values

            # hmm - some discrepancies over the dimensionality of layer_interfaces
            # assumption is probably that the dimensions are [layer,{top,bottom}]
            if layer_bounds.ndim == 2 and layer_bounds.shape[1] == 2:
                # same layer interfaces for all cells, all time.
                layer_interfaces = np.concatenate(
                    (layer_bounds[:, 0], layer_bounds[-1:, 1]))
                if layers.attrs.get('positive') == 'down':
                    layer_interfaces = -layer_interfaces
            else:
                raise Exception(
                    "Not smart enough about layer_bounds to do this")
        else:
            dz_single = 0 - bed.values.min(
            )  # assumes typ eta of 0.  only matters for 2D
            layer_interfaces = utils.center_to_edge(layer_vals,
                                                    dx_single=dz_single)
            layer_bounds = np.concatenate(
                (layer_interfaces[:-1, None], layer_interfaces[1:, None]),
                axis=1)
        # used to retain layer_interfaces for the top of the top and the
        # bottom of the bottom.  But that just makes for more cleanup
        # so now clip this to be interfaces between two layers.
        layer_interfaces = layer_interfaces[1:-1]

        # Calls to searchsorted below may need to negate both arguments
        # if increasing k maps to decreasing elevation.
        if np.all(np.diff(layer_interfaces) < 0):
            k_sign = -1
        elif np.all(np.diff(layer_interfaces) > 0):
            k_sign = 1
        else:
            raise Exception("Confused about the ordering of k")

        # this is a bit trickier, because there could be lumping.  for now, it should work okay
        # with 2-d, but won't be good for 3-d HERE if k is increasing up, this is WRONG
        # this used to be called Nk, but that's misleading.  it's the k index
        # of the bed layer, not the number of layers per water column.
        kbed = np.searchsorted(k_sign * layer_interfaces, k_sign * bed)

        one_dz = k_sign * (layer_bounds[:, 1] - layer_bounds[:, 0])
        all_dz = np.ones(h.shape + one_dz.shape) * one_dz
        all_k = np.ones(h.shape + one_dz.shape, np.int32) * np.arange(
            len(one_dz))

        # adjust h and bed to bound the requested integration range.
        # 3 choices here..
        # try to clip to reasonable values at the same time:
        if ztop is not None:
            if ztop != 0:
                h = h - ztop  # don't modify h
                # don't allow h to go below the bed
                h[h < bed] = bed[h < bed]
            if dz is not None:
                # don't allow bed to be below the real bed.
                bed = np.maximum(h - dz, bed)
        if zbottom is not None:
            # no clipping checks for zbottom yet.
            if zbottom != 0:
                bed = bed + zbottom  # don't modify bed!
            if dz is not None:
                h = bed + dz

        # so now h and bed are elevations bounding the integration region
        # with this min call it's only correct for k_sign==-1
        ctops = np.searchsorted(
            k_sign * (layer_interfaces + self.surface_dzmin), k_sign * h)

        # default h_to_ctop will use the dzmin appropriate for the surface,
        # but at the bed, it goes the other way - safest just to say dzmin=0,
        # and also clamp to known Nk
        cbeds = np.searchsorted(k_sign * layer_interfaces, k_sign * bed)

        # dimension problems here - Nk has dimensions like face_slice or face_slice,time_slice
        # cbeds has dimensions like face_slice,time_slice
        # how to conditionally add dimensions to Nk?
        # for now, ASSUME that time is after face, and use shape of h to
        # figure out how to pad it
        while h.ndim > kbed.ndim:
            kbed = kbed[..., None]
        # also have to expand Nk so that the boolean indexing works

        # use to make cbeds exclusive indexing, but its cleaner to leave
        # ctops and cbeds both as inclusive, since it changes based on
        # k_sign
        if k_sign == -1:
            # keep cbed valid w.r.t. to deepest layer kbed,
            cbeds = np.minimum(cbeds, kbed)
            drymask = (all_k < ctops[..., None]) | (all_k > cbeds[..., None])
        else:
            cbeds = np.maximum(cbeds, kbed)  # maybe redundant now
            drymask = (all_k < cbeds[..., None]) | (all_k > ctops[..., None])

        all_dz[drymask] = 0.0

        ii = tuple(np.indices(h.shape))
        z = layer_bounds.min(axis=1)  # bottom of each cell
        all_dz[ii + (ctops[ii], )] = h - z[ctops]
        all_dz[ii + (cbeds[ii], )] -= bed - z[cbeds]

        # make those weighted averages
        # have to add extra axis to get broadcasting correct
        all_dz = all_dz / np.sum(all_dz, axis=-1)[..., None]

        if all_dz.ndim == 3:
            # we have both time and level
            # transpose to match the shape of velocity data -
            all_dz = all_dz.transpose([0, 2, 1])
        return all_dz
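
# --- Illustrative sketch (made-up numbers, not from the original reader): the
# `h, bed = xr.broadcast(h, bed)` step above -- a time-varying free surface h(time, face)
# is broadcast against a static bed elevation bed(face) so they can be compared cell by
# cell at every time step.
import numpy as np
import xarray as xr

h = xr.DataArray(np.random.rand(3, 6) * 0.5, dims=('time', 'face'))   # free surface [m]
bed = xr.DataArray(-np.linspace(2.0, 8.0, 6), dims='face')            # bed elevation [m]

h_b, bed_b = xr.broadcast(h, bed)
print(h_b.dims, bed_b.dims)          # both ('time', 'face')
print(bool((h_b > bed_b).all()))     # positive water-column thickness everywhere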
Example #13
def median_absolute_error(a, b, dim=None, skipna=False, keep_attrs=False):
    """
    Median Absolute Error.

    .. math::
        \\mathrm{median}(\\vert a - b\\vert)

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the median absolute error along.
        Note that this dimension will be reduced as a result.
        Defaults to None reducing all dimensions.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied
        from the first input to the new one.
        If False (default), the new object will
        be returned without attributes.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        Median Absolute Error.

    See Also
    --------
    sklearn.metrics.median_absolute_error

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import median_absolute_error
    >>> a = xr.DataArray(np.random.rand(5, 3, 3),
    ...                  dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3),
    ...                  dims=['time', 'x', 'y'])
    >>> median_absolute_error(a, b, dim='time')
    """
    dim, axis = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)

    return xr.apply_ufunc(
        _median_absolute_error,
        a,
        b,
        input_core_dims=[dim, dim],
        kwargs={
            "axis": axis,
            "skipna": skipna
        },
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
Example #14
    dx=median_dx

if dz is None:
    all_dz=[ np.abs(get_z_dz(tran).values.ravel())
             for tran in trans]
    all_dz=np.concatenate( all_dz )
    # generally want to retain most of the vertical
    # resolution, but not minimum dz since there could be
    # some partial layers, near-field layers, etc.
    # even 10th percentile may be small.
    dz=np.percentile(all_dz,10)

# Get the maximum range of valid vertical
z_bnds=[]
for tran in trans:
    V,z_full,z_dz = xr.broadcast(tran.Ve, tran.z_ctr, get_z_dz(tran))
    valid=np.isfinite(V.values)
    z_valid=z_full.values[valid]
    z_low=z_full.values[valid] - z_dz.values[valid]/2.0
    z_high=z_full.values[valid] + z_dz.values[valid]/2.0
    z_bnds.append( [z_low.min(), z_high.max()] )

z_bnds=np.concatenate(z_bnds)
z_min=z_bnds.min()
z_max=z_bnds.max()

# Resample each transect in the vertical:
new_z=np.linspace(z_min,z_max,int(round((z_max-z_min)/dz)))

##
Example #15
start = (37.0, -105.0)
end = (35.5, -65.0)

##############################
# Get the cross section, and convert lat/lon to supplementary coordinates:

cross = cross_section(data, start, end)
cross = cross.set_coords(('lat', 'lon'))
print(cross)

##############################
# For this example, we will be plotting potential temperature, relative humidity, and
# tangential/normal winds. And so, we need to calculate those, and add them to the dataset:

temperature, pressure, specific_humidity = xr.broadcast(
    cross['Temperature'], cross['isobaric'], cross['Specific_humidity'])

theta = mpcalc.potential_temperature(pressure, temperature)
rh = mpcalc.relative_humidity_from_specific_humidity(specific_humidity,
                                                     temperature, pressure)

# These calculations return unit arrays, so put those back into DataArrays in our Dataset
cross['Potential_temperature'] = xr.DataArray(theta,
                                              coords=temperature.coords,
                                              dims=temperature.dims,
                                              attrs={'units': theta.units})
cross['Relative_humidity'] = xr.DataArray(rh,
                                          coords=specific_humidity.coords,
                                          dims=specific_humidity.dims,
                                          attrs={'units': rh.units})
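
# --- Illustrative sketch (synthetic cross-section, not from the MetPy example): the
# coordinate broadcast above -- the 1-D 'isobaric' pressure coordinate is expanded to the
# shape of the temperature field so pressure-dependent quantities can be computed pointwise.
import numpy as np
import xarray as xr

cross_demo = xr.Dataset(
    {'Temperature': (('isobaric', 'index'), 280.0 + np.random.rand(3, 5)),
     'Specific_humidity': (('isobaric', 'index'), 1e-3 * np.random.rand(3, 5))},
    coords={'isobaric': [1000.0, 850.0, 700.0], 'index': np.arange(5)})

temperature_d, pressure_d, q_d = xr.broadcast(
    cross_demo['Temperature'], cross_demo['isobaric'], cross_demo['Specific_humidity'])
print(pressure_d.dims, pressure_d.shape)   # ('isobaric', 'index') (3, 5)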
Example #16
def _apply_metric_at_given_lead(
    verif,
    verif_dates,
    lead,
    hind=None,
    hist=None,
    inits=None,
    reference=None,
    metric=None,
    comparison=None,
    dim=None,
    **metric_kwargs,
):
    """Applies a metric between two time series at a given lead.

    .. note::

        This will be moved to a method of the `Scoring()` class in the next PR.

    Args:
        verif (xr object): Verification data.
        verif_dates (dict): Lead-dependent verification dates for alignment.
        lead (int): Given lead to score.
        hind (xr object): Initialized hindcast. Not required in a persistence forecast.
        hist (xr object): Historical simulation. Required when
            ``reference='historical'``.
        inits (dict): Lead-dependent initialization dates for alignment.
        reference (str): If not ``None``, return score for this reference forecast.
            * 'persistence'
            * 'historical'
        metric (Metric): Metric class for scoring.
        comparison (Comparison): Comparison class.
        dim (str): Dimension to apply metric over.

    Returns:
        result (xr object): Metric results for the given lead for the initialized
            forecast or reference forecast.
    """
    if reference is None:
        # Use `.where()` instead of `.sel()` to account for resampled inits when
        # bootstrapping.
        a = (hind.sel(lead=lead).where(hind['time'].isin(inits[lead]),
                                       drop=True).drop_vars('lead'))
        b = verif.sel(time=verif_dates[lead])
    elif reference == 'persistence':
        a, b = persistence(verif, inits, verif_dates, lead)
    elif reference == 'historical':
        a, b = historical(hist, verif, verif_dates, lead)
    a['time'] = b['time']

    # broadcast dims when deterministic metric and apply over member
    if (a.dims != b.dims) and (dim == 'member') and not metric.probabilistic:
        a, b = xr.broadcast(a, b)
    result = metric.function(
        a,
        b,
        dim=dim,
        comparison=comparison,
        **metric_kwargs,
    )
    log_compute_hindcast_inits_and_verifs(dim, lead, inits, verif_dates)
    return result
Example #17
def lateral_fill(da_in, isvalid_mask, ltripole=False, tol=1.0e-4,
                 use_sor=False, rc=1.8, max_iter=1000):
    """Perform lateral fill on xarray.DataArray

    Parameters
    ----------
    da_in : xarray.DataArray
      DataArray on which to fill NaNs. Fill is performed on the two
      rightmost dimensions. Grid is assumed periodic in `x` direction
      (last dimension).

    isvalid_mask : xarray.DataArray, boolean
      Valid values mask: `True` where data should be filled. Must have the
      same rightmost dimensions as `da_in`.

    ltripole : boolean, optional [default=False]
      Logical flag; if `True` then treat the top row of the grid as periodic
      in the sense of a tripole grid.

    tol : float, optional [default=1.0e-4]
      Convergence criteria: stop filling when values change is less or equal
      to `tol * var`; i.e. `delta <= tol * np.abs(var[j, i])`.

    use_sor: boolean, optional [default=False]
      switch to select SOR fill algorithm over progressive fill algorithm

    rc : float, optional [default=1.8, valid bounds=(1.0,2.0)]
       over-relaxation coefficient to use in SOR fill algorithm. Larger arrays
       typically converge faster with larger coefficients. For 1 deg. grid (360x180)
       a coefficient in the range 1.85-1.9 is near optimal.

    max_iter : integer, optional, [default=1000]
       maximum number of iterations to do before giving up if tol is not reached.

    Returns
    -------
    da_out : xarray.DataArray
      DataArray with NaNs filled by iterative smoothing.

    """
    print("IN FOB version : lateral_fill")

    dims_in = da_in.dims
    non_lateral_dims = dims_in[:-2]

    attrs = da_in.attrs
    encoding = da_in.encoding
    coords = da_in.coords

    da_in, isvalid_mask = xr.broadcast(da_in, isvalid_mask)

    if len(non_lateral_dims) > 0:
        da_in_stack = da_in.stack(non_lateral_dims=non_lateral_dims)
        da_out_stack = xr.full_like(da_in_stack, fill_value=np.nan)
        isvalid_mask_stack = isvalid_mask.stack(non_lateral_dims=non_lateral_dims)
        for i in range(da_in_stack.shape[-1]):
            arr = da_in_stack.data[:, :, i]
            da_out_stack[:, :, i] = lateral_fill_np_array(arr, isvalid_mask_stack.data[:, :, i],
                                                          ltripole, tol, use_sor, rc, max_iter)

        da_out = da_out_stack.unstack('non_lateral_dims').transpose(*dims_in)

    else:
        da_out = xr.full_like(da_in, fill_value=np.nan)
        da_out[:, :] = lateral_fill_np_array(da_in.data, isvalid_mask.data,
                                             ltripole, tol, use_sor, rc, max_iter)

    da_out.attrs = attrs
    da_out.encoding = encoding
    for k, da in coords.items():
        da_out[k].attrs = da.attrs

    return da_out
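
# --- Illustrative sketch (invented shapes, not from the original package): the broadcast at
# the top of `lateral_fill` -- a 2-D validity mask (y, x) is expanded along the non-lateral
# 'time' dimension so the fill can proceed slice by slice with a mask of matching shape.
import numpy as np
import xarray as xr

da_in = xr.DataArray(np.random.rand(2, 4, 5), dims=('time', 'y', 'x'))
isvalid_mask = xr.DataArray(np.random.rand(4, 5) > 0.3, dims=('y', 'x'))

da_b, mask_b = xr.broadcast(da_in, isvalid_mask)
print(mask_b.dims, mask_b.shape)   # ('time', 'y', 'x') (2, 4, 5)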
Example #18
def compute_perfect_model(
    init_pm,
    control,
    metric='pearson_r',
    comparison='m2e',
    dim=None,
    add_attrs=True,
    **metric_kwargs,
):
    """
    Compute a predictability skill score for a perfect-model framework
    simulation dataset.

    Args:
        init_pm (xarray object): ensemble with dims ``lead``, ``init``, ``member``.
        control (xarray object): control with dimension ``time``.
        metric (str): `metric` name, see
         :py:func:`climpred.utils.get_metric_class` and (see :ref:`Metrics`).
        comparison (str): `comparison` name defines what to take as forecast
            and verification (see
            :py:func:`climpred.utils.get_comparison_class` and :ref:`Comparisons`).
        dim (str or list): dimension to apply metric over. default: ['member', 'init']
        add_attrs (bool): write climpred compute args to attrs. default: True
        ** metric_kwargs (dict): additional keywords to be passed to metric.
            (see the arguments required for a given metric in metrics.py)

    Returns:
        skill (xarray object): skill score with dimensions as input `ds`
                               without `dim`.

    """
    # Check that init is int, cftime, or datetime; convert ints or cftime to datetime.
    init_pm = convert_time_index(init_pm,
                                 'init',
                                 'init_pm[init]',
                                 calendar=PM_CALENDAR_STR)

    # check args compatible with each other
    metric, comparison, dim = _get_metric_comparison_dim(metric,
                                                         comparison,
                                                         dim,
                                                         kind='PM')

    forecast, verif = comparison.function(init_pm, metric=metric)

    # in case you want to compute deterministic skill over member dim
    if (forecast.dims != verif.dims) and not metric.probabilistic:
        forecast, verif = xr.broadcast(forecast, verif)

    skill = metric.function(forecast,
                            verif,
                            dim=dim,
                            comparison=comparison,
                            **metric_kwargs)
    if comparison.name == 'm2m':
        skill = skill.mean(M2M_MEMBER_DIM)
    # Attach climpred compute information to skill
    if add_attrs:
        skill = assign_attrs(
            skill,
            init_pm,
            function_name=inspect.stack()[0][3],
            metric=metric,
            comparison=comparison,
            dim=dim,
            metadata_dict=metric_kwargs,
        )
    return skill
Example #19
def truncate_dataarray(dataarray,
                       quantile_dims,
                       replace_with_mean=False,
                       mean_dims=None,
                       weights=None,
                       quantiles=None,
                       extra_dim=None):
    r"""Truncates the dataarray over the given dimensions, meaning that data
    outside the upper and lower quantiles, which are taken across the
    dimensions ``quantile_dims``, are replaced either with:
    1. the upper and lower quantiles themselves.
    2. or with the mean of the in-lier data, which is taken across the
       dimensions given by ``mean_dims``.

    **Note**: If weights are given, then weighted-quantiles and weighted-means
    are taken, otherwise the quantiles and means are unweighted.

    Args:
        dataarray (xarray.DataArray):
            dataarray that has at least the dimensions given by ``quantile_dims``, and
            if ``replace_with_mean`` is True, then also ``mean_dims``.
        replace_with_mean (bool, optional):
            If True, then replace values outside of the upper and lower
            quantiles and with the mean across the dimensions given by
            `mean_dims`, if False, then replace with the upper and lower bounds
            themselves.
        mean_dims (list[str], optional):
            dimensions to take mean within the bounds over
        quantile_dims (list[str]):
            dimensions to take quantiles over -- the quantiles are
            used to make the bounds.
        weights (xarray.DataArray, optional):
            Must have one dimension and can have up two dimensions.
        quantiles (tuple[float, float] | list[float, float], optional):
            The tuple of two floats representing the quantiles to take.
        extra_dim (str):
            Extra dimension that exists in `weights` and `dataarray`. It should
            not be in `quantile_dims`.
    Returns:
        (xarray.DataArray):
            Same shape as the original array, but with truncated values.
    Raises:
        (ValueError):
            If `replace_with_mean` is True, and `mean_dims` is not list of
            strings.
    """
    LOGGER.debug("Entering the `truncate_dataarray` function")

    LOGGER.debug("quantile_dims:{}".format(quantile_dims))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("mean_dims:{}".format(mean_dims))
    LOGGER.debug("weights:{}".format(weights))
    LOGGER.debug("quantiles:{}".format(quantiles))
    LOGGER.debug("extra_dim:{}".format(extra_dim))

    if replace_with_mean and not mean_dims:
        mean_dims_err_msg = (
            "If `replace_with_mean` is True, then `mean_dims` "
            "must be a list of strings")
        LOGGER.error(mean_dims_err_msg)
        raise ValueError(mean_dims_err_msg)
    else:
        pass  # `mean_dims` may be None in this case

    quantiles = (Quantiles(
        *sorted(quantiles)) if quantiles else Quantiles(0.05, 0.95))

    if weights is not None:
        quantile_values = weighted_quantile_with_extra_dim(
            dataarray, quantiles, list(quantile_dims), weights, extra_dim)
    else:
        quantile_values = dataarray.quantile(quantiles,
                                             dim=list(quantile_dims))
    lower_da = quantile_values.sel(quantile=quantiles.lower)
    upper_da = quantile_values.sel(quantile=quantiles.upper)

    if replace_with_mean:
        good_indexes = (dataarray >= lower_da) & (dataarray <= upper_da)
        inside_da = dataarray.where(good_indexes)
        outside_da = dataarray.where(~good_indexes)

        if weights is not None:
            inside_mean_da = weighted_mean_with_extra_dim(
                inside_da, mean_dims, weights, extra_dim)
        else:
            inside_mean_da = inside_da.mean(mean_dims)

        truncated_da = (inside_da.combine_first(
            xr.ones_like(outside_da) * inside_mean_da))
    else:
        expanded_lower_da, _ = xr.broadcast(lower_da, dataarray)
        expanded_lower_da = expanded_lower_da.transpose(*dataarray.coords.dims)

        expanded_upper_da, _ = xr.broadcast(upper_da, dataarray)
        expanded_upper_da = expanded_upper_da.transpose(*dataarray.coords.dims)

        truncated_da = dataarray.clip(min=expanded_lower_da,
                                      max=expanded_upper_da)
    LOGGER.debug("Leaving the `truncate_dataarray` function")
    return truncated_da
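
# --- Illustrative sketch (random data, unweighted case; not from the original module): the
# clipping branch above -- quantiles taken over 'draw' are broadcast back to the full array
# shape and used as per-element bounds for `clip`.
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(100, 3), dims=('draw', 'location'))
q = da.quantile([0.05, 0.95], dim='draw')

lower, _ = xr.broadcast(q.sel(quantile=0.05, drop=True), da)
upper, _ = xr.broadcast(q.sel(quantile=0.95, drop=True), da)
lower = lower.transpose(*da.dims)
upper = upper.transpose(*da.dims)

truncated = da.clip(min=lower, max=upper)
print(bool((truncated >= lower).all() & (truncated <= upper).all()))   # True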
Example #20
        ) * 100
        # assign back to the dataframe
        df_window.loc[sinds, 'PNI'] = y[f'{var_col}{rolling_window}'].values


# -----------------------------------------------------------------------------
## calculate Percent of Normal Index (PNI)
# -----------------------------------------------------------------------------




# -----------------------------------------------------------------------------
## Trying to test the broadcast functionality
# -----------------------------------------------------------------------------
xr.broadcast(c_window, mthly_climatology)

# ASSIGN DIMENSION to xarray object
min_yr = 2010
max_yr = 2015
n_yrs = max_yr - min_yr + 1
da = xr.DataArray(
    c_window['time.month'].values,
    coords=[('month', np.tile(np.arange(1, 13), n_yrs))]
)
c_window, _ = xr.broadcast(c_window, da)


# apply to each month (easier to )
mth = 1
c_window.sel(month=mth)
Example #21
import six
import matplotlib.pyplot as plt
import xarray as xr

import read_sontek

##

six.moves.reload_module(read_sontek)

rivr_fn='040518_7_BTref/20180405125420r.rivr'

ds=read_sontek.surveyor_to_xr(rivr_fn,proj='EPSG:26910')

##

# Transect of speed
plt.figure(1).clf()
fig,ax=plt.subplots(num=1)
x,z,speed = xr.broadcast(ds.track_dist,-ds.location,ds.water_speed)
scat=ax.scatter(x, z, 40, speed, cmap='jet')
plt.colorbar(scat)

##

# Plan view, scatter and quiver
plt.figure(2).clf()
fig,ax=plt.subplots(num=2)
scat=ax.scatter(ds.x_utm, ds.y_utm, 40, ds.mean_water_speed, cmap='jet')

avg_east=ds.Ve.mean(dim='cell')
avg_north=ds.Vn.mean(dim='cell')
quiv=ax.quiver(ds.x_utm.values, ds.y_utm.values, avg_east.values, avg_north.values)

plt.colorbar(scat,label='Speed m/s')
Example #22
    def sinusoidal(self):
        
        moreval = 1
        step    = 0.1
        
        # Horizontal Dimensions
        X      = xr.DataArray( np.arange(self.NX*moreval),       dims = 'X')  * step
        Xp1    = xr.DataArray( np.arange(self.NX*moreval+1)-0.5, dims = 'Xp1')* step
        Y      = xr.DataArray( np.arange(self.NY*moreval),       dims = 'Y')  * step
        Yp1    = xr.DataArray( np.arange(self.NY*moreval+1)-0.5, dims = 'Yp1')* step
        
        # Vertical Dimensions
        Z      = xr.DataArray(-np.arange(self.NZ*moreval)-0.5, dims = 'Z')  * step
        Zp1    = xr.DataArray(-np.arange(self.NZ*moreval+1),   dims = 'Zp1')* step
        Zu     = xr.DataArray(-np.arange(self.NZ*moreval)-1,   dims = 'Zu') * step
        Zl     = xr.DataArray(-np.arange(self.NZ*moreval),     dims = 'Zl') * step
        
        # Space Coordinates
        YC, XC = xr.broadcast(Y,   X)
        YG, XG = xr.broadcast(Yp1, Xp1)
        YU, XU = xr.broadcast(Y  , Xp1)
        YV, XV = xr.broadcast(Yp1, X)
        
        # Spacing
        drC = xr.full_like(Zp1, step)
        drF = xr.full_like(Z  , step)
        dxC = xr.full_like(XU,  step)
        dyC = xr.full_like(XV,  step)
        dxF = xr.full_like(XC,  step)
        dyF = xr.full_like(XC,  step)
        dxG = xr.full_like(XV,  step)
        dyG = xr.full_like(XU,  step)
        dxV = xr.full_like(XG,  step)
        dyU = xr.full_like(XG,  step)
        
        # Areas
        rA  = dxF * dyF
        rAw = dxC * dyG
        rAs = dxG * dyC 
        rAz = dxV * dyU 

        # HFac
        HFacC, _ = xr.broadcast(xr.full_like(Z, 1), xr.full_like(XC, 1))
        HFacW, _ = xr.broadcast(xr.full_like(Z, 1), xr.full_like(XU, 1))
        HFacS, _ = xr.broadcast(xr.full_like(Z, 1), xr.full_like(XV, 1))
        
        # Sin C points
        sinZ, sinY, sinX = xr.broadcast(np.sin(Z), np.sin(Y), np.sin(X))
        
        # Sin vel points
        sinUZ, sinUY , sinUX = xr.broadcast(np.sin(Z) , np.sin(Y)  , np.sin(Xp1))
        sinVZ, sinVY , sinVX = xr.broadcast(np.sin(Z) , np.sin(Yp1), np.sin(X))
        sinWZ, sinWY , sinWX = xr.broadcast(np.sin(Zl), np.sin(Y)  , np.sin(X))
        

        return xr.Dataset({'X'     : X,      'Xp1'   : Xp1, 
                           'Y'     : Y,      'Yp1'   : Yp1,
                           'Z'     : Z,      'Zp1'   : Zp1, 'Zu': Zu, 'Zl': Zl,
                           'YC'    : YC,     'XC'    : XC, 
                           'YG'    : YG,     'XG'    : XG, 
                           'YU'    : YU,     'XU'    : XU, 
                           'YV'    : YV,     'XV'    : XV,
                           'drC'   : drC,    'drF'   : drF,
                           'dxC'   : dxC,    'dyC'   : dyC,
                           'dxF'   : dxF,    'dyF'   : dyF,
                           'dxG'   : dxG,    'dyG'   : dyG,
                           'dxV'   : dxV,    'dyU'   : dyU,
                           'rA'    : rA,     'rAw'   : rAw,
                           'rAs'   : rAs,    'rAz'   : rAz,
                           'HFacC' : HFacC,  'HFacW' : HFacW, 'HFacS' : HFacS,
                           'sinZ'  : sinZ,   'sinY'  : sinY,  'sinX'  : sinX,
                           'sinUZ' : sinUZ,  'sinUY' : sinUY, 'sinUX' : sinUX,
                           'sinVZ' : sinVZ,  'sinVY' : sinVY, 'sinVX' : sinVX,
                           'sinWZ' : sinWZ,  'sinWY' : sinWY, 'sinWX' : sinWX})
Example #23
    def getProfileAllBroadcasted(self, variables=None, sel={}):
        if variables is None:
            return xr.broadcast(self.profile.sel(**sel))[0]
        else:
            return xr.broadcast(self.profile.sel(**sel)[variables])[0]
Example #24
# Cross section along 69.3 latitude and between 1 and 25 longitude
# Andenes = 16deg longitude
start = (69.3, 1)
end = (69.3, 25)

cross_data = data[[
    'cloud_area_fraction_pl', 'air_temperature_pl', 'relative_humidity_pl'
]]
cross = cross_section(cross_data, start, end).set_coords(
    ('latitude', 'longitude'))
# Invert the pressure axis (doesn't work as intended)
# cross = cross.reindex(pressure=list(reversed(cross.pressure)))

temperature, clouds, relative_humidity = xr.broadcast(
    cross['air_temperature_pl'], cross['cloud_area_fraction_pl'],
    cross['relative_humidity_pl'])

# Plot the cross section
fig, axs = plt.subplots(nrows=3,
                        ncols=3,
                        sharey=True,
                        sharex=True,
                        figsize=(14, 10))
ax = axs.ravel().tolist()
j = 0
# Define the figure object and primary axes
for i in [0, 6, 12, 18, 24, 30, 36, 42, 48]:

    # Plot RH using contourf
    rh_contour = ax[j].contourf(cross['longitude'],
Example #25
def calc_com_incline_and_orientation_angle(da_mask,
                                           return_centerline_pts=False):
    """
    Calculate approximate shear angle of object (theta) and xy-orientation
    angle (phi) from the change of xy-position of the center-of-mass computed
    separately at every height
    """
    if np.any(da_mask.isnull()):
        m = ~da_mask.isnull()
    else:
        m = da_mask

    # need to center coordinates on "center of mass" (assuming constant density)
    if len(da_mask.x.shape) == 3:
        x_3d = da_mask.x
        y_3d = da_mask.y
        z_3d = da_mask.z
    else:
        x_3d, y_3d, z_3d = xr.broadcast(da_mask.x, da_mask.y, da_mask.z)

    # compute mean xy-position at every height z, this is the effective
    # centre-of-mass
    kws = dict(dtype='float64', dim=('x', 'y'))
    x_c = x_3d.where(m).mean(
        **kws)  # other=nan so that these get excluded from mean calculation
    y_c = y_3d.where(m).mean(**kws)

    try:
        dx = np.gradient(x_c)
        dy = np.gradient(y_c)

        dx_mean = np.nanmean(dx)
        dy_mean = np.nanmean(dy)

        dl_mean = np.sqrt(dx_mean**2. + dy_mean**2.)
        dz_mean = np.nanmean(np.gradient(x_c.z))

        theta = np.arctan2(dl_mean, dz_mean)
        phi = np.arctan2(dy_mean, dx_mean)
    except ValueError:
        phi = theta = np.nan

    phi = np.rad2deg(phi)
    theta = np.rad2deg(theta)

    if phi < 0:
        phi += 360.

    ds = xr.merge([
        xr.DataArray(phi,
                     name='phi',
                     attrs=dict(long_name='xy-plane angle', units='deg')),
        xr.DataArray(theta,
                     name='theta',
                     attrs=dict(long_name='z-axis slope angle', units='deg')),
    ])

    if return_centerline_pts:
        return ds, [x_c, y_c, da_mask.z]
    else:
        return ds
Example #26
            z = -tran_dss[0].depth_bt
        else:
            # for untrim output:
            z = -(tran_dss[0].z_surf - tran_dss[0].z_bed)
        zmax = 0.0
        zmin = z.min() - 0.2
        xmin = x_lat.min() - 3.0
        xmax = x_lat.max() + 3.0

        for repeat, ds in enumerate(tran_dss):
            ds_lateral = ds_to_linear(ds)

            Vlong = ds.Ve * along_unit[0] + ds.Vn * along_unit[1]
            Vlat = ds.Ve * across_unit[0] + ds.Vn * across_unit[1]

            X, Z = xr.broadcast(ds_lateral, ds.z_ctr)

            fig = plt.figure(4)
            fig.clf()
            fig.set_size_inches((10, 6), forward=True)
            fig, (ax_lon, ax_lat) = plt.subplots(2,
                                                 1,
                                                 num=4,
                                                 sharex=True,
                                                 sharey=True)

            scat_lon = ax_lon.scatter(X,
                                      Z,
                                      30,
                                      Vlong,
                                      cmap='jet',
Example #27
def bootstrap_compute(
    hind,
    verif,
    hist=None,
    alignment="same_verifs",
    metric="pearson_r",
    comparison="m2e",
    dim="init",
    reference=None,
    resample_dim="member",
    sig=95,
    iterations=500,
    pers_sig=None,
    compute=compute_hindcast,
    resample_uninit=bootstrap_uninitialized_ensemble,
    **metric_kwargs,
):
    """Bootstrap compute with replacement.

    Args:
        hind (xr.Dataset): prediction ensemble.
        verif (xr.Dataset): Verification data.
        hist (xr.Dataset): historical/uninitialized simulation.
        metric (str): Verification metric to apply. Defaults to 'pearson_r'.
        comparison (str): How to compare ensemble and verification data.
            Defaults to 'm2e'.
        dim (str or list): dimension(s) to apply metric over. default: 'init'.
        reference (str, list of str): Type of reference forecasts with which to
            verify. One or more of ['persistence', 'uninitialized'].
            If None or empty, returns no p value.
        resample_dim (str): dimension to resample from. default: 'member'::

            - 'member': select a different set of members from hind
            - 'init': select a different set of initializations from hind

        sig (int): Significance level for uninitialized and
                   initialized skill. Defaults to 95.
        pers_sig (int): Significance level for persistence skill confidence levels.
                        Defaults to sig.
        iterations (int): number of resampling iterations (bootstrap
                         with replacement). Defaults to 500.
        compute (func): function to compute skill.
                        Choose from
                        [:py:func:`climpred.prediction.compute_perfect_model`,
                         :py:func:`climpred.prediction.compute_hindcast`].
        resample_uninit (func): function to create an uninitialized ensemble
                        from a control simulation or uninitialized large
                        ensemble. Choose from:
                        [:py:func:`bootstrap_uninitialized_ensemble`,
                         :py:func:`bootstrap_uninit_pm_ensemble_from_control`].
        **metric_kwargs (dict): additional keywords to be passed to metric
            (see the arguments required for a given metric in :ref:`Metrics`).

    Returns:
        results: (xr.Dataset): bootstrapped results for the three different skills:

            - `initialized` for the initialized hindcast `hind` and describes skill due
             to initialization and external forcing
            - `uninitialized` for the uninitialized/historical and approximates skill
             from external forcing
            - `persistence` for the persistence forecast computed by
              `compute_persistence`

        the different results:
            - `verify skill`: skill values
            - `p`: p value
            - `low_ci` and `high_ci`: high and low ends of confidence intervals based
             on significance threshold `sig`


    Reference:
        * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.
          Gonzalez, V. Kharin, et al. “A Verification Framework for
          Interannual-to-Decadal Predictions Experiments.” Climate
          Dynamics 40, no. 1–2 (January 1, 2013): 245–72.
          https://doi.org/10/f4jjvf.

    See also:
        * climpred.bootstrap.bootstrap_hindcast
        * climpred.bootstrap.bootstrap_perfect_model
    """
    warn_if_chunking_would_increase_performance(hind, crit_size_in_MB=5)
    if pers_sig is None:
        pers_sig = sig
    if isinstance(dim, str):
        dim = [dim]
    if isinstance(reference, str):
        reference = [reference]
    if reference is None:
        reference = []

    p = (100 - sig) / 100
    ci_low = p / 2
    ci_high = 1 - p / 2
    p_pers = (100 - pers_sig) / 100
    ci_low_pers = p_pers / 2
    ci_high_pers = 1 - p_pers / 2
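    # e.g. with the default sig=95: p = 0.05, so ci_low = 0.025 and ci_high = 0.975,
    # i.e. the 2.5th and 97.5th percentiles of the bootstrap distribution bound the
    # 95% confidence interval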

    # get metric/comparison function name, not the alias
    metric = METRIC_ALIASES.get(metric, metric)
    comparison = COMPARISON_ALIASES.get(comparison, comparison)

    # get class Metric(metric)
    metric = get_metric_class(metric, ALL_METRICS)
    # get comparison function
    comparison = get_comparison_class(comparison, ALL_COMPARISONS)

    # Perfect Model requires `same_inits` setup
    isHindcast = True if comparison.name in HINDCAST_COMPARISONS else False
    reference_alignment = alignment if isHindcast else "same_inits"
    chunking_dims = [d for d in hind.dims if d not in CLIMPRED_DIMS]

    # carry alignment for compute_reference separately
    metric_kwargs_reference = metric_kwargs.copy()
    metric_kwargs_reference["alignment"] = reference_alignment
    # carry alignment in metric_kwargs
    if isHindcast:
        metric_kwargs["alignment"] = alignment

    if hist is None:  # PM path, use verif = control
        hist = verif

    # slower path for hindcast and resample_dim init
    if resample_dim == "init" and isHindcast:
        warnings.warn("resample_dim=`init` will be slower than resample_dim=`member`.")
        (
            bootstrapped_init_skill,
            bootstrapped_uninit_skill,
            bootstrapped_pers_skill,
        ) = _bootstrap_hindcast_over_init_dim(
            hind,
            hist,
            verif,
            dim,
            reference,
            resample_dim,
            iterations,
            metric,
            comparison,
            compute,
            resample_uninit,
            **metric_kwargs,
        )
    else:  # faster: first _resample_iterations_idx, then compute skill
        resample_func = _get_resample_func(hind)
        if not isHindcast:
            if "uninitialized" in reference:
                # create more members than needed in PM to make the uninitialized
                # distribution more robust
                members_to_sample_from = 50
                repeat = members_to_sample_from // hind.member.size + 1
                uninit_hind = xr.concat(
                    [resample_uninit(hind, hist) for i in range(repeat)],
                    dim="member",
                    **CONCAT_KWARGS,
                )
                uninit_hind["member"] = np.arange(1, 1 + uninit_hind.member.size)
                if dask.is_dask_collection(uninit_hind):
                    # to minimize tasks: ensure uninit_hind gets pre-computed
                    # (alternatively: .chunk({'member': -1}))
                    uninit_hind = uninit_hind.compute().chunk()
                # resample uninit always over member and select only hind.member.size
                bootstrapped_uninit = resample_func(
                    uninit_hind,
                    iterations,
                    "member",
                    replace=False,
                    dim_max=hind["member"].size,
                )
                bootstrapped_uninit["lead"] = hind["lead"]
                # effectively only needed when resample_func is _resample_iterations_idx, which doesn't use dim_max
                bootstrapped_uninit = bootstrapped_uninit.isel(
                    member=slice(None, hind.member.size)
                )
                bootstrapped_uninit["member"] = np.arange(
                    1, 1 + bootstrapped_uninit.member.size
                )
                if dask.is_dask_collection(bootstrapped_uninit):
                    bootstrapped_uninit = bootstrapped_uninit.chunk({"member": -1})
                    bootstrapped_uninit = _maybe_auto_chunk(
                        bootstrapped_uninit, ["iteration"] + chunking_dims
                    )
        else:  # hindcast
            if "uninitialized" in reference:
                uninit_hind = resample_uninit(hind, hist)
                if dask.is_dask_collection(uninit_hind):
                    # to minimize tasks: ensure uninit_hind gets pre-computed
                    # (this may not be needed)
                    uninit_hind = uninit_hind.compute().chunk()
                bootstrapped_uninit = resample_func(
                    uninit_hind, iterations, resample_dim
                )
                bootstrapped_uninit = bootstrapped_uninit.isel(
                    member=slice(None, hind.member.size)
                )
                bootstrapped_uninit["lead"] = hind["lead"]
                if dask.is_dask_collection(bootstrapped_uninit):
                    bootstrapped_uninit = _maybe_auto_chunk(
                        bootstrapped_uninit.chunk({"lead": 1}),
                        ["iteration"] + chunking_dims,
                    )

        if "uninitialized" in reference:
            bootstrapped_uninit_skill = compute(
                bootstrapped_uninit,
                verif,
                metric=metric,
                comparison="m2o" if isHindcast else comparison,
                dim=dim,
                add_attrs=False,
                **metric_kwargs,
            )
            # take mean if 'm2o' comparison forced before
            if isHindcast and comparison != __m2o:
                bootstrapped_uninit_skill = bootstrapped_uninit_skill.mean("member")

        bootstrapped_hind = resample_func(hind, iterations, resample_dim)
        if dask.is_dask_collection(bootstrapped_hind):
            bootstrapped_hind = bootstrapped_hind.chunk({"member": -1})

        bootstrapped_init_skill = compute(
            bootstrapped_hind,
            verif,
            metric=metric,
            comparison=comparison,
            add_attrs=False,
            dim=dim,
            **metric_kwargs,
        )
        if "persistence" in reference:
            pers_skill = compute_persistence(
                hind,
                verif,
                metric=metric,
                dim=dim,
                **metric_kwargs_reference,
            )
            # bootstrap pers
            if resample_dim == "init":
                bootstrapped_pers_skill = compute_persistence(
                    bootstrapped_hind,
                    verif,
                    metric=metric,
                    **metric_kwargs_reference,
                )
            else:  # member no need to calculate all again
                bootstrapped_pers_skill, _ = xr.broadcast(
                    pers_skill, bootstrapped_init_skill
                )

    # calc mean skill without any resampling
    init_skill = compute(
        hind,
        verif,
        metric=metric,
        comparison=comparison,
        dim=dim,
        **metric_kwargs,
    )
    if "uninitialized" in reference:
        # uninit skill as mean resampled uninit skill
        unin_skill = bootstrapped_uninit_skill.mean("iteration")  # noqa: F841
    if "persistence" in reference:
        pers_skill = compute_persistence(
            hind, verif, metric=metric, dim=dim, **metric_kwargs_reference
        )
    if "climatology" in reference:
        clim_skill = compute_climatology(
            hind, verif, metric=metric, dim=dim, comparison=comparison, **metric_kwargs
        )
        bootstrapped_clim_skill, _ = xr.broadcast(clim_skill, bootstrapped_init_skill)

    # get confidence intervals CI
    init_ci = _distribution_to_ci(bootstrapped_init_skill, ci_low, ci_high)
    if "uninitialized" in reference:
        unin_ci = _distribution_to_ci(  # noqa: F841
            bootstrapped_uninit_skill, ci_low, ci_high
        )
    if "climatology" in reference:
        clim_ci = _distribution_to_ci(  # noqa: F841
            bootstrapped_clim_skill, ci_low, ci_high
        )
    if "persistence" in reference:
        pers_ci = _distribution_to_ci(  # noqa: F841
            bootstrapped_pers_skill, ci_low_pers, ci_high_pers
        )

    # pvalue whether uninit or pers better than init forecast
    if "uninitialized" in reference:
        p_unin_over_init = _pvalue_from_distributions(  # noqa: F841
            bootstrapped_uninit_skill, bootstrapped_init_skill, metric=metric
        )
    if "climatology" in reference:
        p_clim_over_init = _pvalue_from_distributions(  # noqa: F841
            bootstrapped_clim_skill, bootstrapped_init_skill, metric=metric
        )
    if "persistence" in reference:
        p_pers_over_init = _pvalue_from_distributions(  # noqa: F841
            bootstrapped_pers_skill, bootstrapped_init_skill, metric=metric
        )

    # gather return
    # p defined as probability that reference better than
    # initialized, therefore not defined for initialized skill
    # itself
    results = xr.concat(
        [
            init_skill,
            init_skill.where(init_skill == -999),  # all-NaN placeholder: p is undefined for initialized skill
            init_ci.isel(quantile=0, drop=True),
            init_ci.isel(quantile=1, drop=True),
        ],
        dim="results",
        coords="minimal",
    ).assign_coords(
        results=("results", ["verify skill", "p", "low_ci", "high_ci"]),
        skill="initialized",
    )

    if reference != []:
        for r in reference:
            ref_skill = eval(f"{r[:4]}_skill")
            ref_p = eval(f"p_{r[:4]}_over_init")
            ref_ci_low = eval(f"{r[:4]}_ci").isel(quantile=0, drop=True)
            ref_ci_high = eval(f"{r[:4]}_ci").isel(quantile=1, drop=True)
            ref_results = xr.concat(
                [ref_skill, ref_p, ref_ci_low, ref_ci_high],
                dim="results",
                **CONCAT_KWARGS,
            ).assign_coords(
                skill=r, results=("results", ["verify skill", "p", "low_ci", "high_ci"])
            )
            if "member" in ref_results.dims:
                if not ref_results["member"].identical(results["member"]):
                    ref_results["member"] = results[
                        "member"
                    ]  # fixes m2c different member names in reference forecasts
            results = xr.concat([results, ref_results], dim="skill", **CONCAT_KWARGS)
        results = results.assign_coords(skill=["initialized"] + reference).squeeze()
    else:
        results = results.drop_sel(results="p")
    results = results.squeeze()

    # Attach climpred compute information to skill
    # results.results
    metadata_dict = {
        "confidence_interval_levels": f"{ci_high}-{ci_low}",
        "bootstrap_iterations": iterations,
    }
    if reference is not None:
        metadata_dict[
            "p"
        ] = "probability that reference performs better than initialized"
    metadata_dict.update(metric_kwargs)
    results = assign_attrs(
        results,
        hind,
        alignment=alignment,
        metric=metric,
        comparison=comparison,
        dim=dim,
        metadata_dict=metadata_dict,
    )
    # Ensure that the lead units get carried along for the calculation. The attribute
    # tends to get dropped along the way due to ``xarray`` functionality.
    results["lead"] = hind["lead"]
    if "units" in hind["lead"].attrs and "units" not in results["lead"].attrs:
        results["lead"].attrs["units"] = hind["lead"].attrs["units"]
    return results
Example No. 28
start = (37.0, -105.0)
end = (35.5, -65.0)

##############################
# Get the cross section, and convert lat/lon to supplementary coordinates:

cross = cross_section(data, start, end)
cross = cross.set_coords(('lat', 'lon'))
print(cross)

##############################
# For this example, we will be plotting potential temperature, relative humidity, and
# tangential/normal winds, so we need to calculate those and add them to the dataset:

temperature, pressure, specific_humidity = xr.broadcast(cross['Temperature'],
                                                        cross['isobaric'],
                                                        cross['Specific_humidity'])

theta = mpcalc.potential_temperature(pressure, temperature)
rh = mpcalc.relative_humidity_from_specific_humidity(specific_humidity, temperature, pressure)

# These calculations return unit arrays, so put those back into DataArrays in our Dataset
cross['Potential_temperature'] = xr.DataArray(theta,
                                              coords=temperature.coords,
                                              dims=temperature.dims,
                                              attrs={'units': theta.units})
cross['Relative_humidity'] = xr.DataArray(rh,
                                          coords=specific_humidity.coords,
                                          dims=specific_humidity.dims,
                                          attrs={'units': rh.units})
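##############################
# The original example goes on to derive the cross-section wind components. A hedged
# sketch of that step (it assumes the dataset carries 'u_wind' and 'v_wind' variables
# and that ``mpcalc.cross_section_components`` is available in this MetPy version):

cross['t_wind'], cross['n_wind'] = mpcalc.cross_section_components(cross['u_wind'],
                                                                   cross['v_wind'])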
Example No. 29
def smape(a, b, dim=None, weights=None, skipna=False, keep_attrs=False):
    """Symmetric Mean Absolute Percentage Error.

    .. math::
        \\mathrm{SMAPE} = \\frac{1}{n} \\sum_{i=1}^{n}
                          \\frac{ \\vert a_{i} - b_{i} \\vert }
                          { \\vert a_{i} \\vert + \\vert b_{i} \\vert  }

    .. note::
        Percent error is reported as decimal percent. I.e., a value of 1 is
        100%.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
        (the reference or 'truth' values)
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the smape along. Note that this dimension will
        be reduced as a result. Defaults to None reducing all dimensions.
    weights : xarray.Dataset or xarray.DataArray or None
        Weights matching dimensions of ``dim`` to apply during the function.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied
        from the first input to the new one.
        If False (default), the new object will
        be returned without attributes.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        Symmetric Mean Absolute Percentage Error.

    References
    ----------
    https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import smape
    >>> a = xr.DataArray(np.random.rand(5, 3, 3),
                        dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3),
                        dims=['time', 'x', 'y'])
    >>> smape(a, b, dim='time')
    """
    dim, axis = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)
    weights = _preprocess_weights(a, dim, dim, weights)
    input_core_dims = _determine_input_core_dims(dim, weights)

    return xr.apply_ufunc(
        _smape,
        a,
        b,
        weights,
        input_core_dims=input_core_dims,
        kwargs={
            "axis": axis,
            "skipna": skipna
        },
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
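# A worked instance of the formula above (illustrative): for a single pair a_i = 2 and
# b_i = 3, the term is |2 - 3| / (|2| + |3|) = 0.2; identical inputs give 0, and the
# score is bounded by 1 under this definition.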
Example No. 30
def create_raster_polygons(ds,
                           mask=None,subset_bbox=None,
                           weights=None,weights_target='ds'):
    """ Create polygons for each pixel in a raster
    
    Keyword arguments:
    ds -- an xarray dataset with the variables 
          'lat_bnds' and 'lon_bnds', which are both
          lat/lon x 2 arrays giving the min and 
          max values of lat and lon for each pixel
          given by lat/lon
    subset_bbox -- by default None; if a geopandas
                   geodataframe is entered, the bounding
                   box around the geometries in the gdf 
                   are used to mask the grid, to reduce
                   the number of pixel polygons created
    mask -- by default None; placeholder for a future
            masking option (e.g. an ocean mask, or masks
            derived from shapefiles). Masked pixels would
            simply be skipped rather than added, which makes
            them harder to identify in the first part of the
            aggregation step, but could speed up processing
            for grids with many masked (e.g. ocean) pixels.
          
    Returns:
    a geopandas geodataframe containing a 'geometry' 
    giving the pixel boundaries for each 'lat' / 'lon' 
    pair
                  
    Note: 
    'lat_bnds' and 'lon_bnds' can be created through the
    'get_bnds' function if they are not already included
    in the input raster file. 
    
    Note:
    Currently this code only supports regular 
    rectangular grids (so where every pixel side is
    a straight line in lat/lon space). Future versions
    may include support for irregular grids. 
    """
    
    # Standardize inputs
    ds = fix_ds(ds)
    ds = get_bnds(ds)
    #breakpoint()
    # Subset by shapefile bounding box, if desired
    if subset_bbox is not None:
        if type(subset_bbox) is gpd.geodataframe.GeoDataFrame:
            # Using the biggest difference in lat/lon to make sure that the pixels are subset
            # in a way that the bounding box is fully filled out
            bbox_thresh = np.max([ds.lat.diff('lat').max(),ds.lon.diff('lon').max()])+0.1
            ds = ds.sel(lon=slice(subset_bbox.total_bounds[0]-bbox_thresh,subset_bbox.total_bounds[2]+bbox_thresh),
                        lat=slice(subset_bbox.total_bounds[1]-bbox_thresh,subset_bbox.total_bounds[3]+bbox_thresh))
        else:
            warnings.warn('[subset_bbox] is not a geodataframe; no mask by polygon bounding box used.')
            
    # Process weights
    ds,winf = process_weights(ds,weights,target=weights_target)
            
    # Mask
    if mask is not None:
        warnings.warn('Masking by grid not yet supported. Stay tuned...')
        
    # Create dataset which has a lat/lon bound value for each individual pixel, 
    # broadcasted out over each lat/lon pair
    (ds_bnds,) = (xr.broadcast(ds.isel({d:0 for d in [k for k in ds.dims.keys() if k not in ['lat','lon','bnds']]}).
                              drop_vars([v for v in ds.keys() if v not in ['lat_bnds','lon_bnds']])))
    # Stack so it's just pixels and bounds
    ds_bnds = ds_bnds.stack(loc=('lat','lon'))
    
    # In order:
    # (lon0,lat0),(lon0,lat1),(lon1,lat1),(lon1,lat0), but as a single array; to be
    # put in the right format for Polygon in the next step
    pix_poly_coords = np.transpose(np.vstack([ds_bnds.lon_bnds.isel(bnds=0).values,ds_bnds.lat_bnds.isel(bnds=0).values,
                                                ds_bnds.lon_bnds.isel(bnds=0).values,ds_bnds.lat_bnds.isel(bnds=1).values,
                                                ds_bnds.lon_bnds.isel(bnds=1).values,ds_bnds.lat_bnds.isel(bnds=1).values,
                                                ds_bnds.lon_bnds.isel(bnds=1).values,ds_bnds.lat_bnds.isel(bnds=0).values]))
    
    # Reshape so each location has a 4 x 2 (vertex vs coordinate) array, 
    # and convert each of those vertices to tuples. This means every element
    # of pix_poly_coords is the input to shapely.geometry.Polygon of one pixel
    pix_poly_coords = tuple(map(tuple,np.reshape(pix_poly_coords,(np.shape(pix_poly_coords)[0],4,2))))
    
    # Create empty geodataframe
    gdf_pixels = gpd.GeoDataFrame()
    gdf_pixels['lat'] = [None]*ds_bnds.dims['loc']
    gdf_pixels['lon'] = [None]*ds_bnds.dims['loc']
    gdf_pixels['geometry'] = [None]*ds_bnds.dims['loc']
    if weights is not None:
        # Stack weights so they are linearly indexed like the ds (and fill
        # NAs with 0s)
        weights = ds.weights.stack(loc=('lat','lon')).fillna(0)
        # Preallocate weights column
        gdf_pixels['weights'] = [None]*ds_bnds.dims['loc']
    
    # Now populate with a polygon for every pixel, and the lat/lon coordinates
    # of that pixel (this loop is fairly slow; preallocating the columns above
    # does not noticeably speed it up)
    for loc_idx in np.arange(0,ds_bnds.dims['loc']):
        gdf_pixels.loc[loc_idx,'lat'] = ds_bnds.lat.isel(loc=loc_idx).values
        gdf_pixels.loc[loc_idx,'lon'] = ds_bnds.lon.isel(loc=loc_idx).values
        gdf_pixels.loc[loc_idx,'geometry'] = Polygon(pix_poly_coords[loc_idx])
        if weights is not None:
            gdf_pixels.loc[loc_idx,'weights'] = weights.isel(loc=loc_idx).values
        
    # Add a "pixel idx" to make indexing better later
    gdf_pixels['pix_idx'] = gdf_pixels.index.values
    
    # Add crs (normal lat/lon onto WGS84)
    gdf_pixels = gdf_pixels.set_crs("EPSG:4326")
    
    # Save the source grid for further reference
    source_grid = {'lat':ds_bnds.lat,'lon':ds_bnds.lon}
    
    pix_agg = {'gdf_pixels':gdf_pixels,'source_grid':source_grid}
    
    # Return the created geodataframe
    return pix_agg
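# Illustrative call (hypothetical names: `ds` needs 'lat'/'lon' coordinates, with
# 'lat_bnds'/'lon_bnds' added by `get_bnds` if missing, and `gdf` would be a geopandas
# GeoDataFrame covering the region of interest):
#
#     pix_agg = create_raster_polygons(ds, subset_bbox=gdf)
#     pix_agg['gdf_pixels'].plot(edgecolor='k')  # inspect the pixel polygons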
Example No. 31
def pearson_r_p_value(a,
                      b,
                      dim=None,
                      weights=None,
                      skipna=False,
                      keep_attrs=False):
    """2-tailed p-value associated with pearson's correlation coefficient.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    b : xarray.Dataset or xarray.DataArray
        Labeled array(s) over which to apply the function.
    dim : str, list
        The dimension(s) to apply the correlation along. Note that this dimension will
        be reduced as a result. Defaults to None reducing all dimensions.
    weights : xarray.Dataset or xarray.DataArray or None
        Weights matching dimensions of ``dim`` to apply during the function.
    skipna : bool
        If True, skip NaNs when computing function.
    keep_attrs : bool
        If True, the attributes (attrs) will be copied
        from the first input to the new one.
        If False (default), the new object will
        be returned without attributes.

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        2-tailed p-value of Pearson's correlation coefficient.

    See Also
    --------
    scipy.stats.pearsonr

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xskillscore import pearson_r_p_value
    >>> a = xr.DataArray(np.random.rand(5, 3, 3),
                        dims=['time', 'x', 'y'])
    >>> b = xr.DataArray(np.random.rand(5, 3, 3),
                        dims=['time', 'x', 'y'])
    >>> pearson_r_p_value(a, b, dim='time')
    """
    _fail_if_dim_empty(dim)
    dim, _ = _preprocess_dims(dim, a)
    a, b = xr.broadcast(a, b, exclude=dim)
    a, b, new_dim, weights = _stack_input_if_needed(a, b, dim, weights)
    weights = _preprocess_weights(a, dim, new_dim, weights)
    input_core_dims = _determine_input_core_dims(new_dim, weights)

    return xr.apply_ufunc(
        _pearson_r_p_value,
        a,
        b,
        weights,
        input_core_dims=input_core_dims,
        kwargs={
            "axis": -1,
            "skipna": skipna
        },
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=keep_attrs,
    )
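# A quick sanity check against scipy for the 1-D case (synthetic data; the two
# p-values should agree to floating-point precision):
import numpy as np
import xarray as xr
from scipy import stats

a = xr.DataArray(np.random.rand(100), dims=['time'])
b = xr.DataArray(np.random.rand(100), dims=['time'])
p_xs = pearson_r_p_value(a, b, dim='time')
_, p_sp = stats.pearsonr(a.values, b.values)
print(float(p_xs), p_sp)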
Example No. 32
    def _plot_transect(self, remappedModelClimatology, remappedRefClimatology):
        # {{{
        """ plotting the transect """

        season = self.season
        config = self.config
        configSectionName = self.configSectionName

        mainRunName = config.get('runs', 'mainRunName')

        # broadcast x and z to have the same dimensions
        x, z = xr.broadcast(remappedModelClimatology.x,
                            remappedModelClimatology.z)

        # set lat and lon in case we want to plot versus these quantities
        lat = remappedModelClimatology.lat
        lon = remappedModelClimatology.lon

        # convert x, z, lat, and lon to numpy arrays; make a copy because
        # they are sometimes read-only (not sure why)
        x = x.values.copy().transpose()
        z = z.values.copy().transpose()
        lat = lat.values.copy().transpose()
        lon = lon.values.copy().transpose()
        self.lat = lat
        self.lon = lon

        # z is masked out with NaNs in some locations (where there is land) but
        # this makes pcolormesh unhappy so we'll zero out those locations
        z[numpy.isnan(z)] = 0.

        modelOutput = nans_to_numpy_mask(
            remappedModelClimatology[self.mpasFieldName].values)
        modelOutput = modelOutput.transpose()

        if remappedRefClimatology is None:
            refOutput = None
            bias = None
        else:
            refOutput = remappedRefClimatology[self.refFieldName]
            dims = refOutput.dims
            refOutput = nans_to_numpy_mask(refOutput.values)
            if dims[1] != 'nPoints':
                assert (dims[0] == 'nPoints')
                refOutput = refOutput.transpose()

            bias = modelOutput - refOutput

        filePrefix = self.filePrefix
        outFileName = '{}/{}.png'.format(self.plotsDirectory, filePrefix)
        title = '{}\n({}, years {:04d}-{:04d})'.format(self.fieldNameInTitle,
                                                       season, self.startYear,
                                                       self.endYear)

        xLabel = 'Distance [km]'
        yLabel = 'Depth [m]'

        # define the axis labels and the data to use for the upper
        # x axis or axes, if such additional axes have been requested

        upperXAxes = config.get('transects', 'upperXAxes')
        numUpperTicks = config.getint('transects', 'numUpperTicks')
        upperXAxisTickLabelPrecision = config.getint(
            'transects', 'upperXAxisTickLabelPrecision')

        self._set_third_x_axis_to_none()

        if upperXAxes == 'neither':
            self._set_second_x_axis_to_none()
        elif upperXAxes == 'lat':
            self._set_second_x_axis_to_latitude()
        elif upperXAxes == 'lon':
            self._set_second_x_axis_to_longitude()
        elif upperXAxes == 'both':
            self._set_second_x_axis_to_longitude()
            self._set_third_x_axis_to_latitude()
        elif upperXAxes == 'greatestExtent':
            if self._greatest_extent(lat, lon):
                self._set_second_x_axis_to_latitude()
            else:
                self._set_second_x_axis_to_longitude()
        elif upperXAxes == 'strictlyMonotonic':
            if self._strictly_monotonic(lat, lon):
                self._set_second_x_axis_to_latitude()
            else:
                self._set_second_x_axis_to_longitude()
        elif upperXAxes == 'mostMonotonic':
            if self._most_monotonic(lat, lon):
                self._set_second_x_axis_to_latitude()
            else:
                self._set_second_x_axis_to_longitude()
        elif upperXAxes == 'mostStepsInSameDirection':
            if self._most_steps_in_same_direction(lat, lon):
                self._set_second_x_axis_to_latitude()
            else:
                self._set_second_x_axis_to_longitude()
        elif upperXAxes == 'fewestDirectionChanges':
            if self._fewest_direction_changes(lat, lon):
                self._set_second_x_axis_to_latitude()
            else:
                self._set_second_x_axis_to_longitude()
        else:
            raise ValueError('invalid option for upperXAxes')

        # get the parameters determining what type of plot to use,
        # what line styles and line colors to use, and whether and how
        # to label contours

        compareAsContours = config.getboolean('transects',
                                              'compareAsContoursOnSinglePlot')

        contourLineStyle = config.get('transects', 'contourLineStyle')
        contourLineColor = config.get('transects', 'contourLineColor')
        comparisonContourLineStyle = config.get('transects',
                                                'comparisonContourLineStyle')
        comparisonContourLineColor = config.get('transects',
                                                'comparisonContourLineColor')

        if compareAsContours:
            labelContours = config.getboolean(
                'transects', 'labelContoursOnContourComparisonPlots')
        else:
            labelContours = config.getboolean('transects',
                                              'labelContoursOnHeatmaps')

        contourLabelPrecision = config.getint('transects',
                                              'contourLabelPrecision')

        # construct a three-panel comparison plot for the transect, or a
        # single-panel contour comparison plot if compareAsContours is True

        plot_vertical_section_comparison(
            config,
            x,
            z,
            modelOutput,
            refOutput,
            bias,
            outFileName,
            configSectionName,
            cbarLabel=self.unitsLabel,
            xlabel=xLabel,
            ylabel=yLabel,
            title=title,
            modelTitle='{}'.format(mainRunName),
            refTitle=self.refTitleLabel,
            diffTitle=self.diffTitleLabel,
            secondXAxisData=self.secondXAxisData,
            secondXAxisLabel=self.secondXAxisLabel,
            thirdXAxisData=self.thirdXAxisData,
            thirdXAxisLabel=self.thirdXAxisLabel,
            numUpperTicks=numUpperTicks,
            upperXAxisTickLabelPrecision=upperXAxisTickLabelPrecision,
            invertYAxis=False,
            backgroundColor='#918167',
            compareAsContours=compareAsContours,
            lineStyle=contourLineStyle,
            lineColor=contourLineColor,
            comparisonContourLineStyle=comparisonContourLineStyle,
            comparisonContourLineColor=comparisonContourLineColor,
            labelContours=labelContours,
            contourLabelPrecision=contourLabelPrecision)

        caption = '{} {}'.format(season, self.imageCaption)
        write_image_xml(config,
                        filePrefix,
                        componentName='Ocean',
                        componentSubdirectory='ocean',
                        galleryGroup=self.galleryGroup,
                        groupSubtitle=self.groupSubtitle,
                        groupLink=self.groupLink,
                        gallery=self.galleryName,
                        thumbnailDescription=self.thumbnailDescription,
                        imageDescription=caption,
                        imageCaption=caption)
Example No. 33
def gen_ds():
    for val, d in ds.groupby(dim):
        del d[dim]  # delete grouped labels
        d[dim] = [val]
        d, = xr.broadcast(d)
        yield d
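# Illustrative use (hypothetical: `ds` and `dim` come from the enclosing scope). Each
# yielded piece keeps `dim` as a length-1 dimension, so the split can be reassembled:
#
#     pieces = list(gen_ds())
#     roundtrip = xr.concat(pieces, dim=dim)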
Example No. 34
def apply_param_noise(ds,
                      params,
                      noise_types,
                      shape=(0, ),
                      noise_sds=[0],
                      seed=0):
    """Apply noise to each timestep for each Monte Carlo draw of the outbreak 
    simulations.
    
    Parameters
    ----------
    ds : :class:`xarray.Dataset`
        A Dataset that has variables called ``[varname]_deterministic`` for each of the 
        parameters in `params`.
    params : list of str
        The name of the params in this dataset (e.g. ``beta, gamma, sigma`` for SEIR and
        ``beta, gamma`` for SIR).
    noise_types : list of str
        Same length as `params`. Each element is one of ``["normal", "exponential", 
        False]``. This defines the type of noise applied to each. False means no noise.
    shape : tuple of int, optional
        (n_samples, n_timesteps). Only needed if any `noise_types` are ``normal``
    noise_sds : list of float
        Standard deviations to use for any parameters with ``noise_type=="normal"``. 
        Must be same length as `params` but unused for any params with other 
        `noise_type`.
    seed : int
        Random seed for generating noise
        
    Returns
    -------
    out : :class:`xarray.Dataset`
        Same as `ds` but with ``[varname]_stoch`` stochastic variables added.
    """

    np.random.seed(seed)
    for px, param in enumerate(params):
        noise_type = noise_types[px]
        noise_sd = noise_sds[px]
        param_stoch = param + "_stoch"
        param_det = param + "_deterministic"
        if noise_type is None:
            continue
        elif noise_type == "normal":
            ds[param_stoch] = (
                ("sample", "t"),
                np.random.normal(0, noise_sd, shape),
            )
            ds[param_stoch] = ds[param_det] + ds[param_stoch]
        elif noise_type == "exponential":
            (ds[param_det], _, _) = xr.broadcast(ds[param_det], ds.sample,
                                                 ds.t)
            ds[param_stoch] = (
                ds[param_det].dims,
                np.random.exponential(ds[param_det]),
            )

        # commented out b/c inverse-exponential has undefined expected value
        #         and empirically changes the mean parameter by order(s) of magnitude
        #                 elif noise_type == "inv_exponential":
        #                     (out[param_det],_,_) = xr.broadcast(
        #                         out[param_det],
        #                         out.sample,
        #                         out.t)
        #                     out[param_stoch] = (
        #                         out[param_det].dims,
        #                         1/np.random.exponential(1/out[param_det])
        #                     )

        elif not noise_type:
            ds[param_stoch] = ds[param_det].copy()
        else:
            raise ValueError(noise_type)
        neg = ds[param_stoch] < 0
        n_bad = neg.sum().item()
        if n_bad > 0:
            n_tot = np.prod(ds[param_stoch].shape)
            dims = ["gamma"]
            if "sigma" in ds[param_stoch].dims:
                dims.append("sigma")
            to_sum = [d for d in ds[param_stoch].dims if d not in dims]
            cross_tab = (neg.sum(to_sum) / neg.count(to_sum)).to_dataframe()
            warnings.warn(
                f"Parameter {param} has {n_bad}/{n_tot} values <0 ({n_bad/n_tot:.2%}). "
                "These are non-physical params. If they are dropped in the simulation, "
                f"this will change the mean. Fraction of negative values: {cross_tab}"
            )
    return ds
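# A hypothetical input layout for the function above (all names and values are
# illustrative): deterministic parameter traces on a 't' axis, plus a 'sample'
# dimension for the Monte Carlo draws.
import numpy as np
import xarray as xr

n_samples, n_t = 100, 10
ds = xr.Dataset(
    {
        "beta_deterministic": ("t", np.full(n_t, 0.3)),
        "gamma_deterministic": ("t", np.full(n_t, 0.1)),
    },
    coords={"t": np.arange(n_t), "sample": np.arange(n_samples)},
)
ds = apply_param_noise(
    ds,
    params=["beta", "gamma"],
    noise_types=["normal", "exponential"],
    shape=(n_samples, n_t),
    noise_sds=[0.01, None],  # only read for the "normal" noise type
    seed=0,
)
print(ds["beta_stoch"].dims, ds["gamma_stoch"].dims)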
Example No. 35
#########################################################################
# Calculations
# ------------
#
# Most of the calculations in `metpy.calc` will accept DataArrays by converting them
# into their corresponding unit arrays. While this may often work without any issues, we must
# keep in mind that because the calculations are working with unit arrays and not DataArrays:
#
# - The calculations will return unit arrays rather than DataArrays
# - Broadcasting must be taken care of outside of the calculation, as it would only recognize
#   dimensions by order, not name
#
# As an example, we calculate geostrophic wind at 500 hPa below:

lat, lon = xr.broadcast(y, x)
f = mpcalc.coriolis_parameter(lat)
dx, dy = mpcalc.lat_lon_grid_deltas(lon, lat, initstring=data_crs.proj4_init)
heights = data['height'].metpy.loc[{'time': time[0], 'vertical': 500. * units.hPa}]
u_geo, v_geo = mpcalc.geostrophic_wind(heights, f, dx, dy)
print(u_geo)
print(v_geo)

#########################################################################
# Also, a limited number of calculations directly support xarray DataArrays or Datasets (they
# can accept *and* return xarray objects). Right now, this includes
#
# - Derivative functions
#     - ``first_derivative``
#     - ``second_derivative``
#     - ``gradient``
Example No. 36
        def gen_xy():
            for i, z in enumerate(self._z_vals):
                das = {}
                data = {}

                # multiple data variables rather than z coordinate
                if self._multi_var:
                    das['x'] = self._ds[self.x_coo]
                    das['y'] = self._ds[z]

                    if (self.y_err is not None) or \
                       (self.x_err is not None) or \
                       (self.c_coo is not None):
                        raise ValueError('Multi-var errors/c not implemented.')

                # z-coordinate to iterate over
                elif z is not None:
                    try:
                        # try positional indexing first, as much faster
                        sub_ds = self._ds[{self.z_coo: i}]
                    except ValueError:
                        # but won't work e.g. on non-dimensions
                        sub_ds = self._ds.loc[{self.z_coo: z}]

                    das['x'] = sub_ds[self.x_coo]
                    das['y'] = sub_ds[self.y_coo]

                    if self.c_coo is not None:
                        if mode == 'lineplot':
                            # np.asscalar was removed from NumPy; .item() is the drop-in replacement
                            self._c_cols.append(
                                sub_ds[self.c_coo].values.flatten().item())
                        elif mode == 'scatter':
                            das['c'] = sub_ds[self.c_coo]

                    if self.y_err is not None:
                        das['ye'] = sub_ds[self.y_err]

                    if self.x_err is not None:
                        das['xe'] = sub_ds[self.x_err]

                # nothing to iterate over
                else:
                    das['x'] = self._ds[self.x_coo]
                    das['y'] = self._ds[self.y_coo]

                    if self.c_coo is not None:
                        if mode == 'lineplot':
                            self._c_cols.append(
                                self._ds[self.c_coo].values.flatten().item())
                        elif mode == 'scatter':
                            das['c'] = self._ds[self.c_coo]

                    if self.y_err is not None:
                        das['ye'] = self._ds[self.y_err]

                    if self.x_err is not None:
                        das['xe'] = self._ds[self.x_err]

                for k, da in zip(das, xr.broadcast(*das.values())):
                    data[k] = da.values.flatten()

                # Trim out missing data
                not_null = np.isfinite(data['x'])
                not_null &= np.isfinite(data['y'])

                # TODO: if scatter, broadcast *then* ravel x, y, c?

                data['x'] = data['x'][not_null]
                data['y'] = data['y'][not_null]

                # implement jitter
                if self.xjitter:
                    if self.xlog:
                        data['x'] = data['x'] * np.random.normal(
                            loc=1, scale=self.xjitter, size=data['x'].shape)
                    else:
                        data['x'] = data['x'] + np.random.normal(
                            loc=0, scale=self.xjitter, size=data['x'].shape)

                if self.yjitter:
                    if self.ylog:
                        data['y'] = data['y'] * np.random.normal(
                            loc=1, scale=self.yjitter, size=data['y'].shape)
                    else:
                        data['y'] = data['y'] + np.random.normal(
                            loc=0, scale=self.yjitter, size=data['y'].shape)

                if 'c' in data:
                    data['c'] = data['c'][not_null]
                if 'ye' in data:
                    data['ye'] = data['ye'][not_null]
                if 'xe' in data:
                    data['xe'] = data['xe'][not_null]

                yield data
Example No. 37
# ------------
#
# Nearly all of the calculations in `metpy.calc` will accept DataArrays by converting them
# into their corresponding unit arrays. While this may often work without any issues, we must
# keep in mind that because the calculations are working with unit arrays and not DataArrays:
#
# - The calculations will return unit arrays rather than DataArrays
# - Broadcasting must be taken care of outside of the calculation, as it would only recognize
#   dimensions by order, not name
#
# Also, some of the units used in CF conventions (such as 'degrees_north') are not recognized
# by pint, so we must implement a workaround.
#
# As an example, we calculate geostrophic wind at 500 hPa below:

lat, lon = xr.broadcast(y, x)
f = mpcalc.coriolis_parameter(lat.values * units.degrees)
dx, dy = mpcalc.lat_lon_grid_deltas(lon.values, lat.values)
heights = data['height'].loc[time[0]].loc[{vertical.name: 500.}]
u_geo, v_geo = mpcalc.geostrophic_wind(heights, f, dx, dy, dim_order='yx')
print(u_geo)
print(v_geo)

#########################################################################
# Plotting
# --------
#
# Like most meteorological data, we want to be able to plot these data. DataArrays can be used
# like normal numpy arrays in plotting code, or we can use some of xarray's plotting
# functionality.
#
Example No. 38
def weights_lonlat(a):
    weights = np.cos(np.deg2rad(a.lat))
    _, weights = xr.broadcast(a, weights)
    return weights.isel(time=0, drop=True)
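# Illustrative usage (synthetic data; assumes dims named 'time', 'lat', 'lon'):
import numpy as np
import xarray as xr

a = xr.DataArray(
    np.random.rand(3, 4, 5),
    coords={'time': np.arange(3),
            'lat': np.linspace(-60.0, 60.0, 4),
            'lon': np.linspace(0.0, 288.0, 5)},
    dims=['time', 'lat', 'lon'],
)
w = weights_lonlat(a)                                          # cos(lat) on (lat, lon)
a_mean = (a * w).sum(('lat', 'lon')) / w.sum(('lat', 'lon'))   # area-weighted mean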
Example No. 39
def add_time_dimension(data, model_run):
    """
    Once all constraints and costs have been loaded into the model dataset, any
    timeseries data is loaded from file and substituted into the model dataset

    Parameters:
    -----------
    data : xarray Dataset
        A data structure which has already gone through `constraints_to_dataset`,
        `costs_to_dataset`, and `add_attributes`
    model_run : AttrDict
        Calliope model_run dictionary

    Returns:
    --------
    data : xarray Dataset
        A data structure with an additional time dimension to the input dataset,
        with all relevant `file=` entries replaced with data from file.

    """
    data['timesteps'] = pd.to_datetime(data.timesteps)

    # Search through every constraint/cost for use of '='
    for variable in data.data_vars:
        # 1) If '=' in variable, it will give the variable a string data type
        if data[variable].dtype.kind != 'U':
            continue

        # 2) convert to a Pandas Series to do 'string contains' search
        data_series = data[variable].to_series()

        # 3) get a Series of all the uses of 'file=' for this variable
        filenames = data_series[data_series.str.contains('file=')]

        # 4) If no use of 'file=' then we can be on our way
        if filenames.empty:
            continue

        # 5) remove all before '=' and split filename and location column
        filenames = filenames.str.split('=').str[1].str.rsplit(':', 1)
        if isinstance(filenames.index, pd.MultiIndex):
            filenames.index = filenames.index.remove_unused_levels()

        # 6) Get all timeseries data from dataframes stored in model_run
        timeseries_data = []
        key_errors = []
        for loc_tech, (filename, column) in filenames.items():
            try:
                timeseries_data.append(
                    model_run.timeseries_data[filename].loc[:, column].values)
            except KeyError:
                key_errors.append(
                    'column `{}` not found in file `{}`, but was requested by '
                    'loc::tech `{}`.'.format(column, filename, loc_tech))
        if key_errors:
            exceptions.print_warnings_and_raise_errors(errors=key_errors)

        timeseries_data_series = pd.DataFrame(index=filenames.index,
                                              columns=data.timesteps.values,
                                              data=timeseries_data).stack()
        timeseries_data_series.index.rename('timesteps', -1, inplace=True)

        # 7) Add time dimension to the relevant DataArray and update the '='
        # dimensions with the time varying data (static data is just duplicated
        # at each timestep)
        timeseries_data_array = xr.broadcast(data[variable],
                                             data.timesteps)[0].copy()
        timeseries_data_array.loc[xr.DataArray.from_series(
            timeseries_data_series).coords] = xr.DataArray.from_series(
                timeseries_data_series).values

        # 8) assign correct dtype (might be string/object accidentally)
        # string 'nan' to NaN:

        array_to_check = timeseries_data_array.where(
            timeseries_data_array != 'nan', drop=True)
        timeseries_data_array = timeseries_data_array.where(
            timeseries_data_array != 'nan')

        if ((array_to_check == 'True') | (array_to_check == '1') |
            (array_to_check == 'False') |
            (array_to_check == '0')).all().item():
            # Turn to bool
            timeseries_data_array = ((timeseries_data_array == 'True') |
                                     (timeseries_data_array == '1')).copy()
        else:
            try:
                timeseries_data_array = timeseries_data_array.astype(
                    float, copy=False)
            except ValueError:
                pass
        data[variable] = timeseries_data_array

    # Add timestep_resolution by looking at the time difference between timestep n
    # and timestep n + 1 for all timesteps
    time_delta = (data.timesteps.shift(timesteps=-1) -
                  data.timesteps).to_series()

    # Last timestep has no n + 1, so will be NaT (not a time),
    # we duplicate the penultimate time_delta instead
    time_delta[-1] = time_delta[-2]
    time_delta.name = 'timestep_resolution'
    # Time resolution is saved in hours (i.e. seconds / 3600)
    data['timestep_resolution'] = (xr.DataArray.from_series(
        time_delta.dt.total_seconds() / 3600))

    data['timestep_weights'] = xr.DataArray(np.ones(len(data.timesteps)),
                                            dims=['timesteps'])

    return data
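# The timestep-resolution trick above, in isolation (illustrative values):
#
#     ts = pd.Series(pd.to_datetime(['2005-01-01 00:00', '2005-01-01 01:00',
#                                    '2005-01-01 02:00']))
#     delta = ts.shift(-1) - ts            # last entry is NaT
#     delta.iloc[-1] = delta.iloc[-2]      # duplicate the penultimate step instead
#     delta.dt.total_seconds() / 3600      # -> 1.0 hour per timestep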
Example No. 40
def first_run(
    da: xr.DataArray,
    window: int,
    dim: str = "time",
    coord: Optional[Union[str, bool]] = False,
    ufunc_1dim: Union[str, bool] = "auto",
) -> xr.DataArray:
    """Return the index of the first item of the first run of at least a given length.

    Parameters
    ----------
    da : xr.DataArray
      Input N-dimensional DataArray (boolean).
    window : int
      Minimum duration of consecutive run to accumulate values.
    dim : str
      Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[str]
      If not False, the function returns values along `dim` instead of indexes.
      If `dim` has a datetime dtype, `coord` can also be a str of the name of the
      DateTimeAccessor object to use (ex: 'dayofyear').
    ufunc_1dim : Union[str, bool]
      Use the 1D 'ufunc' version of this function: the default ('auto') attempts to
      select the optimal usage based on the number of data points. Setting
      ufunc_1dim=True is typically more efficient for DataArrays with a small number
      of grid points.

    Returns
    -------
    xr.DataArray
      Index (or coordinate if `coord` is not False) of first item in first valid run.
      Returns np.nan if there are no valid runs.
    """
    if ufunc_1dim == "auto":
        if isinstance(da.data,
                      dsk.Array) and len(da.chunks[da.dims.index(dim)]) > 1:
            ufunc_1dim = False
        else:
            npts = get_npts(da)
            ufunc_1dim = npts <= npts_opt

    # we expect a boolean array, but there could be NaNs nonetheless
    da = da.fillna(0)

    if ufunc_1dim:
        out = first_run_ufunc(x=da, window=window, dim=dim)

    else:
        da = da.astype("int")
        i = xr.DataArray(np.arange(da[dim].size), dims=dim)
        ind = xr.broadcast(i, da)[0].transpose(*da.dims)
        if isinstance(da.data, dsk.Array):
            ind = ind.chunk(da.chunks)
        wind_sum = da.rolling(time=window).sum(skipna=False)
        out = ind.where(wind_sum >= window).min(dim=dim) - (window - 1)
        # remove window - 1 as rolling result index is last element of the moving window

    if coord:
        crd = da[dim]
        if isinstance(coord, str):
            crd = getattr(crd.dt, coord)

        out = lazy_indexing(crd, out)

    if dim in out.coords:
        out = out.drop_vars(dim)

    return out
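# A minimal usage sketch (synthetic boolean series, not from the source):
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.array([0, 1, 0, 1, 1, 1, 0], dtype=bool),
    dims='time',
    coords={'time': pd.date_range('2000-01-01', periods=7)},
)
first_run(da, window=3)                      # expected: 3, the index where the run starts
first_run(da, window=3, coord='dayofyear')   # expected: 4, i.e. the run starts on Jan 4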