Example #1
    def get_value(self, group, corr, extras, flag, flag_row, chanslice):
        coldata = self.get_column_data(group)
        # correlation may be pre-set by plot type, or may be passed to us
        corr = self.corr if self.corr is not None else corr
        # apply correlation reduction
        if coldata is not None and coldata.ndim == 3:
            assert corr is not None
            # the mapper can't have a specific axis set
            if self.mapper.axis is not None:
                raise TypeError(f"{self.name}: unexpected column with ndim=3")
            coldata = self.ms.corr_data_mappers[corr](coldata)
        # apply mapping function
        coldata = self.mapper.mapper(
            coldata, **{name: extras[name]
                        for name in self.mapper.extras})
        # scalar expanded to row vector
        if numpy.isscalar(coldata):
            coldata = da.full_like(flag_row,
                                   fill_value=coldata,
                                   dtype=type(coldata))
            flag = flag_row
        else:
            # apply channel slicing, if there's a channel axis in the array (and the array is a DataArray)
            if type(coldata) is xarray.DataArray and 'chan' in coldata.dims:
                coldata = coldata[dict(chan=chanslice)]
            # determine flags -- start with original flags
            if flag is not None:
                if coldata.ndim == 2:
                    flag = self.ms.corr_flag_mappers[corr](flag)
                elif coldata.ndim == 1:
                    if not self.mapper.axis:
                        flag = flag_row
                    elif self.mapper.axis == 1:
                        flag = None
                # shapes must now match
                if flag is not None and coldata.shape != flag.shape:
                    raise TypeError(f"{self.name}: unexpected column shape")
        # discretize
        if self.nlevels:
            # minmax set? discretize over that
            if self.discretized_delta is not None:
                coldata = da.floor(
                    (coldata - self.minmax[0]) / self.discretized_delta)
                coldata = da.minimum(da.maximum(coldata, 0),
                                     self.nlevels - 1).astype(COUNT_DTYPE)
            else:
                # without a set min/max, only boolean or integer data can be discretized directly
                if not (coldata.dtype == bool
                        or numpy.issubdtype(coldata.dtype, numpy.integer)):
                    raise TypeError(
                        f"{self.name}: min/max must be set to colour by non-integer values"
                    )
                coldata = da.remainder(coldata,
                                       self.nlevels).astype(COUNT_DTYPE)

        if flag is not None:
            flag |= ~da.isfinite(coldata)
            return dama.masked_array(coldata, flag)
        else:
            return dama.masked_array(coldata, ~da.isfinite(coldata))
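
# --- Hedged usage sketch (not part of the snippet above) ---
# A minimal, standalone illustration of the flag-plus-finiteness masking used
# at the end of get_value(): non-finite values are folded into the flag mask
# before building a dask masked array. The array contents here are made up.
import numpy as np
import dask.array as da
import dask.array.ma as dama

coldata = da.from_array(np.array([1.0, np.inf, 3.0, np.nan]), chunks=2)
flag = da.from_array(np.array([False, False, True, False]), chunks=2)
masked = dama.masked_array(coldata, flag | ~da.isfinite(coldata))
print(masked.compute())  # [1.0 -- -- --]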
Example #2
import dask.array as da
import dask.dataframe as dd


def add_post_vali_mods(df_dd, ensemble_weights, ensemble_name):
    ### merge model predictions with ensemble weights
    all_models_with_postValiMods = dd.merge(df_dd,
                                            ensemble_weights,
                                            left_index=True,
                                            right_index=True)

    ### create post-validation set ensembles
    all_models_with_postValiMods[ensemble_name] = 0

    for v in list(df_dd.columns[df_dd.columns.str.startswith("mod_")]):
        m = all_models_with_postValiMods["reWt_" + v] == 0
        m1 = da.isfinite(all_models_with_postValiMods[v])

        all_models_with_postValiMods[ensemble_name] = all_models_with_postValiMods[ensemble_name]\
            .where(m,
                   all_models_with_postValiMods[ensemble_name] +
                       all_models_with_postValiMods[v].where(m1, 0) * all_models_with_postValiMods["reWt_"+v])

        del m

    ### drop the weights columns since they aren't needed anymore
    all_models_with_postValiMods = all_models_with_postValiMods.drop(
        labels=ensemble_weights.columns, axis=1)

    ### return dask graph
    return all_models_with_postValiMods
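
# --- Hedged usage sketch (illustrative only) ---
# A small standalone illustration of the weighted-ensemble pattern above:
# non-finite predictions are zeroed before being weighted and accumulated.
# The column names and values below are made up.
import numpy as np
import pandas as pd
import dask.dataframe as dd

preds = dd.from_pandas(
    pd.DataFrame({"mod_a": [0.2, np.nan], "mod_b": [0.4, 0.6],
                  "reWt_mod_a": [0.5, 0.5], "reWt_mod_b": [0.5, 0.5]}),
    npartitions=1)
ensemble = 0
for v in ["mod_a", "mod_b"]:
    finite = np.isfinite(preds[v])
    ensemble = ensemble + preds[v].where(finite, 0) * preds["reWt_" + v]
print(ensemble.compute())  # both rows evaluate to 0.3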
Example #3
import numpy as np
import dask
import dask.array as da


def getHist2(imgs, bins=np.arange(-2, 20, 0.05)):
    """ get intensity histogram from a stack of imgs """

    if isinstance(imgs, dask.array.Array):
        H = da.histogram(imgs[da.isfinite(imgs)], bins=bins)[0]
        return H
    else:
        H = np.histogram(imgs[np.isfinite(imgs)], bins)[0]
        return np.asarray(H)
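
# --- Hedged usage sketch ---
# A minimal demonstration of getHist2() on a small dask stack with a NaN mixed
# in; the bin edges and values are arbitrary. The finite values 0.1, 1.5 and
# 3.0 land in three consecutive unit-width bins.
import numpy as np
import dask.array as da

imgs = da.from_array(np.array([[0.1, np.nan], [1.5, 3.0]]), chunks=1)
hist = getHist2(imgs, bins=np.arange(0, 4, 1.0))
print(hist.compute())  # [1 1 1]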
Example #4
def _qg_dask_array(x, axis, inplace):
    import dask.array as da
    from scipy.stats import norm
    from numpy_sugar import nanrankdata

    if inplace:
        raise NotImplementedError()

    x = x.swapaxes(1, axis)

    x = dask.array_shape_reveal(x)
    shape = da.compute(*x.shape)
    x = da.ma.masked_array(x)
    x *= -1
    x = da.apply_along_axis(_dask_apply, 0, x, nanrankdata, shape[0])
    x = x / (da.isfinite(x).sum(axis=0) + 1)
    x = da.apply_along_axis(_dask_apply, 0, x, norm.isf, shape[0])

    return x.swapaxes(1, axis)
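
# --- Hedged illustration (plain numpy, 1-D only) ---
# A rough standalone sketch of the quantile-gaussianisation idea above: rank
# the finite values, map the ranks into (0, 1), then push them through the
# normal quantile function. The real function also handles dask chunks,
# masked entries and arbitrary axes; the helper name below is made up.
import numpy as np
from scipy.stats import norm, rankdata

def quantile_gaussianize_1d(x):
    x = np.asarray(x, dtype=float)
    out = np.full_like(x, np.nan)
    ok = np.isfinite(x)
    ranks = rankdata(x[ok])  # 1..n, smallest finite value gets rank 1
    out[ok] = norm.ppf(ranks / (ok.sum() + 1))
    return out

print(quantile_gaussianize_1d([3.0, np.nan, -1.0, 0.5]))  # ~[0.674, nan, -0.674, 0.0]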
Example #5
def test_arithmetic():
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2,))
    b = da.from_array(y, chunks=(2,))
    c = da.from_array(z, chunks=(2,))
    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a ** b, x ** y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a ** 2, x ** 2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2 ** b, 2 ** y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b/10), np.arcsin(y/10))
    assert eq(da.arccos(b/10), np.arccos(y/10))
    assert eq(da.arctan(b/10), np.arctan(y/10))
    assert eq(da.arctan2(b*10, a), np.arctan2(y*10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b*10), np.arcsinh(y*10))
    assert eq(da.arccosh(b*10), np.arccosh(y*10))
    assert eq(da.arctanh(b/10), np.arctanh(y/10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))
    assert eq(da.maximum(a, 5 - a), np.maximum(x, 5 - x))
    assert eq(da.minimum(a, 5 - a), np.minimum(x, 5 - x))
    assert eq(da.fmax(a, 5 - a), np.fmax(x, 5 - x))
    assert eq(da.fmin(a, 5 - a), np.fmin(x, 5 - x))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))
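
# --- Hedged sketch (not part of the test above) ---
# The test assumes an `eq` helper from its own module. A hypothetical minimal
# version is sketched below: compute any dask collections, then compare the
# results element-wise, using a tolerance (and NaN == NaN) for inexact dtypes.
import numpy as np

def eq(a, b):
    # reduce dask collections to concrete numpy results
    a = a.compute() if hasattr(a, "compute") else a
    b = b.compute() if hasattr(b, "compute") else b
    a, b = np.asarray(a), np.asarray(b)
    if a.shape != b.shape:
        return False
    if a.dtype.kind in "fc" or b.dtype.kind in "fc":
        return bool(np.allclose(a, b, equal_nan=True))
    return bool(np.all(a == b))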
Example #6
def _stage_2(
    YP: Array,
    X: Array,
    Y: Array,
    alphas: Optional[NDArray] = None,
    normalize: bool = True,
    _glow_adj_alpha: bool = False,
    _glow_adj_scaling: bool = False,
) -> Tuple[Array, Array]:
    """Stage 2 - WGR Meta Regression

    This stage will train separate ridge regression models for each outcome
    using the predictions from stage 1 for that same outcome as features. These
    predictions are then evaluated based on R2 score to determine an optimal
    "meta" estimator (see `_stage_1` for the "base" estimator description). Results
    then include only predictions and coefficients from this optimal model.

    For more details, see the level 1 regression model described in step 1
    of [Mbatchou et al. 2020](https://www.biorxiv.org/content/10.1101/2020.06.19.162354v2).
    """
    assert YP.ndim == 4
    assert X.ndim == 2
    assert Y.ndim == 2
    # Check that chunking across samples is the same for all arrays
    assert YP.numblocks[2] == X.numblocks[0] == Y.numblocks[0]
    assert YP.chunks[2] == X.chunks[0] == Y.chunks[0]
    # Assert single chunks for covariates and outcomes
    assert X.numblocks[1] == Y.numblocks[1] == 1
    # Extract shape statistics
    n_variant_block, n_alpha_1 = YP.shape[:2]
    n_sample_block = Y.numblocks[0]
    n_sample, n_outcome = Y.shape
    n_covar = X.shape[1]
    n_indvar = n_covar + n_variant_block * n_alpha_1
    sample_chunks = Y.chunks[0]

    if normalize:
        assert_block_shape(YP, n_variant_block, 1, n_sample_block, 1)
        assert_chunk_shape(YP, 1, n_alpha_1, sample_chunks[0], n_outcome)
        # See: https://github.com/projectglow/glow/issues/260
        if _glow_adj_scaling:
            YP = da.map_blocks(
                lambda x: (x - x.mean(axis=2, keepdims=True))
                / x.std(axis=2, keepdims=True),
                YP,
            )
        else:
            YP = (YP - YP.mean(axis=2, keepdims=True)) / YP.std(axis=2, keepdims=True)
    # Transpose for refit on level 1 predictions
    YP = YP.transpose((3, 2, 0, 1))
    assert_array_shape(YP, n_outcome, n_sample, n_variant_block, n_alpha_1)

    if alphas is None:
        # See: https://github.com/projectglow/glow/issues/255
        if _glow_adj_alpha:
            alphas = get_alphas(n_variant_block * n_alpha_1 * n_outcome)
        else:
            alphas = get_alphas(n_variant_block * n_alpha_1)
    n_alpha_2 = alphas.size

    YR = []
    BR = []
    for i in range(n_outcome):
        # Slice and reshape to a new 2D covariate matrix;
        # the order of raveling in the trailing dimensions is important,
        # and later reshapes will assume (variants, alphas) order
        XPB = YP[i].reshape((n_sample, n_variant_block * n_alpha_1))
        # Prepend covariates and chunk along first dim only
        XPB = da.concatenate((X, XPB), axis=1)
        XPB = XPB.rechunk(chunks=(None, -1))
        assert_array_shape(XPB, n_sample, n_indvar)
        assert XPB.numblocks == (n_sample_block, 1)
        # Extract outcome vector
        YB = Y[:, [i]]
        assert XPB.ndim == YB.ndim == 2
        # Fit and predict folds for each parameter
        BB, YPB = _ridge_regression_cv(XPB, YB, alphas, n_zero_reg=n_covar)[-2:]
        assert_array_shape(BB, n_alpha_2, n_sample_block * n_indvar, 1)
        assert_array_shape(YPB, n_alpha_2, n_sample, 1)
        BR.append(BB)
        YR.append(YPB)

    # Concatenate predictions along outcome dimension
    YR = da.concatenate(YR, axis=2)
    assert_block_shape(YR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(YR, n_alpha_2, sample_chunks[0], 1)
    assert_array_shape(YR, n_alpha_2, n_sample, n_outcome)
    # Move samples to last dim so all others are batch
    # dims for R2 calculations
    YR = da.transpose(YR, (0, 2, 1))
    assert_array_shape(YR, n_alpha_2, n_outcome, n_sample)
    YR = YR.rechunk((-1, -1, None))
    assert_block_shape(YR, 1, 1, n_sample_block)
    assert YR.shape[1:] == Y.T.shape

    # Concatenate betas along outcome dimension
    BR = da.concatenate(BR, axis=2)
    assert_block_shape(BR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(BR, n_alpha_2, n_indvar, 1)
    assert_array_shape(BR, n_alpha_2, n_sample_block * n_indvar, n_outcome)

    # Compute R2 scores within each sample block for each outcome + alpha
    R2 = da.stack(
        [
            r2_score(YR.blocks[..., i], Y.T.blocks[..., i])
            # Avoid warnings on R2 calculations for blocks with single rows
            if YR.chunks[-1][i] > 1 else da.full(YR.shape[:-1], np.nan)
            for i in range(n_sample_block)
        ]
    )
    assert_array_shape(R2, n_sample_block, n_alpha_2, n_outcome)
    # Coerce to finite or nan before nan-aware mean
    R2 = da.where(da.isfinite(R2), R2, np.nan)
    # Find highest mean alpha score for each outcome across blocks
    R2M = da.nanmean(R2, axis=0)
    assert_array_shape(R2M, n_alpha_2, n_outcome)
    # Identify index for the alpha value with the highest mean score
    R2I = da.argmax(R2M, axis=0)
    assert_array_shape(R2I, n_outcome)

    # Choose the predictions corresponding to the model with best score
    YRM = da.stack([YR[R2I[i], i, :] for i in range(n_outcome)], axis=-1)
    YRM = YRM.rechunk((None, -1))
    assert_block_shape(YRM, n_sample_block, 1)
    assert_chunk_shape(YRM, sample_chunks[0], n_outcome)
    assert_array_shape(YRM, n_sample, n_outcome)
    # Choose the betas corresponding to the model with the best score
    BRM = da.stack([BR[R2I[i], :, i] for i in range(n_outcome)], axis=-1)
    BRM = BRM.rechunk((None, -1))
    assert_block_shape(BRM, n_sample_block, 1)
    assert_chunk_shape(BRM, n_indvar, n_outcome)
    assert_array_shape(BRM, n_sample_block * n_indvar, n_outcome)
    return BRM, YRM
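
# --- Hedged usage sketch ---
# A standalone illustration of the alpha-selection pattern near the end of
# _stage_2(): coerce non-finite R2 scores to NaN, take a NaN-aware mean across
# sample blocks, then pick the best alpha per outcome. The scores are made up,
# with shape (n_sample_block=2, n_alpha_2=2, n_outcome=2).
import numpy as np
import dask.array as da

R2 = da.from_array(np.array([[[0.2, 0.1], [np.inf, 0.6]],
                             [[0.4, np.nan], [0.3, 0.8]]]), chunks=-1)
R2 = da.where(da.isfinite(R2), R2, np.nan)  # inf -> nan
R2M = da.nanmean(R2, axis=0)                # mean score per (alpha, outcome)
R2I = da.argmax(R2M, axis=0)                # best alpha index per outcome
print(R2I.compute())  # [0 1]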
Example #7
def tall_clutter(files,
                 config,
                 clutter_thresh_min=0.0002,
                 clutter_thresh_max=0.25,
                 radius=1,
                 write_radar=True,
                 out_file=None,
                 use_dask=False):
    """
    Wind Farm Clutter Calculation

    Parameters
    ----------
    files : list
        List of radar files used for the clutter calculation.
    config : str
        String representing the configuration for the radar.
        Such possible configurations are listed in default_config.py

    Other Parameters
    ----------------
    clutter_thresh_min : float
        Threshold such that clutter values above clutter_thresh_min are
        considered clutter, as long as they are also below
        clutter_thresh_max.
    clutter_thresh_max : float
        Threshold such that clutter values below clutter_thresh_max are
        considered clutter, as long as they are also above
        clutter_thresh_min.
    radius : int
        Radius of the area surrounding the clutter gate that will also
        be flagged as clutter.
    write_radar : bool
        Whether or not to write the clutter radar as a netCDF file.
        Default is True.
    out_file : string
        String of location and filename to write the radar object to,
        if write_radar is True.
    use_dask : bool
        Use dask instead of running stats for the calculation. This will
        reduce run time.

    Returns
    -------
    clutter_radar : Radar
        Radar object with the clutter field that was calculated.
        This radar only has the clutter field, but maintains all
        other radar specifications.

    """
    field_names = get_field_names(config)
    refl_field = field_names["reflectivity"]
    vel_field = field_names["velocity"]
    ncp_field = field_names["normalized_coherent_power"]

    def get_reflect_array(file, first_shape):
        """ Retrieves a reflectivity array for a radar volume. """
        try:
            radar = pyart.io.read(
                file, include_fields=[refl_field, ncp_field, vel_field])
            reflect_array = deepcopy(radar.fields[refl_field]['data'])
            ncp = radar.fields[ncp_field]['data']
            height = radar.gate_z["data"]
            up_in_the_air = height > 2000.0
            the_mask = np.logical_or.reduce(
                (ncp < 0.8, reflect_array.mask, up_in_the_air))
            reflect_array = np.ma.masked_where(the_mask, reflect_array)
            del radar
            if reflect_array.shape == first_shape:
                return reflect_array.filled(fill_value=np.nan)
        except (TypeError, OSError):
            print(file + ' is corrupt...skipping!')
        return np.nan * np.zeros(first_shape)

    if use_dask is False:
        run_stats = _RunningStats()
        first_shape = 0
        for file in files:
            try:
                radar = pyart.io.read(file)
                reflect_array = radar.fields[refl_field]['data']
                ncp = deepcopy(radar.fields[ncp_field]['data'])
                #reflect_array = np.ma.masked_where(ncp < 0.7, reflect_array)

                if first_shape == 0:
                    first_shape = reflect_array.shape
                    clutter_radar = radar
                # push each volume once, provided its shape matches the reference
                if reflect_array.shape == first_shape:
                    run_stats.push(reflect_array)
                del radar
            except (TypeError, OSError):
                print(file + ' is corrupt...skipping!')
                continue
        mean = run_stats.mean()
        stdev = run_stats.standard_deviation()
        clutter_values = stdev / mean
        clutter_values = np.ma.masked_invalid(clutter_values)
        clutter_values_no_mask = clutter_values.filled(clutter_thresh_max + 1)
    else:
        cluster = LocalCluster(n_workers=20, processes=True)
        client = Client(cluster)
        first_shape = 0
        i = 0
        while first_shape == 0:
            try:
                radar = pyart.io.read(files[i])
                reflect_array = radar.fields[refl_field]['data']
                first_shape = reflect_array.shape
                clutter_radar = radar
            except (TypeError, OSError):
                print(files[i] + ' is corrupt...skipping!')
                i = i + 1
                continue
        arrays = [
            delayed(get_reflect_array)(file, first_shape) for file in files
        ]
        array = [
            da.from_delayed(a, shape=first_shape, dtype=float) for a in arrays
        ]
        array = da.stack(array, axis=0)
        print('## Calculating mean in parallel...')
        mean = np.array(da.nanmean(array, axis=0))
        print('## Calculating standard deviation...')
        count = np.array(da.sum(da.isfinite(array), axis=0))
        stdev = np.array(da.nanstd(array, axis=0))
        clutter_values = stdev / mean
        clutter_values = np.ma.masked_invalid(clutter_values)
        clutter_values = np.ma.masked_where(
            np.logical_or(clutter_values.mask, count < 20), clutter_values)
        # Masked arrays can suck
        clutter_values_no_mask = clutter_values.filled(
            (clutter_thresh_max + 1))

    shape = clutter_values.shape
    mask = np.ma.getmask(clutter_values)
    is_clutters = np.argwhere(
        np.logical_and.reduce((
            clutter_values_no_mask > clutter_thresh_min,
            clutter_values_no_mask < clutter_thresh_max,
        )))
    clutter_array = _clutter_marker(is_clutters, shape, mask, radius)
    clutter_radar.fields.clear()
    clutter_array = clutter_array.filled(0)
    clutter_dict = _clutter_to_dict(clutter_array)
    clutter_value_dict = _clutter_to_dict(clutter_values)
    clutter_value_dict["long_name"] = "Clutter value (std. dev/mean Z)"
    clutter_value_dict["standard_name"] = "clutter_value"
    clutter_radar.add_field('ground_clutter',
                            clutter_dict,
                            replace_existing=True)
    clutter_radar.add_field('clutter_value',
                            clutter_value_dict,
                            replace_existing=True)
    if write_radar is True:
        pyart.io.write_cfradial(out_file, clutter_radar)
    return clutter_radar
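
# --- Hedged usage sketch ---
# A standalone illustration of the dask statistics used in the use_dask branch
# above: stack per-volume reflectivity arrays (NaN where data are missing),
# then take a NaN-aware mean, a NaN-aware standard deviation and a
# finite-sample count per gate. The arrays below are made up.
import numpy as np
import dask.array as da

volumes = [np.array([[1.0, np.nan], [2.0, 4.0]]),
           np.array([[3.0, 5.0], [np.nan, 6.0]])]
stack = da.stack([da.from_array(v, chunks=-1) for v in volumes], axis=0)
mean = np.array(da.nanmean(stack, axis=0))
stdev = np.array(da.nanstd(stack, axis=0))
count = np.array(da.sum(da.isfinite(stack), axis=0))
print(mean, stdev, count, sep="\n")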
Example #8
def test_arithmetic():
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2, ))
    b = da.from_array(y, chunks=(2, ))
    c = da.from_array(z, chunks=(2, ))
    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a**b, x**y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a**2, x**2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2**b, 2**y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b / 10), np.arcsin(y / 10))
    assert eq(da.arccos(b / 10), np.arccos(y / 10))
    assert eq(da.arctan(b / 10), np.arctan(y / 10))
    assert eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b * 10), np.arcsinh(y * 10))
    assert eq(da.arccosh(b * 10), np.arccosh(y * 10))
    assert eq(da.arctanh(b / 10), np.arctanh(y / 10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))
    assert eq(da.maximum(a, 5 - a), np.maximum(x, 5 - x))
    assert eq(da.minimum(a, 5 - a), np.minimum(x, 5 - x))
    assert eq(da.fmax(a, 5 - a), np.fmax(x, 5 - x))
    assert eq(da.fmin(a, 5 - a), np.fmin(x, 5 - x))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))
Example #9
def predict_xr(
    model,
    input_xr,
    chunk_size=None,
    persist=True,
    proba=False,
    clean=False,
    return_input=False,
):
    """
    Using dask-ml ParallelPostFit(), runs the parallel
    predict and predict_proba methods of sklearn
    estimators. Useful for running predictions
    on larger-than-RAM datasets.

    Last modified: September 2020

    Parameters
    ----------
    model : scikit-learn model or compatible object
        Must have a .predict() method that takes numpy arrays.
    input_xr : xarray.DataArray or xarray.Dataset.
        Must have dimensions 'x' and 'y'
    chunk_size : int
        The dask chunk size to use on the flattened array. If this
        is left as None, then the chunk size is inferred from the
        .chunks attribute of `input_xr`
    persist : bool
        If True, and proba=True, then 'input_xr' data will be
        loaded into distributed memory. This will ensure data
        is not loaded twice for the prediction of probabilities,
        but this will only work if the data is not larger than RAM.
    proba : bool
        If True, predict probabilities. This only applies if the
        model has a .predict_proba() method
    clean : bool
        If True, remove Infs and NaNs from input and output arrays
    return_input : bool
        If True, then the data variables in the 'input_xr' dataset will
        be appended to the output xarray dataset.

    Returns
    -------
    output_xr : xarray.Dataset
        An xarray.Dataset containing the prediction output from model
        with input_xr as input. If proba=True, the dataset will also contain
        the prediction probabilities. Has the same spatiotemporal structure
        as input_xr.

    """
    if chunk_size is None:
        chunk_size = int(input_xr.chunks["x"][0]) * int(
            input_xr.chunks["y"][0])

    # convert model to dask predict
    model = ParallelPostFit(model)

    # with joblib.parallel_backend("dask"):
    x, y, crs = input_xr.x, input_xr.y, input_xr.geobox.crs

    input_data = []

    for var_name in input_xr.data_vars:
        input_data.append(input_xr[var_name])

    input_data_flattened = []
    # TODO: transfer to dask dataframe
    for arr in input_data:
        data = arr.data.flatten().rechunk(chunk_size)
        input_data_flattened.append(data)

    # reshape for prediction
    input_data_flattened = da.array(input_data_flattened).transpose()

    if clean:
        input_data_flattened = da.where(da.isfinite(input_data_flattened),
                                        input_data_flattened, 0)

    if proba and persist:
        # persisting data so we don't require loading all the data twice
        input_data_flattened = input_data_flattened.persist()

    # apply the classification
    print("   predicting...")
    out_class = model.predict(input_data_flattened)

    # Mask out NaN or Inf values in results
    if clean:
        out_class = da.where(da.isfinite(out_class), out_class, 0)

    # Reshape when writing out
    out_class = out_class.reshape(len(y), len(x))

    # stack back into xarray
    output_xr = xr.DataArray(out_class,
                             coords={
                                 "x": x,
                                 "y": y
                             },
                             dims=["y", "x"])

    output_xr = output_xr.to_dataset(name="Predictions")

    if proba:
        print("   probabilities...")
        out_proba = model.predict_proba(input_data_flattened)

        # convert to %
        out_proba = da.max(out_proba, axis=1) * 100.0

        if clean:
            out_proba = da.where(da.isfinite(out_proba), out_proba, 0)

        out_proba = out_proba.reshape(len(y), len(x))

        out_proba = xr.DataArray(out_proba,
                                 coords={
                                     "x": x,
                                     "y": y
                                 },
                                 dims=["y", "x"])
        output_xr["Probabilities"] = out_proba

    if return_input:
        print("   input features...")
        # unflatten the input_data_flattened array and append
        # to the output_xr containing the predictions
        arr = input_xr.to_array()
        stacked = arr.stack(z=["y", "x"])
        # handle multivariable output
        output_px_shape = ()
        if len(input_data_flattened.shape[1:]):
            output_px_shape = input_data_flattened.shape[1:]

        output_features = input_data_flattened.reshape(
            (len(stacked.z), *output_px_shape))

        # set the stacked coordinate to match the input
        output_features = xr.DataArray(
            output_features,
            coords={
                "z": stacked["z"]
            },
            dims=[
                "z",
                *[
                    "output_dim_" + str(idx)
                    for idx in range(len(output_px_shape))
                ],
            ],
        ).unstack()

        # convert to dataset and rename arrays
        output_features = output_features.to_dataset(dim="output_dim_0")
        data_vars = list(input_xr.data_vars)
        output_features = output_features.rename(
            {i: j
             for i, j in zip(output_features.data_vars, data_vars)}  # noqa pylint: disable=unnecessary-comprehension
        )

        # merge with predictions
        output_xr = xr.merge([output_xr, output_features], compat="override")

    return assign_crs(output_xr, str(crs))
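
# --- Hedged usage sketch ---
# A standalone illustration of the clean=True step in predict_xr(): flattened
# per-band arrays are stacked, transposed to (samples, features), and any
# non-finite entries are replaced with 0 before prediction. The band data
# below are made up.
import numpy as np
import dask.array as da

band1 = da.from_array(np.array([1.0, np.nan, 3.0, 4.0]), chunks=2)
band2 = da.from_array(np.array([5.0, 6.0, np.inf, 8.0]), chunks=2)
features = da.array([band1, band2]).transpose()          # shape (4, 2)
features = da.where(da.isfinite(features), features, 0)  # NaN/Inf -> 0
print(features.compute())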
Example #10
    def get_value(self, group, corr, extras, flag, flag_row, chanslice):
        coldata = self.get_column_data(group)
        # correlation may be pre-set by plot type, or may be passed to us
        corr = self.corr if self.corr is not None else corr
        # apply correlation reduction
        if coldata is not None and coldata.ndim == 3:
            assert corr is not None
            # the mapper can't have a specific axis set
            if self.mapper.axis is not None:
                raise TypeError(f"{self.name}: unexpected column with ndim=3")
            coldata = self.ms.corr_data_mappers[corr](coldata)
        # apply mapping function
        mapper = self.mapper
        # complex values with an identity mapper get an amp mapper assigned to them by default
        if np.iscomplexobj(coldata) and mapper is data_mappers["_"]:
            mapper = data_mappers["amp"]
        coldata = mapper.mapper(
            coldata, **{name: extras[name]
                        for name in self.mapper.extras})
        # for a constant axis, compute minmax on the fly
        if mapper.const and self._minmax_autorange:
            if np.isscalar(coldata):
                min1 = max1 = coldata
            else:
                min1, max1 = coldata.data.min(), coldata.data.max()
            self.minmax = min(self.minmax[0], min1) if self.minmax[0] is not None else min1, \
                          max(self.minmax[1], max1) if self.minmax[1] is not None else max1
        # scalar is just a scalar
        if np.isscalar(coldata):
            coldata = da.array(coldata)
            flag = None
        else:
            # apply channel slicing, if there's a channel axis in the array (and the array is a DataArray)
            if type(coldata) is xarray.DataArray and 'chan' in coldata.dims:
                coldata = coldata[dict(chan=chanslice)]
            # determine flags -- start with original flags
            if flag is not None:
                if coldata.ndim == 2:
                    flag = self.ms.corr_flag_mappers[corr](flag)
                elif coldata.ndim == 1:
                    if not self.mapper.axis:
                        flag = flag_row
                    elif self.mapper.axis == 1:
                        flag = None
                # shapes must now match
                if flag is not None and coldata.shape != flag.shape:
                    raise TypeError(f"{self.name}: unexpected column shape")
        # # discretize
        # if self.nlevels:

        if coldata.dtype == bool or np.issubdtype(coldata.dtype, np.integer):
            if self._is_discrete is False:
                raise TypeError(
                    f"{self.label}: column changed from continuous-valued to discrete. This is a bug, or a very weird MS."
                )
            self._is_discrete = True
            # do we need to apply a remapping?
            if self.subset_remapper is not None:
                # could be an xarray DataArray backed by a dask array
                if type(coldata) is not dask.array.core.Array:
                    coldata = coldata.data
                coldata = self.subset_remapper[coldata]
                bad_bins = da.greater_equal(coldata, len(self.subset_indices))
                if flag is None:
                    flag = bad_bins
                else:
                    flag = da.logical_or(flag.data, bad_bins)
        else:
            if self._is_discrete is True:
                raise TypeError(
                    f"{self.label}: column chnaged from discrete to continuous-valued. This is a bug, or a very weird MS."
                )
            self._is_discrete = False

        # Ensure dask arrays for creating dask masked arrays
        if isinstance(coldata, xarray.DataArray):
            coldata = coldata.data

        if isinstance(flag, xarray.DataArray):
            flag = flag.data

        bad_data = da.logical_not(da.isfinite(coldata))
        if flag is not None:
            return dama.masked_array(coldata, da.logical_or(flag, bad_data))
        else:
            return dama.masked_array(coldata, bad_data)
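
# --- Hedged usage sketch ---
# A standalone illustration of the subset-remapping step above: integer column
# values are pushed through a lookup array, and anything that falls outside the
# retained subset is flagged via da.greater_equal. The remapper contents below
# are made up.
import numpy as np
import dask.array as da

subset_indices = [3, 7]                      # e.g. two retained field/antenna IDs
lookup = np.full(10, len(subset_indices))    # out-of-subset IDs map past the end
lookup[3], lookup[7] = 0, 1
remapper = da.from_array(lookup, chunks=-1)

coldata = da.from_array(np.array([3, 5, 7, 7]), chunks=2)
coldata = remapper[coldata]                                # -> [0, 2, 1, 1]
bad_bins = da.greater_equal(coldata, len(subset_indices))  # -> [F, T, F, F]
print(coldata.compute(), bad_bins.compute())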