Example #1
def get_array_moments(
        array: da.core.Array,
        mean: bool = True,
        std: bool = True,
        std_method: str = 'binom',
        axis: int = 0
) -> Tuple[Optional[da.core.Array], Optional[da.core.Array]]:
    """ Computes specified array_moments

    Parameters
    ----------
    array : array_like, shape (N, P)
        Array that moments will be computed from
    mean : bool
        Flag whether to compute mean of "array" along "axis"
    std : bool
        Flag whether to compute std of "array" along "axis"
    std_method : str
        Method used to compute standard deviation.

        Possible methods are:
            'norm'  -> normal distribution standard deviation (see np.std)
            'binom' -> binomial standard deviation:
                           sqrt(2*p*(1-p)), where p = "mean"/2
    axis : int
        Axis to compute mean and std along.

    Returns
    -------
    array_mean : da.core.array, optional
        If "mean" is false, returns None
        Otherwise returns the array mean
    array_std: da.core.array, optional
        If "std" is false, returns None
        Otherwise returns the array std
    """
    array_mean = None
    array_std = None

    if mean:
        array_mean = da.nanmean(array, axis=axis)

    if std:
        if std_method == 'binom':
            u = array_mean if mean else da.nanmean(array, axis=axis)
            u /= 2
            array_std = da.sqrt(2 * u * (1 - u))
        elif std_method == 'norm':
            array_std = da.nanstd(array, axis=axis)
        else:
            raise NotImplementedError(
                f'std_method {std_method!r} is not implemented')

    array_mean, array_std = persist(array_mean, array_std)

    return array_mean, array_std
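A minimal usage sketch with hypothetical data (it assumes the imports used above, i.e. dask.array as da and dask's persist, are in scope). The 'binom' method implies 0/1/2-coded input whose column mean halves to a probability p:

import numpy as np
import dask.array as da

genotypes = da.from_array(
    np.random.randint(0, 3, size=(1000, 50)).astype(float), chunks=(250, 50))
col_mean, col_std = get_array_moments(
    genotypes, mean=True, std=True, std_method='binom', axis=0)
print(col_mean.compute()[:5])
print(col_std.compute()[:5])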
Example #2
def make_plot_CTTH(scores, optf, crs, dnt, var, cosfield):
    fig = plt.figure(figsize=(16, 12))
    for cnt, s in enumerate(scores.keys()):
        values = scores[s]
        masked_values = np.ma.array(values[0], mask=np.isnan(values[0]))
        cmap = plt.get_cmap(values[3])
        cmap.set_bad('grey', 1.)
        ax = fig.add_subplot(4, 3, cnt + 1, projection=crs)  # ccrs.Robinson()
        ims = ax.imshow(masked_values,
                        transform=crs,
                        extent=crs.bounds,
                        vmin=values[1],
                        vmax=values[2],
                        cmap=cmap,
                        origin='upper',
                        interpolation='none')
        ax.coastlines(color='black')
        # NOTE: the weighted spatial average below is computed and then
        # immediately overwritten by the unweighted global nanmean.
        mean = weighted_spatial_average(values[0], cosfield).compute()
        mean = '{:.2f}'.format(da.nanmean(values[0]).compute())
        ax.set_title(var + ' ' + s + ' ' + dnt + ' {}'.format(mean))
        plt.colorbar(ims)
    plt.tight_layout()
    plt.savefig(optf)
    plt.close()
    print('SAVED ', os.path.basename(optf))
Example #3
def power_spectrum(filter, time):
    """Compute the mean power spectrum over all particles at a given time.

    This routine gives the power spectrum (power spectral density) for
    each of the sampled variables within `filter`, as a mean over
    all particles. It will run a single advection step at the
    specified time. The resulting dictionary contains a `freq` item,
    with the FFT frequency bins for the output spectra.

    Args:
        filter (filtering.LagrangeFilter): The pre-configured filter object
            to use for running the analysis.
        time (float): The time at which to perform the analysis.

    Returns:
        Dict[str, numpy.ndarray]: A dictionary of power spectra for each of
            the sampled variables on the filter.

    """

    psds = {}
    advection_data = filter.advection_step(time, output_time=True)
    time_series = advection_data.pop("time")

    for v, a in advection_data.items():
        spectra = da.fft.fft(a[1].rechunk((-1, "auto")), axis=0)
        mean_spectrum = da.nanmean(da.absolute(spectra) ** 2, axis=1)
        psds[v] = mean_spectrum.compute()

    psds["freq"] = 2 * np.pi * np.fft.fftfreq(time_series.size, filter.output_dt)

    return psds
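The core of the routine is independent of the filtering package: an FFT along the (singly-chunked) time axis, then a NaN-aware mean of the squared magnitudes over particles. A rough stand-alone sketch with a random (time, particle) array standing in for the advection output:

import numpy as np
import dask.array as da

a = da.random.random((512, 1000), chunks=(512, 250))   # (time, particle)
spectra = da.fft.fft(a, axis=0)                        # time axis must be a single chunk
psd = da.nanmean(da.absolute(spectra) ** 2, axis=1)    # mean power over particles
freq = 2 * np.pi * np.fft.fftfreq(a.shape[0], d=1.0)   # d stands in for output_dt
print(psd.compute().shape, freq.shape)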
Example #4
def _hotspots_dask_numpy(raster, kernel):

    # apply kernel to raster values
    mean_array = convolve_2d(raster.data, kernel / kernel.sum())

    # calculate z-scores
    global_mean = da.nanmean(raster.data)
    global_std = da.nanstd(raster.data)

    # commented out to avoid early compute to check if global_std is zero
    # if global_std == 0:
    #     raise ZeroDivisionError(
    #         "Standard deviation of the input raster values is 0."
    #     )

    z_array = (mean_array - global_mean) / global_std

    _func = partial(_calc_hotspots_numpy)
    pad_h = kernel.shape[0] // 2
    pad_w = kernel.shape[1] // 2

    out = z_array.map_overlap(_func,
                              depth=(pad_h, pad_w),
                              boundary=np.nan,
                              meta=np.array(()))
    return out
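The global z-scoring step on its own is just two lazy reductions and a broadcast; a small stand-in sketch with a random array in place of raster.data:

import dask.array as da

data = da.random.random((1024, 1024), chunks=(256, 256))
z = (data - da.nanmean(data)) / da.nanstd(data)   # still lazy
print(float(da.nanmean(z).compute()))             # ~0 by construction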
Example #5
    def fit(
        self,
        X: Union[ArrayLike, DataFrameType],
        y: Optional[Union[ArrayLike, SeriesType]] = None,
    ) -> "StandardScaler":
        self._reset()
        attributes = OrderedDict()
        if isinstance(X, (pd.DataFrame, dd.DataFrame)):
            X = X.values

        if self.with_mean:
            mean_ = nanmean(X, 0)
            attributes["mean_"] = mean_
        if self.with_std:
            var_ = nanvar(X, 0)
            scale_ = var_.copy()
            scale_[scale_ == 0] = 1
            scale_ = da.sqrt(scale_)
            attributes["scale_"] = scale_
            attributes["var_"] = var_

        attributes["n_samples_seen_"] = np.nan
        values = compute(*attributes.values())
        for k, v in zip(attributes, values):
            setattr(self, k, v)
        self.n_features_in_ = X.shape[1]
        return self
Example #6
def nanmean(a, axis=None, dtype=None, out=None):
    if a.dtype.kind == "O":
        return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype)

    if isinstance(a, dask_array_type):
        return dask_array.nanmean(a, axis=axis, dtype=dtype)

    return np.nanmean(a, axis=axis, dtype=dtype)
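A hedged usage sketch of the dispatch above. Here dask_array_type is an assumption standing in for the module-level constant the isinstance check refers to, and the object-dtype helper is never reached for float input:

import numpy as np
import dask.array as dask_array

# assumption: stands in for the module-level constant used in the isinstance check
dask_array_type = (dask_array.Array,)

x = np.array([1.0, np.nan, 3.0])
print(nanmean(x))                                             # NumPy path -> 2.0
print(nanmean(dask_array.from_array(x, chunks=2)).compute())  # dask path -> 2.0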
Example #7
def semivar(*args, **kwargs):
    """
    semivariance
    """
    args = uf.broadcast(*args)
    X = da.stack([x.task.flatten() for x in args])
    out = 0.5 * da.nanmean((X[0] - X[1])**2)
    return out
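The statistic itself is just half the NaN-aware mean of squared pairwise differences; a plain-dask sketch (uf.broadcast and the .task attribute belong to the surrounding package and are skipped here):

import dask.array as da

a = da.random.random((10000,), chunks=2500)
b = da.random.random((10000,), chunks=2500)
gamma = 0.5 * da.nanmean((a - b) ** 2)
print(float(gamma.compute()))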
Example #8
def nanmean(a, axis=None, dtype=None, out=None):
    if a.dtype.kind == 'O':
        return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype)

    if isinstance(a, dask_array_type):
        return dask_array.nanmean(a, axis=axis, dtype=dtype)

    return np.nanmean(a, axis=axis, dtype=dtype)
Example #9
def stripsel_ana(img_in):
    """ Analyze data of the stripsel JF detector (incomplete: still needs to integrate etc) """
    if len(img_in.shape) == 3:
        if isinstance(img_in, da.Array):
            img_in = da.nanmean(img_in, axis=0)
            img_in = img_in.compute()
        else:
            img_in = np.nanmean(img_in, axis=0)
    img_corr = correct_stripeJF(img_in)
    return {'img_corr': img_corr, 'img_init': img_in}
Example #10
    def _fit_array(self, X):
        if self.strategy not in {"mean", "constant"}:
            msg = "Can only use strategy='mean' or 'constant' with Dask Array."
            raise ValueError(msg)

        if self.strategy == "mean":
            statistics = da.nanmean(X, axis=0).compute()
        else:
            statistics = np.full(X.shape[1], self.fill_value, dtype=X.dtype)

        self.statistics_, = da.compute(statistics)
Example #11
 def _calculate_summary_statistics(self):
     data = self._lazy_data()
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
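The pattern worth noting here is the single da.compute call that evaluates all the lazy reductions in one pass over the data; a stand-alone sketch with a random array:

import dask.array as da

data = da.random.random((1000, 1000), chunks=(250, 250))
raveled = data.ravel()
mean, std, q1, q2, q3 = da.compute(
    da.nanmean(data),
    da.nanstd(data),
    da.percentile(raveled, [25]),
    da.percentile(raveled, [50]),
    da.percentile(raveled, [75]),
)
print(mean, std, q1[0], q2[0], q3[0])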
Example #12
def nanmean(a, axis=None, dtype=None, out=None):
    if a.dtype.kind == "O":
        return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype)

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", r"Mean of empty slice", category=RuntimeWarning
        )
        if isinstance(a, dask_array_type):
            return dask_array.nanmean(a, axis=axis, dtype=dtype)

        return np.nanmean(a, axis=axis, dtype=dtype)
Example #13
 def _calculate_summary_statistics(self):
     data = self._lazy_data()
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
Example #14
File: ds.py Project: elaeon/ML
    def stadistics(self):
        headers = ["group", "mean", "std dev", "min", "25%", "50%", "75%", "max", "nonzero", "nonan", "unique", "dtype"]
        self.chunksize = Chunks.build_from_shape(self.shape, self.dtypes)
        table = []
        for group, (dtype, _) in self.dtypes.fields.items():
            values = dict()
            values["dtype"] = dtype
            values["group"] = group
            darray = self.data[group].da
            if dtype == np.dtype(float) or dtype == np.dtype(int):
                da_mean = da.around(darray.mean(), decimals=3)
                da_std = da.around(darray.std(), decimals=3)
                da_min = da.around(darray.min(), decimals=3)
                da_max = da.around(darray.max(), decimals=3)
                result = dask.compute([da_mean, da_std, da_min, da_max])[0]
                values["mean"] = result[0] if not np.isnan(result[0]) else da.around(da.nanmean(darray), decimals=3).compute()
                values["std dev"] = result[1] if not np.isnan(result[0]) else da.around(da.nanstd(darray), decimals=3).compute()
                values["min"] = result[2] if not np.isnan(result[0]) else da.around(da.nanmin(darray), decimals=3).compute()
                values["max"] = result[3] if not np.isnan(result[0]) else da.around(da.nanmax(darray), decimals=3).compute()
                if len(self.shape[group]) == 1:
                    da_percentile = da.around(da.percentile(darray, [25, 50, 75]), decimals=3)
                    result = da_percentile.compute()
                    values["25%"] = result[0]
                    values["50%"] = result[1]
                    values["75%"] = result[2]
                else:
                    values["25%"] = "-"
                    values["50%"] = "-"
                    values["75%"] = "-"
                values["nonzero"] = da.count_nonzero(darray).compute()
                values["nonan"] = da.count_nonzero(da.notnull(darray)).compute()
                values["unique"] = "-"
            else:
                values["mean"] = "-"
                values["std dev"] = "-"
                values["min"] = "-"
                values["max"] = "-"
                values["25%"] = "-"
                values["50%"] = "-"
                values["75%"] = "-"
                values["nonzero"] = "-"
                values["nonan"] = da.count_nonzero(da.notnull(darray)).compute()
                vunique = darray.to_dask_dataframe().fillna('').nunique().compute()
                values["unique"] = vunique

            row = []
            for column in headers:
                row.append(values[column])
            table.append(row)

        print("# rows {}".format(self.shape[0]))
        return tabulate(table, headers)
Example #15
def _impute_dask_array(x):
    import dask.array as da

    m = da.nanmean(x, axis=0).compute()
    start = 0

    arrs = []
    for i in range(len(x.chunks[1])):
        end = start + x.chunks[1][i]
        impute = _get_imputer(m[start:end])
        arrs.append(x[:, start:end].map_blocks(impute, dtype=float))
        start = end
    return da.concatenate(arrs, axis=1)
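For reference, the same column-mean imputation can be written without the per-chunk helper by broadcasting the means over the NaN positions with da.where (a sketch, not the original _get_imputer logic):

import numpy as np
import dask.array as da

x = da.from_array(np.array([[1.0, np.nan], [3.0, 4.0]]), chunks=(1, 2))
col_mean = da.nanmean(x, axis=0)             # one mean per column
imputed = da.where(da.isnan(x), col_mean, x)
print(imputed.compute())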
Example #16
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(np.nanprod(x), da.nanprod(d))
Example #17
def combine_extension_to_new_hdu(inputs_collection: PipelineCollection,
                                 operation: CombineOperation,
                                 ext: typing.Union[str, int],
                                 plane_shape: tuple[int, int]):
    image_cube = iofits.hdulists_to_dask_cube(inputs_collection.items,
                                              plane_shape,
                                              ext=ext)
    if operation is CombineOperation.MEAN:
        result = da.nanmean(image_cube, axis=0)
    if not isinstance(ext, int):
        hdr = {'EXTNAME': ext}
    else:
        hdr = None
    return dask.delayed(iofits.DaskHDU)(result, header=hdr, kind="image")
Example #18
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #19
 def _calculate_summary_statistics(self, rechunk=True):
     if rechunk is True:
         # Use dask auto rechunk instead of HyperSpy's one, what should be
         # better for these operations
         rechunk = "dask_auto"
     data = self._lazy_data(rechunk=rechunk)
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
Example #20
 def _calculate_summary_statistics(self, rechunk=True):
     if rechunk is True:
         # Use dask auto rechunk instead of HyperSpy's one, what should be
         # better for these operations
         rechunk = "dask_auto"
     data = self._lazy_data(rechunk=rechunk)
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
Example #21
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(nanprod(x), da.nanprod(d))
Example #22
def cov(*args, axis=None, **kwargs):
    """
    covariance
    """
    if axis is None:
        args = [x.flatten() for x in args]
        axis = 0

    X = da.stack(args, axis=-1).rechunk(com.CHUNKSIZE)
    cond = da.any(da.isnan(X), axis=-1)
    X = da.where(cond[..., None], np.nan, X)

    X -= da.nanmean(X, axis=axis, keepdims=True)
    X = da.where(da.isnan(X), 0, X)
    return X.swapaxes(axis, -1) @ X.swapaxes(axis,
                                             -2).conj() / (X.shape[axis] - 1)
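A quick sanity check of the same computation against np.cov on NaN-free data (com.CHUNKSIZE and the *args broadcasting from the function above are left out; two 1-D dask arrays are used directly):

import numpy as np
import dask.array as da

x = da.random.random((100,), chunks=50)
y = da.random.random((100,), chunks=50)
X = da.stack([x, y], axis=-1)
Xc = X - da.nanmean(X, axis=0, keepdims=True)
C = Xc.swapaxes(0, -1) @ Xc.conj() / (X.shape[0] - 1)
print(np.allclose(C.compute(), np.cov(x.compute(), y.compute())))  # True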
Example #23
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #24
    def fit(self, X, y=None):
        self._reset()
        attributes = OrderedDict()
        if isinstance(X, (pd.DataFrame, dd.DataFrame)):
            X = X.values

        if self.with_mean:
            mean_ = nanmean(X, 0)
            attributes["mean_"] = mean_
        if self.with_std:
            var_ = nanvar(X, 0)
            scale_ = var_.copy()
            scale_[scale_ == 0] = 1
            scale_ = da.sqrt(scale_)
            attributes["scale_"] = scale_
            attributes["var_"] = var_

        attributes["n_samples_seen_"] = np.nan
        values = compute(*attributes.values())
        for k, v in zip(attributes, values):
            setattr(self, k, v)
        return self
Example #25
def azInt(img, poni, rot, wvl, plot=1, clim=(0,300), corrImg = None):
    npt_az = 360
    npt_rad = 1000

    if len(img.shape)==3:
        if isinstance(img, da.Array):
            img = da.nanmean(img, axis=0)
            img = img.compute()
        else:
            img = np.nanmean(img, axis=0)
    
    if corrImg is not None:
        img = corrImg(img)

    q2d, chi, I2d = azInt2d(img, npt_rad, npt_az, poni, rot, wvl)
    q, I = azInt1d(img, npt_rad, poni, rot, wvl)
    
    if plot:
        plotAz(img, q, I, chi, I2d, clim=clim)
    
    if expecting() == 1:
        return dict(q=q, I=I, chi=chi, I2d=I2d)
    else:
        return q, I, chi, I2d
Example #26
    def fit(
        self,
        X: Union[ArrayLike, DataFrameType],
        y: Optional[Union[ArrayLike, SeriesType]] = None,
    ) -> "StandardScaler":
        self._reset()
        X = self._validate_data(
            X,
            estimator=self,
            accept_dask_array=True,
            accept_dask_dataframe=True,
            accept_unknown_chunks=True,
            preserve_pandas_dataframe=True,
        )

        attributes = OrderedDict()
        if isinstance(X, (pd.DataFrame, dd.DataFrame)):
            X = X.values

        if self.with_mean:
            mean_ = nanmean(X, 0)
            attributes["mean_"] = mean_
        if self.with_std:
            var_ = nanvar(X, 0)
            scale_ = var_.copy()
            scale_[scale_ == 0] = 1
            scale_ = da.sqrt(scale_)
            attributes["scale_"] = scale_
            attributes["var_"] = var_

        attributes["n_samples_seen_"] = X.shape[0]
        values = compute(*attributes.values())
        for k, v in zip(attributes, values):
            setattr(self, k, v)
        self.n_features_in_: int = X.shape[1]
        return self
Example #27
def composite(src_fps, save_loc, save_nam, method="mean", dt="default"):
    """Creates a composite from multiple rasters. Individual rasters have to be
    of the same size (extents, pixel size, data type). Multiple compositing
    are available, including mean, min, max, median etc.

    Parameters
    ----------
    src_fps : list(str)
        List of paths to source files.
    save_loc : str
        Path to save folder.
    save_nam : str
        Name of the file to be saved.
    method : str
        Compositing method, either "mean", "min", "max" or "median".
    dt : str, optional
        Orbit direction, either "DES" or "ASC" (required for generating
        previews).

    Returns
    -------
    out_pth : str
        Absolute path to the product.
    """
    # Make sure save location exists
    os.makedirs(save_loc, exist_ok=True)

    # Save TIFF metadata for output
    with rasterio.open(src_fps[0]) as rst:
        out_meta = rst.profile.copy()

    # Lazily load files into DASK ARRAYS
    print(f"#\n# Preparing Dask arrays...")
    chunks = {'band': 1, 'x': 1024, 'y': 1024}
    lazy_arrays = [xr.open_rasterio(fp, chunks=chunks) for fp in src_fps]
    stacked = da.concatenate(lazy_arrays, axis=0)
    stacked[stacked == 0] = np.nan
    # Calculate composite for selected method with dask
    print(f"# Compositing ({method}) using Dask...")
    if method == 'mean':
        comp_out = da.nanmean(stacked, axis=0, keepdims=True).compute()
    elif method == 'median':
        comp_out = da.nanmedian(stacked, axis=0, keepdims=True).compute()
    elif method == 'max':
        comp_out = da.nanmax(stacked, axis=0, keepdims=True).compute()
    elif method == 'min':
        comp_out = da.nanmin(stacked, axis=0, keepdims=True).compute()
    else:
        raise Exception('{} is not a valid compositing '
                        'method!'.format(method))

    # ----------------------------------------------------------------------------
    # SAVE RESULTS TO FILES
    # ----------------------------------------------------------------------------
    # Save composite to GeoTIFF
    tif_time = time.time()
    print("#\n# Saving composite image to TIFF...")

    out_nam = save_nam + ".tif"
    out_pth = os.path.join(save_loc, out_nam)
    out_meta.update(bigtiff="yes", compress='lzw')

    with rasterio.open(out_pth, "w", **out_meta) as dest:
        dest.write(comp_out)

    tif_time = time.time() - tif_time
    print(f"#  Time (TIFF): {tif_time:.2f} seconds")

    # # Save preview file as JPEG
    # jpg_time = time.time()
    # print("#\n# Saving preview image to JPEG...")
    # # Pickle array for passing it to plot_preview()
    # spt = os.path.join(save_loc, "temp_array.p")
    # with open(spt, "wb") as pf:
    #     pickle.dump(comp_out, pf)
    # comp_out = None
    # try:
    #     plot_preview(spt, dt, out_pth[:-3] + "jpg")
    # except MemoryError as me:
    #     print("#  Memory error occurred, could not save to JPEG")
    #     print(me)
    # finally:
    #     # delete pickle
    #     os.remove(spt)
    # jpg_time = time.time() - jpg_time
    # print(f"#  Time (JPEG): {jpg_time:.2f} seconds")

    return out_pth
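Stripped of the raster I/O, the compositing core is a stack along a new leading axis followed by a NaN-aware reduction; a small stand-in sketch (random arrays instead of the opened GeoTIFFs, da.where instead of in-place masking):

import numpy as np
import dask.array as da

rasters = [da.from_array(np.random.rand(512, 512), chunks=(256, 256)) for _ in range(4)]
stacked = da.stack(rasters, axis=0)
stacked = da.where(stacked == 0, np.nan, stacked)        # treat 0 as nodata
comp_out = da.nanmean(stacked, axis=0, keepdims=True)    # shape (1, 512, 512)
print(comp_out.compute().shape)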
    print(f"{n} clusters, score = {scores[-1]}")
plt.plot(klabels, scores)
plt.ylabel('Silhouette score')
plt.xlabel('$n_{clusters}$')

print('Performing clustering via KMeans', rIVs[:, :kmeans_d].shape)
kmeans = KMeans(n_clusters=8, random_state=10,
                n_jobs=-1).fit(rIVs[:, :kmeans_d])
clustering = kmeans.predict(rIVs[:, :kmeans_d])

# For visualization of curves corresponding to clusters, we grab the full spectra, filter out points where the spectrum was not measured due to drift and calculate the mean per image per spectrum.

validIVs = da.where(fullIVs == 0, np.nan, fullIVs).reshape(
    (fullIVs.shape[0], -1))
meanIVs = [
    da.nanmean(validIVs[:, clustering == index], axis=1)
    for index in range(kmeans.n_clusters)
]

# +
tstart = time.time()
print('plotting clustering data')

fig2, axs = plt.subplots(2, 4, figsize=[11, 5], dpi=600)
#In case of 1 out of 1 columns figure
axs = np.transpose(axs)

coarse_2d = 3 * coarsen
color = rIVs[::coarse_2d, :3]
center_colors = kmeans.cluster_centers_[:, :3] - color.min(axis=0)
color = color - color.min(axis=0, keepdims=True)
Example #29
        get_tile('http://localhost/tiles/{x}/{y}', ['x', 'y', 'z'], [0, 1, 2])
        
        will fetch the data at the URL: http://localhost/tiles/0/1
        
        This assumes that the data is in CoverageJSON format, and does the work of fetching the data, parsing it, and extracting the actual
        data as a numpy array.
    """
    for axis, tile_index in zip(axis_names, tile_indices):
        url_template = url_template.replace('{' + axis + '}', str(tile_index))
    # Debug line: uncomment to see which tiles are fetched.
    # Note that when printing these may get confused due to multithreading
    #print 'fetching tile from',url_template
    tile_data = json.loads(get_data(url_template))
    tile_values = np.array(tile_data['values'], dtype=float).reshape(tile_data['shape'])
    return tile_values

if __name__ == '__main__':
    # Usage example.
    arrs = get_dask_arrays('http://godiva.rdg.ac.uk/coverage/sst-tiled.json')
    print "Created dask array"
    sst = arrs['analysed_sst-yx_tiling']
    print 'Shape:',sst.shape
    print "Got array, calculating means:"
    print 'Northern Eighth', da.nanmean(sst[0,:450,:]).compute()
    print 'Equatorial Quarter', da.nanmean(sst[0,1350:2250,:]).compute()
    print 'Southern Eighth', da.nanmean(sst[0,3150:,:]).compute()
    # Note that even though we defined c100, each tile is still fetched for each calculation.
    # That's because we've used a naive fetch method, with no caching
    c100 = sst[0,1700:1900,3500:3700]
    print 'Central 100 points', da.nanmean(c100).compute()
    print 'Central 100 points Min/Max', da.nanmin(c100).compute(), da.nanmax(c100).compute()
Example #30
def _stage_2(
    YP: Array,
    X: Array,
    Y: Array,
    alphas: Optional[NDArray] = None,
    normalize: bool = True,
    _glow_adj_alpha: bool = False,
    _glow_adj_scaling: bool = False,
) -> Tuple[Array, Array]:
    """Stage 2 - WGR Meta Regression

    This stage will train separate ridge regression models for each outcome
    using the predictions from stage 1 for that same outcome as features. These
    predictions are then evaluated based on R2 score to determine an optimal
    "meta" estimator (see `_stage_1` for the "base" estimator description). Results
    then include only predictions and coefficients from this optimal model.

    For more details, see the level 1 regression model described in step 1
    of [Mbatchou et al. 2020](https://www.biorxiv.org/content/10.1101/2020.06.19.162354v2).
    """
    assert YP.ndim == 4
    assert X.ndim == 2
    assert Y.ndim == 2
    # Check that chunking across samples is the same for all arrays
    assert YP.numblocks[2] == X.numblocks[0] == Y.numblocks[0]
    assert YP.chunks[2] == X.chunks[0] == Y.chunks[0]
    # Assert single chunks for covariates and outcomes
    assert X.numblocks[1] == Y.numblocks[1] == 1
    # Extract shape statistics
    n_variant_block, n_alpha_1 = YP.shape[:2]
    n_sample_block = Y.numblocks[0]
    n_sample, n_outcome = Y.shape
    n_covar = X.shape[1]
    n_indvar = n_covar + n_variant_block * n_alpha_1
    sample_chunks = Y.chunks[0]

    if normalize:
        assert_block_shape(YP, n_variant_block, 1, n_sample_block, 1)
        assert_chunk_shape(YP, 1, n_alpha_1, sample_chunks[0], n_outcome)
        # See: https://github.com/projectglow/glow/issues/260
        if _glow_adj_scaling:
            YP = da.map_blocks(
                lambda x: (x - x.mean(axis=2, keepdims=True))
                / x.std(axis=2, keepdims=True),
                YP,
            )
        else:
            YP = (YP - YP.mean(axis=2, keepdims=True)) / YP.std(axis=2, keepdims=True)
    # Transpose for refit on level 1 predictions
    YP = YP.transpose((3, 2, 0, 1))
    assert_array_shape(YP, n_outcome, n_sample, n_variant_block, n_alpha_1)

    if alphas is None:
        # See: https://github.com/projectglow/glow/issues/255
        if _glow_adj_alpha:
            alphas = get_alphas(n_variant_block * n_alpha_1 * n_outcome)
        else:
            alphas = get_alphas(n_variant_block * n_alpha_1)
    n_alpha_2 = alphas.size

    YR = []
    BR = []
    for i in range(n_outcome):
        # Slice and reshape to new 2D covariate matrix;
        # The order of raveling in trailing dimensions is important
        # and later reshapes will assume variants, alphas order
        XPB = YP[i].reshape((n_sample, n_variant_block * n_alpha_1))
        # Prepend covariates and chunk along first dim only
        XPB = da.concatenate((X, XPB), axis=1)
        XPB = XPB.rechunk(chunks=(None, -1))
        assert_array_shape(XPB, n_sample, n_indvar)
        assert XPB.numblocks == (n_sample_block, 1)
        # Extract outcome vector
        YB = Y[:, [i]]
        assert XPB.ndim == YB.ndim == 2
        # Fit and predict folds for each parameter
        BB, YPB = _ridge_regression_cv(XPB, YB, alphas, n_zero_reg=n_covar)[-2:]
        assert_array_shape(BB, n_alpha_2, n_sample_block * n_indvar, 1)
        assert_array_shape(YPB, n_alpha_2, n_sample, 1)
        BR.append(BB)
        YR.append(YPB)

    # Concatenate predictions along outcome dimension
    YR = da.concatenate(YR, axis=2)
    assert_block_shape(YR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(YR, n_alpha_2, sample_chunks[0], 1)
    assert_array_shape(YR, n_alpha_2, n_sample, n_outcome)
    # Move samples to last dim so all others are batch
    # dims for R2 calculations
    YR = da.transpose(YR, (0, 2, 1))
    assert_array_shape(YR, n_alpha_2, n_outcome, n_sample)
    YR = YR.rechunk((-1, -1, None))
    assert_block_shape(YR, 1, 1, n_sample_block)
    assert YR.shape[1:] == Y.T.shape

    # Concatenate betas along outcome dimension
    BR = da.concatenate(BR, axis=2)
    assert_block_shape(BR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(BR, n_alpha_2, n_indvar, 1)
    assert_array_shape(BR, n_alpha_2, n_sample_block * n_indvar, n_outcome)

    # Compute R2 scores within each sample block for each outcome + alpha
    R2 = da.stack(
        [
            r2_score(YR.blocks[..., i], Y.T.blocks[..., i])
            # Avoid warnings on R2 calculations for blocks with single rows
            if YR.chunks[-1][i] > 1 else da.full(YR.shape[:-1], np.nan)
            for i in range(n_sample_block)
        ]
    )
    assert_array_shape(R2, n_sample_block, n_alpha_2, n_outcome)
    # Coerce to finite or nan before nan-aware mean
    R2 = da.where(da.isfinite(R2), R2, np.nan)
    # Find highest mean alpha score for each outcome across blocks
    R2M = da.nanmean(R2, axis=0)
    assert_array_shape(R2M, n_alpha_2, n_outcome)
    # Identify index for the alpha value with the highest mean score
    R2I = da.argmax(R2M, axis=0)
    assert_array_shape(R2I, n_outcome)

    # Choose the predictions corresponding to the model with best score
    YRM = da.stack([YR[R2I[i], i, :] for i in range(n_outcome)], axis=-1)
    YRM = YRM.rechunk((None, -1))
    assert_block_shape(YRM, n_sample_block, 1)
    assert_chunk_shape(YRM, sample_chunks[0], n_outcome)
    assert_array_shape(YRM, n_sample, n_outcome)
    # Choose the betas corresponding to the model with the best score
    BRM = da.stack([BR[R2I[i], :, i] for i in range(n_outcome)], axis=-1)
    BRM = BRM.rechunk((None, -1))
    assert_block_shape(BRM, n_sample_block, 1)
    assert_chunk_shape(BRM, n_indvar, n_outcome)
    assert_array_shape(BRM, n_sample_block * n_indvar, n_outcome)
    return BRM, YRM
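The model-selection idiom at the end (NaN-aware mean of per-block scores, argmax over alphas, then index the winning model per outcome) can be isolated with small stand-in shapes:

import numpy as np
import dask.array as da

R2 = da.from_array(np.random.rand(4, 3, 2), chunks=(2, 3, 2))  # (blocks, alphas, outcomes)
R2M = da.nanmean(R2, axis=0)        # mean score for each alpha/outcome pair
R2I = da.argmax(R2M, axis=0)        # index of the best alpha per outcome
print(R2I.compute())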
Example #31
indexed = da.stack(
    [data[qmask][nn[:, index]] for index in range(0, nn.shape[1])],
    axis=1).rechunk({
        0: 1,
        1: -1
    })
regions = da.map_blocks(find_overlap_regions,
                        indexed,
                        diffvecs.rechunk({0: 1})[..., np.newaxis],
                        mask=weight_mask,
                        dtype=np.float64,
                        chunks=(1, 4, 2, 2, fftsize, fftsize),
                        new_axis=(-1, -2))
region_intensities = regions[:, :, :, 0]
region_weights = regions[:, :, :, 1]
region_means = da.nanmean(region_intensities * region_weights, axis=(
    -1, -2)) / da.nanmean(region_weights, axis=(-1, -2))
region_means = region_means.compute()
region_ratios = region_means[..., 0] / region_means[..., 1]
Iopt_weight = np.where(w_calc[qmask, :4] > w_min - 0.001, w_calc[qmask, :4],
                       0)**4
res_I = minimize(error_func,
                 np.zeros((qmask).sum()),
                 args=(nn[:, 1:], Iopt_weight, np.log(region_ratios)))

rel_intens[qmask] = np.exp(res_I.x)
rel = np.exp(res_I.x)
im = ax.scatter(*pc[:, qmask], c=rel, zorder=5)
ax.set_aspect('equal')
plt.colorbar(im, ax=axs)
print(res_I.message)
# -
Example #32
def composite(src_fps, save_loc, save_nam,
              method="median", comp_mask="all_bad", bbox=None):

    # Prepare save location
    save_dir = os.path.join(save_loc, save_nam)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # Get extents
    main_extents = output_image_extent(src_fps, bbox)

    # Obtain properties of output array (same for all bands/images)
    out_extents = main_extents['bounds']
    out_w = main_extents['width']
    out_h = main_extents['height']
    nr_bands = main_extents['bandsCount']

    # Initiate arrays for storing number of available & good observations
    nobs = np.zeros((out_h, out_w), dtype=np.int8)
    nok = nobs.copy()

    # Create temp dir if it doesn't exist
    sav_dir = '.\\tmp'
    if not os.path.exists(sav_dir):
        os.mkdir(sav_dir)

    # MAIN LOOP FOR COMPOSITING
    tTim_A = time.time()
    tmp_sav_pth = []
    for band in range(nr_bands):
        print("#\n# Creating composite for Band {}".format(band+1))
        comp_stack = []
        # Loop all images
        for i, fp in enumerate(src_fps):
            str_time = time.time()

            # Open data set
            src = rasterio.open(fp)

            # Save copy of profile for writing tiff at the end
            if band == 0 and i == 0:
                out_meta = src.profile.copy()

            print("#   Processing Image {}.".format(i+1))

            # Skip Reading the image if bbox is out of bounds
            xL, yD, xR, yU = [xy for xy in src.bounds]
            xL_out, yD_out, xR_out, yU_out = out_extents
            chk_bbox = (xL > xR_out or yD > yU_out or
                        xR < xL_out or yU < yD_out)
            if chk_bbox:
                print('#   Image {} not included (out of bounds).'.format(i))
                break

            # Calculate offset for reading and slicing
            win, sl_x, sl_y = image_offset(out_extents, src)

            # ------------------------------
            # Read image and store to pickle
            # ------------------------------
            # Set offset Window for reading of TIF subset
            offset = win

            # Initiate array for output
            comp_band = np.full((out_h, out_w), np.nan, dtype=np.float32)

            # Read image and save to pickle
            print("#     Reading the image.")
            if band == 0:
                tmp_read = src.read(window=offset)
                for nc in range(1, nr_bands):
                    img_nam = ('img' + str(i+1).zfill(2) + "_b"
                               + str(nc+1).zfill(2) + '.p')
                    img_pth = os.path.join(sav_dir, img_nam)
                    pickle.dump(tmp_read[nc], open(img_pth, "wb"))
                tmp_read = tmp_read[0]
            else:
                img_nam = ('img' + str(i+1).zfill(2) + "_b"
                           + str(band+1).zfill(2) + '.p')
                img_pth = os.path.join(sav_dir, img_nam)
                tmp_read = pickle.load(open(img_pth, "rb"))

            # Read the image into the array
            comp_band[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] = tmp_read
            tmp_read = None
            src.close()

            # ------------------------------
            # determine bad pixels from mask
            # ------------------------------
            print("#     Determining bad pixels.")
            if band == 0:

                # Get index of mask
                idx_bad = get_mask_idx(fp, offset, comp_mask, dilate=-1)

                # Get index of background
                idx_bck = get_mask_idx(fp, offset, "background")

                # Update nok and nobs
                nobs[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] += 1
                nok[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] += 1

                nok[idx_bad[0][0]+sl_y[0], idx_bad[0][1]+sl_x[0]] += -1
                nobs[idx_bck[0][0]+sl_y[0], idx_bck[0][1]+sl_x[0]] += -1

                # Save index to pickle for later use
                idx_nam = 'idxBad_' + str(i+1).zfill(2) + '.p'
                idx_pth = os.path.join(sav_dir, idx_nam)
                pickle.dump(idx_bad, open(idx_pth, "wb"))
                idx_bck = None

            else:
                # Read from Pickle
                idx_nam = 'idxBad_' + str(i+1).zfill(2) + '.p'
                idx_pth = os.path.join(sav_dir, idx_nam)
                idx_bad = pickle.load(open(idx_pth, "rb"))

            # Apply mask to image
            if idx_bad[1] > 0:
                comp_band[idx_bad[0][0]+sl_y[0],
                          idx_bad[0][1]+sl_x[0]] = np.nan
                idx_bad = None

            # Stack comp_band array into Dask Array
            comp_stack.append(da.from_array(comp_band, chunks=(1024, 1024)))

            # Close the array to save memory
            comp_band = None

            end_time = time.time()
            print('#   --- Time: %s seconds ---' % (end_time-str_time))

        # Stack all images into 1 array
        stacked = da.stack(comp_stack, axis=0)

        # Calculate composite for selected method with dask
        print("# Compositing Band {}".format(band+1))
        str_time = time.time()
        if method == 'mean':
            comp_out = da.nanmean(stacked, axis=0, keepdims=True).compute()
        elif method == 'median':
            comp_out = da.nanmedian(stacked, axis=0, keepdims=True).compute()
        elif method == 'max':
            comp_out = da.nanmax(stacked, axis=0, keepdims=True).compute()
        elif method == 'min':
            comp_out = da.nanmin(stacked, axis=0, keepdims=True).compute()
        else:
            raise Exception('{} is not a valid compositing '
                            'method!'.format(method))
        end_time = time.time()
        print('# --- Time: %s seconds ---' % (end_time-str_time))

        # After one band is resolved, save to temp file and release memory by
        # deleting the array
        if nr_bands > 1:

            print('# Saving temporary composite file for this band.')

            # Create file name and save using pickle
            sav_fil = 'b_' + str(band+1).zfill(2) + '.p'
            sav_pth = os.path.join(sav_dir, sav_fil)
            pickle.dump(comp_out, open(sav_pth, "wb"))

            # Add to savePth list with filenames
            tmp_sav_pth.append(sav_pth)

            #  Clean up workspace
            comp_out = None

        tTim_B = time.time()
        print('--- Total time: %s seconds --- \n' % (tTim_B - tTim_A))

    # ----------------------------------------------------------------------------
    # OUT OF THE COMPOSITE LOOP RESTORE SAVED FILES AND BUILD TIF
    # ----------------------------------------------------------------------------
    if nr_bands > 1:

        print("# Restoring saved bands.")
        str_time = time.time()

        # Initiate output array
        comp_out = np.full((nr_bands, out_h, out_w), np.nan, dtype=np.float32)

        for bnd, pth in enumerate(tmp_sav_pth):
            comp_out[bnd, :, :] = pickle.load(open(pth, "rb"))

        # Remove temporary folder
        rmtree(sav_dir, ignore_errors=True)
        end_time = time.time()
        print('--- Time: %s seconds ---' % (end_time-str_time))

    # ----------------------------------------------------------------------------
    # SAVE RESULTS TO TIF
    # ----------------------------------------------------------------------------
    print("# Saving composite image to TIFF.")
    str_time = time.time()

    # Save composite
    out_nam = save_nam + "_composite.tif"
    out_pth = os.path.join(save_dir, out_nam)

    out_px = out_meta["transform"][0]
    out_py = out_meta["transform"][4]
    out_trans = Affine(out_px, 0.0, xL_out, 0.0, out_py, yU_out)

    out_meta.update(
        height=comp_out.shape[1], width=comp_out.shape[2],
        transform=out_trans, bigtiff="yes"
        )

    with rasterio.open(out_pth, "w", **out_meta) as dest:
        dest.write(comp_out)

    # Save nok mask
    out_nam = save_nam + "_nok.tif"
    out_pth = os.path.join(save_dir, out_nam)
    nok_meta = out_meta.copy()
    nok_meta.update(
        count=1,
        dtype="int8"
        )

    with rasterio.open(out_pth, "w", **nok_meta) as dest:
        dest.write(np.expand_dims(nok, axis=0))

    # Save nobs mask
    out_nam = save_nam + "_nobs.tif"
    out_pth = os.path.join(save_dir, out_nam)
    with rasterio.open(out_pth, "w", **nok_meta) as dest:
        dest.write(np.expand_dims(nobs, axis=0))

    end_time = time.time()
    print('--- Time: %s seconds ---' % (end_time-str_time))

    tTim_B = time.time()
    print('\n--- Total time: %s seconds --- \n' % (tTim_B - tTim_A))
Example #33
    def ds(self):
        if self._ds is None:
            file_exists = os.path.exists(self._result_file)

            reprocess = not file_exists or self._reprocess

            if reprocess:
                if file_exists:
                    print('Old file exists ' + self._result_file)
                    #print('Removing old file ' + self._result_file)
                    #shutil.rmtree(self._result_file)

                ds_data = OrderedDict()

                to_seconds = np.vectorize(
                    lambda x: x.seconds + x.microseconds / 1E6)

                print('Processing binary data...')
                xx, yy, zz = self._loadgrid()
                if xx is None:
                    if self._from_nc:
                        print('Processing existing netcdf...')
                        fn = self._result_file[:-5] + '_QC_raw.nc'
                        if os.path.exists(fn):
                            ds_temp = xr.open_dataset(self._result_file[:-5] +
                                                      '_QC_raw.nc',
                                                      chunks={'time': 50})
                            u = da.transpose(ds_temp['U'].data,
                                             axes=[3, 0, 1, 2])
                            v = da.transpose(ds_temp['V'].data,
                                             axes=[3, 0, 1, 2])
                            w = da.transpose(ds_temp['W'].data,
                                             axes=[3, 0, 1, 2])
                            tt = ds_temp['time']
                            te = (tt - tt[0]) / np.timedelta64(1, 's')
                            xx = ds_temp['x'].values
                            yy = ds_temp['y'].values
                            zz = ds_temp['z'].values
                        else:
                            print('USING OLD ZARR DATA')
                            ds_temp = xr.open_zarr(self._result_file)
                            u = da.transpose(ds_temp['U'].data,
                                             axes=[3, 0, 1, 2])
                            v = da.transpose(ds_temp['V'].data,
                                             axes=[3, 0, 1, 2])
                            w = da.transpose(ds_temp['W'].data,
                                             axes=[3, 0, 1, 2])
                            tt = ds_temp['time']
                            te = (tt - tt[0]) / np.timedelta64(1, 's')
                            xx = ds_temp['x'].values
                            yy = ds_temp['y'].values
                            zz = ds_temp['z'].values
                            print('ERROR: No NetCDF data found for ' +
                                  self._xml_file)
                            #return None
                            # print(u.shape)

                else:
                    tt, uvw = self._loaddata(xx, yy, zz)
                    if tt is None:
                        print('ERROR: No binary data found for ' +
                              self._xml_file)
                        return None

                    # calculate the elapsed time from the Timestamp objects and then convert to datetime64 datatype
                    te = to_seconds(tt - tt[0])
                    tt = pd.to_datetime(tt)
                    uvw = uvw.persist()
                    u = uvw[:, :, :, :, 0]
                    v = uvw[:, :, :, :, 1]
                    w = uvw[:, :, :, :, 2]


#                    u = xr.DataArray(uvw[:,:,:,:,0], coords=[tt, xx, yy, zz], dims=['time','x', 'y', 'z'],
#                                     name='U', attrs={'standard_name': 'sea_water_x_velocity', 'units': 'm s-1'})
#                    v = xr.DataArray(uvw[:,:,:,:,1], coords=[tt, xx, yy, zz], dims=['time', 'x', 'y', 'z'],
#                                     name='V', attrs={'standard_name': 'sea_water_x_velocity', 'units': 'm s-1'})
#                    w = xr.DataArray(uvw[:,:,:,:,2], coords=[tt, xx, yy, zz], dims=['time', 'x', 'y', 'z'],
#                                     name='W', attrs={'standard_name': 'upward_sea_water_velocity', 'units': 'm s-1'})

                if xx is None:
                    print('No data found')
                    return None

                u = u.persist()
                v = v.persist()
                w = w.persist()

                dx = float(xx[1] - xx[0])
                dy = float(yy[1] - yy[0])
                dz = float(zz[1] - zz[0])

                if self._norm_dims:
                    exp = self._result_root.split('/')[4]
                    runSheet = pd.read_csv('~/RunSheet-%s.csv' % exp)
                    runSheet = runSheet.set_index('RunID')
                    runDetails = runSheet.ix[int(self.run_id[-2:])]

                    T = runDetails['T (s)']
                    h = runDetails['h (m)']
                    D = runDetails['D (m)']

                    ww = te / T
                    om = 2. * np.pi / T
                    d_s = (2. * 1E-6 / om)**0.5
                    bl = 3. * np.pi / 4. * d_s

                    if exp == 'Exp6':
                        if D == 0.1:
                            dy_c = (188. + 82.) / 2
                            dx_c = 39.25
                            cx = dx_c / 1000.
                            cy = dy_c / 1000.
                        else:
                            dy_c = (806. + 287.) / 2. * 0.22
                            dx_c = 113 * 0.22
                            cx = dx_c / 1000.
                            cy = dy_c / 1000.
                    elif exp == 'Exp8':
                        dy_c = 624 * 0.22
                        dx_c = 15
                        cx = dx_c / 1000.
                        cy = dy_c / 1000.
                    xn = (xx + (D / 2. - cx)) / D
                    yn = (yy - cy) / D
                    zn = zz / h

                    xnm, ynm = np.meshgrid(xn, yn)
                    rr = np.sqrt(xnm**2. + ynm**2)
                    cylMask = rr < 0.5

                    nanPlane = np.ones(cylMask.shape)
                    nanPlane[cylMask] = np.nan
                    nanPlane = nanPlane.T
                    nanPlane = nanPlane[np.newaxis, :, :, np.newaxis]

                    u = u * nanPlane
                    v = v * nanPlane
                    w = w * nanPlane

                    if D == 0.1:
                        xInds = xn > 3.
                    else:
                        xInds = xn > 2.

                    blInd = np.argmax(zn > bl / h)
                    blPlane = int(round(blInd))

                    Ue = u[:, xInds, :, :]
                    Ue_bar = da.nanmean(Ue, axis=(1, 2, 3)).compute()
                    Ue_bl = da.nanmean(Ue[:, :, :, blPlane],
                                       axis=(1, 2)).compute()

                    inds = ~np.isnan(Ue_bl)

                    xv = ww[inds] % 1.
                    xv = xv + np.random.normal(scale=1E-6, size=xv.shape)
                    yv = Ue_bl[inds]
                    xy = np.stack([
                        np.concatenate([xv - 1., xv, xv + 1.]),
                        np.concatenate([yv, yv, yv])
                    ]).T
                    xy = xy[xy[:, 0].argsort(), :]
                    xi = np.linspace(-0.5, 1.5, len(xv) // 8)
                    n = np.nanmax(xy[:, 1])
                    # print(n)
                    # fig,ax = pl.subplots()
                    # ax.scatter(xy[:,0],xy[:,1]/n)
                    # print(xy)
                    spl = si.LSQUnivariateSpline(xy[:, 0],
                                                 xy[:, 1] / n,
                                                 t=xi,
                                                 k=3)
                    roots = spl.roots()
                    der = spl.derivative()
                    slope = der(roots)
                    inds = np.min(np.where(slope > 0))
                    dt = (roots[inds] % 1.).mean() - 0.5

                    tpx = np.arange(0, 0.5, 0.001)
                    U0_bl = np.abs(spl(tpx + dt).min() * n)
                    ws = ww - dt
                    Ue_spl = spl((ws - 0.5) % 1.0 + dt) * n * -1.0

                    #maxima = spl.derivative().roots()
                    #Umax = spl(maxima)
                    #UminIdx = np.argmin(Umax)
                    #U0_bl = np.abs(Umax[UminIdx]*n)

                    #ww_at_min = maxima[UminIdx]
                    #ws = ww - ww_at_min + 0.25

                    inds = ~np.isnan(Ue_bar)

                    xv = ww[inds] % 1.
                    xv = xv + np.random.normal(scale=1E-6, size=xv.shape)
                    yv = Ue_bar[inds]
                    xy = np.stack([
                        np.concatenate([xv - 1., xv, xv + 1.]),
                        np.concatenate([yv, yv, yv])
                    ]).T
                    xy = xy[xy[:, 0].argsort(), :]
                    xi = np.linspace(-0.5, 1.5, len(xv) // 8)
                    n = np.nanmax(xy[:, 1])
                    spl = si.LSQUnivariateSpline(xy[:, 0],
                                                 xy[:, 1] / n,
                                                 t=xi,
                                                 k=4)
                    maxima = spl.derivative().roots()
                    Umax = spl(maxima)
                    UminIdx = np.argmin(Umax)
                    U0_bar = np.abs(Umax[UminIdx] * n)

                    ww = xr.DataArray(ww, coords=[
                        tt,
                    ], dims=[
                        'time',
                    ])
                    ws = xr.DataArray(ws - 0.5, coords=[
                        tt,
                    ], dims=[
                        'time',
                    ])

                    xn = xr.DataArray(xn, coords=[
                        xx,
                    ], dims=[
                        'x',
                    ])
                    yn = xr.DataArray(yn, coords=[
                        yy,
                    ], dims=[
                        'y',
                    ])
                    zn = xr.DataArray(zn, coords=[
                        zz,
                    ], dims=[
                        'z',
                    ])

                    Ue_bar = xr.DataArray(Ue_bar,
                                          coords=[
                                              tt,
                                          ],
                                          dims=[
                                              'time',
                                          ])
                    Ue_bl = xr.DataArray(Ue_bl, coords=[
                        tt,
                    ], dims=[
                        'time',
                    ])
                    Ue_spl = xr.DataArray(Ue_spl,
                                          coords=[
                                              tt,
                                          ],
                                          dims=[
                                              'time',
                                          ])

                    ds_data['ww'] = ww
                    ds_data['ws'] = ws

                    ds_data['xn'] = xn
                    ds_data['yn'] = yn
                    ds_data['zn'] = zn

                    ds_data['Ue_bar'] = Ue_bar
                    ds_data['Ue_bl'] = Ue_bl
                    ds_data['Ue_spl'] = Ue_spl

                te = xr.DataArray(te, coords=[
                    tt,
                ], dims=[
                    'time',
                ])

                dims = ['time', 'x', 'y', 'z']
                coords = [tt, xx, yy, zz]

                ds_data['U'] = xr.DataArray(u,
                                            coords=coords,
                                            dims=dims,
                                            name='U',
                                            attrs={
                                                'standard_name':
                                                'sea_water_x_velocity',
                                                'units': 'm s-1'
                                            })
                ds_data['V'] = xr.DataArray(v,
                                            coords=coords,
                                            dims=dims,
                                            name='V',
                                            attrs={
                                                'standard_name':
                                                'sea_water_x_velocity',
                                                'units': 'm s-1'
                                            })
                ds_data['W'] = xr.DataArray(w,
                                            coords=coords,
                                            dims=dims,
                                            name='W',
                                            attrs={
                                                'standard_name':
                                                'upward_sea_water_velocity',
                                                'units': 'm s-1'
                                            })
                ds_data['te'] = te

                # stdV = da.nanstd(v)
                # stdW = da.nanstd(w)
                # thres=7.
                if 'U0_bl' in locals():
                    condition = (da.fabs(v) / U0_bl >
                                 1.5) | (da.fabs(w) / U0_bl > 0.6)
                    for var in ['U', 'V', 'W']:
                        ds_data[var].data = da.where(condition, np.nan,
                                                     ds_data[var].data)

                piv_step_frame = float(
                    self._xml_root.findall('piv/stepFrame')[0].text)

                print('Calculating tensor')
                # j = jacobianConv(ds.U, ds.V, ds.W, dx, dy, dz, sigma=1.5)
                j = jacobianDask(u, v, w, piv_step_frame, dx, dy, dz)
                print('Done')
                #j = da.from_array(j,chunks=(20,-1,-1,-1,-1,-1))

                #                j = jacobianDask(uvw[:,:,:,:,0],uvw[:,:,:,:,1], uvw[:,:,:,:,2], piv_step_frame, dx, dy, dz)
                jT = da.transpose(j, axes=[0, 1, 2, 3, 5, 4])

                #                j = j.persist()
                #                jT = jT.persist()

                jacobianNorm = da.sqrt(
                    da.nansum(da.nansum(j**2., axis=-1), axis=-1))

                strainTensor = (j + jT) / 2.
                vorticityTensor = (j - jT) / 2.

                strainTensorNorm = da.sqrt(
                    da.nansum(da.nansum(strainTensor**2., axis=-1), axis=-1))
                vorticityTensorNorm = da.sqrt(
                    da.nansum(da.nansum(vorticityTensor**2., axis=-1),
                              axis=-1))
                divergence = j[:, :, :, :, 0, 0] + j[:, :, :, :, 1,
                                                     1] + j[:, :, :, :, 2, 2]
                # print(divergence)
                omx = vorticityTensor[:, :, :, :, 2, 1] * 2.
                omy = vorticityTensor[:, :, :, :, 0, 2] * 2.
                omz = vorticityTensor[:, :, :, :, 1, 0] * 2.

                divNorm = divergence / jacobianNorm

                #                divNorm = divNorm.persist()

                #                divNorm_mean = da.nanmean(divNorm)
                #                divNorm_std = da.nanstd(divNorm)

                dims = ['x', 'y', 'z']
                comp = ['u', 'v', 'w']

                ds_data['jacobian'] = xr.DataArray(
                    j,
                    coords=[tt, xx, yy, zz, comp, dims],
                    dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                    name='jacobian')

                ds_data['jacobianNorm'] = xr.DataArray(
                    jacobianNorm,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='jacobianNorm')

                ds_data['strainTensor'] = xr.DataArray(
                    strainTensor,
                    coords=[tt, xx, yy, zz, comp, dims],
                    dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                    name='strainTensor')

                ds_data['vorticityTensor'] = xr.DataArray(
                    vorticityTensor,
                    coords=[tt, xx, yy, zz, comp, dims],
                    dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                    name='vorticityTensor')

                ds_data['vorticityNorm'] = xr.DataArray(
                    vorticityTensorNorm,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='vorticityNorm')

                ds_data['strainNorm'] = xr.DataArray(
                    strainTensorNorm,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='strainNorm')

                ds_data['divergence'] = xr.DataArray(
                    divergence,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='divergence')

                ds_data['omx'] = xr.DataArray(omx,
                                              coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'],
                                              name='omx')

                ds_data['omy'] = xr.DataArray(omy,
                                              coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'],
                                              name='omy')

                ds_data['omz'] = xr.DataArray(omz,
                                              coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'],
                                              name='omz')

                ds_data['divNorm'] = xr.DataArray(divNorm,
                                                  coords=[tt, xx, yy, zz],
                                                  dims=['time', 'x', 'y', 'z'],
                                                  name='divNorm')

                #                ds_data['divNorm_mean'] = xr.DataArray(divNorm_mean)
                #                ds_data['divNorm_std'] = xr.DataArray(divNorm_std)

                ds = xr.Dataset(ds_data)
                #                if self._from_nc:
                #                    for k,v in ds_temp.attrs.items():
                #                        ds.attrs[k]=v
                #ds = ds.chunk({'time': 20})

                self._append_CF_attrs(ds)
                self._append_attrs(ds)
                ds.attrs['filename'] = self._result_file

                if self._norm_dims:
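                    # Non-dimensional groups for the oscillatory flow: KC is the
                    # Keulegan-Carpenter number, S relates the Stokes-layer
                    # thickness to KC, Re_D is the diameter Reynolds number and
                    # Beta the frequency (Stokes) parameter, with nu = 1e-6 m^2/s.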

                    KC = U0_bl * T / D
                    delta = (2. * np.pi * d_s) / h
                    S = delta / KC

                    ds.attrs['T'] = T
                    ds.attrs['h'] = h
                    ds.attrs['D'] = D
                    ds.attrs['U0_bl'] = U0_bl
                    ds.attrs['U0_bar'] = U0_bar
                    ds.attrs['KC'] = KC
                    ds.attrs['S'] = S
                    ds.attrs['Delta+'] = ((1E-6 * T)**0.5) / h
                    ds.attrs['Delta_l'] = 2 * np.pi * d_s
                    ds.attrs['Delta_s'] = d_s
                    ds.attrs['Re_D'] = U0_bl * D / 1E-6
                    ds.attrs['Beta'] = D**2. / (1E-6 * T)

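                # Geometric-mean grid spacing in physical units (delta) and per
                # pixel (dpx); their ratio delta_px expresses the spacing in
                # pixels and feeds the divergence-based error estimate below.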
                delta = (ds.attrs['dx'] * ds.attrs['dy'] *
                         ds.attrs['dz'])**(1. / 3.)
                dpx = (ds.attrs['pdx'] * ds.attrs['pdy'] *
                       ds.attrs['pdz'])**(1. / 3.)
                delta_px = delta / dpx
                dt = ds.attrs['piv_step_ensemble']

                #                divRMS = da.sqrt(da.nanmean((divergence * dt) ** 2.))
                #                divRMS = divRMS.persist()
                #                vorticityTensorNorm.persist()
                #                velocityError = divRMS/((3./(2.*delta_px**2.))**0.5)
                # print(da.percentile(ds_new['vorticityTensorNorm'].data.ravel(),99.))
                # print(ds_new['divRMS'])
                # print(ds_new['divNorm_mean'])
                #                vorticityError = divRMS/dt/da.percentile(vorticityTensorNorm.ravel(),99.)

                #                divNorm_mean = da.nanmean(divNorm)
                #                divNorm_std = da.nanstd(divNorm)

                # print("initial save")
                #ds.to_zarr(self._result_file,compute=False)
                #ds = xr.open_zarr(self._result_file)

                #                xstart = np.argmax(xx > 0.05)
                #                ystart = np.argmax(yy > 0.07)

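                # Divergence-based PIV error estimates: for an incompressible flow
                # the divergence should vanish, so the RMS of (divergence * dt),
                # scaled by the grid spacing in pixels, gives a velocity error;
                # normalising by dt and the 99th-percentile vorticity norm gives a
                # relative vorticity error.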
                divRMS = da.sqrt(da.nanmean(
                    (divergence * dt)**2.))  #.compute()
                #divNorm = divergence / jacobianNorm
                #divNorm = divNorm.compute()
                #divNorm_mean = da.nanmean(divNorm).compute()
                #divNorm_std = da.nanstd(divNorm).compute()
                velocityError = divRMS / ((3. / (2. * delta_px**2.))**0.5)
                vortNorm = vorticityTensorNorm  #.compute()

                vorticityError = divRMS / dt / np.percentile(
                    vortNorm.ravel(), 99.)

                velocityError, vorticityError = da.compute(
                    velocityError, vorticityError)

                #ds.attrs['divNorm_mean'] = divNorm_mean
                #ds.attrs['divNorm_std'] = divNorm_std
                ds.attrs['velocityError'] = velocityError
                ds.attrs['vorticityError'] = vorticityError

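                # RMS of the cross-stream (V) and vertical (W) velocities over a
                # sub-region (restricted in normalised coordinates when available),
                # stored as dataset attributes.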
                if self._norm_dims:
                    xInds = (xn > 0.5) & (xn < 2.65)
                    yInds = (yn > -0.75) & (yn < 0.75)
                else:
                    xInds = range(len(ds['x']))
                    yInds = range(len(ds['y']))
                vrms = (ds['V'][:, xInds, yInds, :]**2.).mean(
                    dim=['time', 'x', 'y', 'z'])**0.5
                wrms = (ds['W'][:, xInds, yInds, :]**2.).mean(
                    dim=['time', 'x', 'y', 'z'])**0.5
                ds.attrs['Vrms'] = float(vrms.compute())
                ds.attrs['Wrms'] = float(wrms.compute())

                #fig,ax = pl.subplots()
                #ax.plot(ds.ws,ds.Ue_spl/U0_bl,color='k')
                #ax.plot(ds.ws,ds.Ue_bl/U0_bl,color='g')
                #ax.set_xlabel(r'$t/T$')
                #ax.set_ylabel(r'$U_{bl}/U_0$')
                #fig.savefig(self._result_file[:-4] + 'png',dpi=125)
                #pl.close(fig)
                # print("second save")
                #ds.to_netcdf(self._result_file)
                ds.to_zarr(self._result_file, mode='w')

                print('Cached ' + self._result_file)

                #ds = xr.open_dataset(self._result_file,chunks={'time':20})
                ds = xr.open_zarr(self._result_file)
                ds.attrs['filename'] = self._result_file
            else:
                #ds = xr.open_dataset(self._result_file,chunks={'time':20})
                ds = xr.open_zarr(self._result_file)
                ds.attrs['filename'] = self._result_file

            self._ds = ds

        return self._ds
Пример #34
0
def tall_clutter(files,
                 config,
                 clutter_thresh_min=0.0002,
                 clutter_thresh_max=0.25,
                 radius=1,
                 write_radar=True,
                 out_file=None,
                 use_dask=False):
    """
    Wind Farm Clutter Calculation

    Parameters
    ----------
    files : list
        List of radar files used for the clutter calculation.
    config : str
        String representing the configuration for the radar.
        Such possible configurations are listed in default_config.py

    Other Parameters
    ----------------
    clutter_thresh_min : float
        Threshold above which values are considered clutter, as long as
        they are also below clutter_thresh_max.
    clutter_thresh_max : float
        Threshold below which values are considered clutter, as long as
        they are also above clutter_thresh_min.
    radius : int
        Radius of the area surrounding a clutter gate that will
        also be flagged as clutter.
    write_radar : bool
        Whether or not to write the clutter radar to a netCDF file.
        Default is True.
    out_file : string
        Path and filename to write the radar object to,
        if write_radar is True.
    use_dask : bool
        Use dask instead of running statistics for the calculation.
        This will reduce run time.

    Returns
    -------
    clutter_radar : Radar
        Radar object with the clutter field that was calculated.
        This radar only has the clutter field, but maintains all
        other radar specifications.

    """
    field_names = get_field_names(config)
    refl_field = field_names["reflectivity"]
    vel_field = field_names["velocity"]
    ncp_field = field_names["normalized_coherent_power"]

    def get_reflect_array(file, first_shape):
        """ Retrieves a reflectivity array for a radar volume. """
        try:
            radar = pyart.io.read(
                file, include_fields=[refl_field, ncp_field, vel_field])
            reflect_array = deepcopy(radar.fields[refl_field]['data'])
            ncp = radar.fields[ncp_field]['data']
            height = radar.gate_z["data"]
            up_in_the_air = height > 2000.0
            the_mask = np.logical_or.reduce(
                (ncp < 0.8, reflect_array.mask, up_in_the_air))
            reflect_array = np.ma.masked_where(the_mask, reflect_array)
            del radar
            if reflect_array.shape == first_shape:
                return reflect_array.filled(fill_value=np.nan)
        except (TypeError, OSError):
            print(file + ' is corrupt...skipping!')
        return np.nan * np.zeros(first_shape)

    if use_dask is False:
        run_stats = _RunningStats()
        first_shape = 0
        for file in files:
            try:
                radar = pyart.io.read(file)
                reflect_array = radar.fields[refl_field]['data']
                ncp = deepcopy(radar.fields[ncp_field]['data'])
                #reflect_array = np.ma.masked_where(ncp < 0.7, reflect_array)

                if first_shape == 0:
                    first_shape = reflect_array.shape
                    clutter_radar = radar
                if reflect_array.shape == first_shape:
                    run_stats.push(reflect_array)
                del radar
                del radar
            except (TypeError, OSError):
                print(file + ' is corrupt...skipping!')
                continue
        mean = run_stats.mean()
        stdev = run_stats.standard_deviation()
        clutter_values = stdev / mean
        clutter_values = np.ma.masked_invalid(clutter_values)
        clutter_values_no_mask = clutter_values.filled(clutter_thresh_max + 1)
    else:
        cluster = LocalCluster(n_workers=20, processes=True)
        client = Client(cluster)
        first_shape = 0
        i = 0
        while first_shape == 0:
            try:
                radar = pyart.io.read(files[i])
                reflect_array = radar.fields[refl_field]['data']
                first_shape = reflect_array.shape
                clutter_radar = radar
            except (TypeError, OSError):
                print(files[i] + ' is corrupt...skipping!')
                i = i + 1
                continue
        arrays = [
            delayed(get_reflect_array)(file, first_shape) for file in files
        ]
        array = [
            da.from_delayed(a, shape=first_shape, dtype=float) for a in arrays
        ]
        array = da.stack(array, axis=0)
        print('## Calculating mean in parallel...')
        mean = np.array(da.nanmean(array, axis=0))
        print('## Calculating standard deviation...')
        count = np.array(da.sum(da.isfinite(array), axis=0))
        stdev = np.array(da.nanstd(array, axis=0))
        clutter_values = stdev / mean
        clutter_values = np.ma.masked_invalid(clutter_values)
        clutter_values = np.ma.masked_where(
            np.logical_or(clutter_values.mask, count < 20), clutter_values)
        # Masked arrays can suck
        clutter_values_no_mask = clutter_values.filled(
            (clutter_thresh_max + 1))

    shape = clutter_values.shape
    mask = np.ma.getmask(clutter_values)
    is_clutters = np.argwhere(
        np.logical_and.reduce((
            clutter_values_no_mask > clutter_thresh_min,
            clutter_values_no_mask < clutter_thresh_max,
        )))
    clutter_array = _clutter_marker(is_clutters, shape, mask, radius)
    clutter_radar.fields.clear()
    clutter_array = clutter_array.filled(0)
    clutter_dict = _clutter_to_dict(clutter_array)
    clutter_value_dict = _clutter_to_dict(clutter_values)
    clutter_value_dict["long_name"] = "Clutter value (std. dev/mean Z)"
    clutter_value_dict["standard_name"] = "clutter_value"
    clutter_radar.add_field('ground_clutter',
                            clutter_dict,
                            replace_existing=True)
    clutter_radar.add_field('clutter_value',
                            clutter_value_dict,
                            replace_existing=True)
    if write_radar is True:
        pyart.io.write_cfradial(out_file, clutter_radar)
    return clutter_radar
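A minimal, hedged usage sketch of the function above (the file glob, the config name 'my_radar_config' and the output path are hypothetical; only the keyword arguments documented in the signature are assumed):

import glob

# Hypothetical example: build a wind-farm clutter map from a set of radar
# volumes using the dask code path and write it out as CF/Radial netCDF.
radar_files = sorted(glob.glob('/data/radar/*.nc'))           # hypothetical paths
clutter_radar = tall_clutter(radar_files, 'my_radar_config',  # hypothetical config
                             clutter_thresh_min=0.0002,
                             clutter_thresh_max=0.25,
                             radius=1,
                             use_dask=True,
                             write_radar=True,
                             out_file='wind_farm_clutter.nc')  # hypothetical output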
Пример #35
0
def mask_imputation(
        array: da.core.Array,
        mask_values: Optional[da.core.Array] = None,
        fill_value: int = 0,
        mask_method: str = 'mean',
        mask_axis: int = 0) -> Tuple[da.core.Array, sparse._coo.core.COO]:
    """ Creates the mask that will fill "array" and the filled_array that has the missing values of "array" filled.

    If A is array and has missing values

    A = [[1, 2, 3],
         [?, 4, 5],
         [3, 4, ?]]

    Then mask is a sparse COO array that has the following entries

    mask = [[-, -, -],
            [a, -, -],
            [-, -, b]] Where "-" refers 0, but is replaced with "-" to show that value is not stored

    Then the filled array is:

    A_filled = [[1, 2, 3],
                [f, 4, 5],
                [3, 4, f]] Where "f" refers to a common fill value specified as "fill_value"


    Parameters
    ----------
    array : array_like, shape (N, P)
        Array whose copy will be filled and, if needed, from which mask values will be computed
    mask_values : array_like, shape (P,) optional
        Values to fill mask with, if already computed
    fill_value : int
        Value that will be used to fill NaN values in array
    mask_method : str
        Method used to compute mask_values. Only used if mask_values is not specified
    mask_axis : int
        Axis in which values will be computed from.

        axis = 0 ===> column summary of values
        axis = 1 ===> row summary of values

    Returns
    -------
    filled_array: dask array, shape (N, P)
        copy of "array" with nan_values filled, if specified
    mask : dask array, shape (N, P)
        sparse dask array with mask values where "array" is NaN.

    """
    if not isinstance(array._meta, np.ndarray):
        raise ValueError(
            f'expected meta, {np.ndarray}, but got {type(array._meta)}')

    if mask_values is None:
        if mask_method == 'mean':
            mask_values = da.nanmean(array, axis=mask_axis).compute()
        else:
            raise NotImplementedError(
                f'mask_method, {mask_method}, is not implemented ')
    else:
        try:
            mask_values = mask_values.compute()
        except AttributeError:
            pass

    coords = compute(*da.where(da.isnan(array)))
    if not len(coords[0]):
        raise ValueError(
            'expected array to have maskable values, but got none.')

    data = axis_wise_COO_data_by_axis(mask_values, coords, axis=1 - mask_axis)

    mask = sparse.COO(coords=np.vstack(coords),
                      data=data,
                      shape=array.shape,
                      has_duplicates=False,
                      cache=True)
    mask = da.from_array(mask, chunks=array.shape)

    filled_array = fill_array(array, fill_value=fill_value).persist()

    return filled_array, mask
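A minimal, hedged usage sketch of the masking behaviour described in the docstring above (the 3x3 example array is made up, and mask_imputation together with its helpers axis_wise_COO_data_by_axis and fill_array is assumed to be importable):

import numpy as np
import dask.array as da

# Small array with two missing values, mirroring the docstring example.
A = da.from_array(np.array([[1., 2., 3.],
                            [np.nan, 4., 5.],
                            [3., 4., np.nan]]), chunks=(3, 3))

filled, mask = mask_imputation(A, fill_value=0, mask_method='mean', mask_axis=0)
# "filled" has the NaNs replaced by 0; "mask" holds the column means
# (2.0 and 4.0) at the formerly missing positions and zeros elsewhere.
print(filled.compute())
print(mask.compute().todense())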
Пример #36
0
    b = a.any()
    # The result of a Numpy comparison on a dask array is stored as operations
    # to perform, but the operations have not been performed yet.
    print('b:', b)
    # So to see the values we need to run the operations and convert to a Numpy array.
    print('b.compute():', b.compute())

# Block 2
if False:
    # Most of the Numpy operations are available in Dask computations.
    a = da.random.random(1000, chunks=100)
    #    x = da.arange(1000, chunks=100)
    b = da.ones(1000, chunks=100)

    c = b - a
    c = da.nanmean(c)

    print('c:', c)
    print('c.compute():', c.compute())

# Block 3
# How much faster is dask than Numpy at some calculations?
if False:

    # Let's make a large array.
    num = 100000000
    # What if that array is smaller? It turns out the overhead of using Dask
    # can make it slower for smaller datasets.
    #    num = 10000
    start_time = time.time()
    b = np.ones(num) - np.random.random(num)