def __init__(self, lattice='square', p=0.5, calc_fields=True):
        super().__init__(lattice, p, method='charges')
        self.g = pc.get_geom(lat=lattice, porosity=p)
        self.length_factor = 1
        cs = None
        ds = None
        if calc_fields:
            ds = (pc.process_lattice(lattice, layers_to_keep=1,
                                     porosity=p).pipe(pc.linear_response).pipe(
                                         pc.nl_response))
            cs = pc.calc_charge_fields(ds, reference=True)
            self.ds = ds
            self.stress = xr.dot(cs.s, ds.Q0, dims='charge').values[[0, 2, 1]]
            self.d_lin = xr.dot(cs.d.sel(order=1), ds.Q0, dims='charge').values
            self.d_mode = xr.dot(cs.d.sel(order=1),
                                 ds.cmodes.isel(mode=0),
                                 dims='charge')

            # calculate boundary
            edges, nodes = pc.all_edges(self.g)
            inds = np.unique(np.concatenate([edges.i1, edges.i2]))
            self.boundary = inds

        if ds is not None:
            self.holes = ds.holes.values.T[:, 1:]  # shape = (2, N_holes)
        if cs is not None:
            self.real_nodes = cs.reference.values
        else:
            self.real_nodes = self.g.p
        self.calc_tri()
Example #2
def CGGM_neg_log_likelihood(Lambda, Sigma, Theta, Syy, Sxx, Sxy, n_xr):
    # determinant of the covariance
    # normalised by n_k/n, our renormalisation convention
    determinants = np.sum(
        -0.5 * n_xr * np.log(np.linalg.det(Lambda))) / n_xr.sum()

    # dotproduct in the exponent
    # the Sxx... are already renormalised by 1/n
    dotproduct_yy = xr.dot(Lambda, Syy, dims=["component_y", "component_yT"])
    dotproduct_xy = 2 * xr.dot(Theta, Sxy, dims=["component_y", "component_x"])
    dotproduct_xx = xr.dot(xr.dot(Theta,
                                  xr.dot(Sigma, Theta,
                                         dims=["component_y"]).rename({
                                             "component_yT":
                                             "component_y",
                                             "component_x":
                                             "component_xT"
                                         }),
                                  dims=["component_y"]),
                           Sxx,
                           dims=["component_x", "component_xT"])

    dotproducts = 0.5 * (dotproduct_yy + dotproduct_xx +
                         dotproduct_xy).sum(dim=["label"])
    return float(determinants +
                 dotproducts)  # + 0.5*np.log(2*np.pi)*len(Lambda.component_y)
Example #3
def by_label_log_likelihood(X, Y, Lambda, Sigma, Theta):
    # products of all the components together (no sum over the observations yet)
    # such that: Sxx = sum_{observations} xx / n_total
    xx = X * X.rename({"component_x": "component_xT"})
    yy = Y * Y.rename({"component_y": "component_yT"})
    xy = X * Y

    # determinant of the covariance
    determinants = xr.DataArray(data=-0.5 * np.log(np.linalg.det(Lambda)),
                                coords=[Lambda.label.values],
                                dims=["label"])

    # dotproduct in the exponent
    dotproduct_yy = xr.dot(Lambda, yy, dims=["component_y", "component_yT"])
    dotproduct_xy = 2 * xr.dot(Theta, xy, dims=["component_y", "component_x"])
    dotproduct_xx = xr.dot(xr.dot(Theta,
                                  xr.dot(Sigma, Theta,
                                         dims=["component_y"]).rename({
                                             "component_yT":
                                             "component_y",
                                             "component_x":
                                             "component_xT"
                                         }),
                                  dims=["component_y"]),
                           xx,
                           dims=["component_x", "component_xT"])

    dotproducts = 0.5 * (dotproduct_yy + dotproduct_xx + dotproduct_xy)
    return -1 * (determinants + dotproducts
                 )  # - 0.5*np.log(2*np.pi)*len(Lambda.component_y)
Example #4
    def ProjectCorrs(self):
        if not hasattr(self, 'left_evec') or not hasattr(self, 'right_evec'):
            warn(
                'PerformVarMeth needs to be called to project, attempting now with default t0='
                + str(self.t0) + ' and dt=' + str(self.dt))
            self.PerformVarMeth()
        self.proj_corr = xr.dot(self.left_evec, self.matrix_corr, dims='ism')
        self.proj_corr = xr.dot(self.right_evec, self.proj_corr, dims='jsm')
        if hasattr(self, 'booted_mcorr') and self.booted_mcorr is not None:
            self.booted_proj_corr = self.proj_corr.copy()
            self.booted_proj_corr = xr.apply_ufunc(
                lambda x: BootStrap(thisnboot=self.nboot),
                self.booted_proj_corr,
                vectorize=True)
            for iboot in range(self.nboot):
                this_boot_mcorr = xr.apply_ufunc(lambda x: x.bootvals[iboot],
                                                 self.booted_mcorr,
                                                 vectorize=True)
                this_booted_proj_corr = xr.dot(self.left_evec,
                                               this_boot_mcorr,
                                               dims='ism')
                this_booted_proj_corr = xr.dot(self.right_evec,
                                               this_booted_proj_corr,
                                               dims='jsm')

                def this_fun(itemp, boot_val):
                    itemp.bootvals = itemp.bootvals.append(
                        pa.Series([boot_val]))
                    return itemp

                self.booted_proj_corr = xr.apply_ufunc(this_fun,
                                                       self.booted_proj_corr,
                                                       this_booted_proj_corr,
                                                       vectorize=True)
Example #5
def gaussian_log_likelihood(Y, mu, Lambda):
    # Y size: n x p
    # mu size: K x p
    # Lambda size: K x p x p
    # returns the log of the gaussian density of each observation in x
    # for each value of the parameters across all classes
    # output size: n x K

    K = len(mu.label)
    p = len(mu.component_y)
    n = len(Y.observation_id)

    # (x-mu)^T Lambda (x-mu)
    dotproduct = xr.DataArray(xr.dot(Lambda, Y - mu, dims=['component_y']),
                              coords=[range(K), range(p),
                                      range(n)],
                              dims=['label', 'component_y', 'observation_id'])
    dotproduct_final = xr.dot(Y - mu, dotproduct, dims=['component_y'])

    # det(Lambda)
    determinants = xr.DataArray([np.linalg.det(Lambda[k]) for k in range(K)],
                                coords=[range(K)],
                                dims=['label'])

    #Sigma_da.reduce(lambda x, axis: np.linalg.det(x), dim = 'label')

    # numerator of the Bayes rule to get p_i,k^t
    log_p = 0.5 * (-dotproduct_final + np.log(determinants) -
                   p * np.log(2 * np.pi))
    return log_p
Example #6
def Q(
        # observations
        Y,
        X,
        # new parameter values
        pi,
        Lambda,
        Sigma,
        Theta,
        # current parameter values
        pi_t,
        Lambda_t,
        Sigma_t,
        Theta_t):

    # label weights (E step)
    p_t = label_weights_E_step(
        # observations
        Y,
        X,
        # current parameter values
        pi_t,
        Lambda_t,
        Sigma_t,
        Theta_t)

    # Sufficient statistics weighted by the E step
    # careful here: remember we divide by n_total (p_t.sum()), not by n_k (p_t.sum(dim=["observation_id"]))
    # this is just a convention, applied consistently everywhere, so that the penalty intensity does not have to scale with n

    Sxx_unsupervised = xr.dot(p_t * X,
                              X.rename({"component_x": "component_xT"}),
                              dims="observation_id") / p_t.sum()
    Syy_unsupervised = xr.dot(p_t * Y,
                              Y.rename({"component_y": "component_yT"}),
                              dims="observation_id") / p_t.sum()
    Sxy_unsupervised = xr.dot(p_t * X, Y, dims="observation_id") / p_t.sum()

    # get the log likelihood by class w/ param theta
    # log P_theta(x_i, z_i = k) = log P_theta(x_i | z_i = k) + log(pi_k)
    log_likelihood = -1. * CGGM_neg_log_likelihood(
        Lambda,
        Sigma,
        Theta,
        Syy_unsupervised,
        Sxx_unsupervised,
        Sxy_unsupervised,
        n_xr=p_t.sum(dim=["observation_id"]))
    # the result is divided by n_total (p_t.sum() )
    return float(log_likelihood +
                 (np.log(pi) * p_t.sum(dim=["observation_id"])).sum() /
                 p_t.sum())
Example #7
def NSE_diff(pred, obs):
    """Calculate the Nash-sutcliffe efficiency for models trained on differences.

    Denominator: variance of predicting the mean value of differences.

    This variation of the NSE takes into account that the most appropriate
    baseline forecast is persistence when predicting differences.

    TODO: this function assumes pred.forecast_day.values is scalar, what if not?

    Parameters
    ----------
    pred : xr.DataArray
        the prediction array
    obs : xr.DataArray
        the true values to compare with

    Returns
    -------
    (float, float)
        1) Variance reduction compared to persistence forecasts
        2) NSE of persistence forecasts (also a variance reduction)
    """
    inits = pred.init_time.values

    # for subtracting observations, the time coordinate has to be its valid time
    pred = pred.swap_dims({'init_time': 'time'})
    diff = pred - obs

    # now we will iterate over the init_time, so this shall be our index
    diff = diff.swap_dims({'time': 'init_time'})
    pred = pred.swap_dims({'time': 'init_time'})

    err = err_persistence = err_mean_dis = 0
    for init in inits:
        valid_time = pred.sel(init_time=init).time

        # Variance Reduction compared to persistence forecasts
        d = diff.sel(init_time=init)  # is scalar if using one fcstday!
        err += float(xr.dot(d, d))

        persistence = obs.sel(time=init)
        d = persistence - obs.sel(time=valid_time)
        err_persistence += float(xr.dot(d, d))

        # NSE of persistence forecasts
        d = obs.mean() - obs.sel(time=valid_time)
        err_mean_dis += float(xr.dot(d, d))

    return float(1 -
                 err / err_persistence), float(1 -
                                               err_persistence / err_mean_dis)
Example #8
    def pass_forward(self, inputs, past_inputs):
        i, p = self.layer_prev.pass_forward(inputs, past_inputs)
        pre_activation = (xr.dot(i, self.m_weights, dims=[*self.layer_prev.coords])
                          + self.m_biases)
        activation = self.func_activation(pre_activation)
        return activation, p.append({KEY_IN: i, KEY_OUT_PRE: pre_activation})

    def pass_back(self, gradient, past_inputs, past_outputs):
        grad, inputs, p = self.layer_next.pass_back(gradient, past_inputs, past_outputs)
        i = inputs.pop()
        grad_b = np.multiply(grad, self.func_activation_d(i[KEY_OUT_PRE]))
        grad_w = np.multiply(grad_b, i[KEY_IN])
        # note: assumes this layer's output coords are stored in self.coords
        grad_next = xr.dot(grad_b, self.m_weights, dims=[*self.coords])
        return grad_next, inputs, p.append({KEY_B: grad_b, KEY_W: grad_w})


class ConvolutionLayer(Layer):

    def pass_forward(self, inputs, past_inputs):
        pass #TODO
        
    def pass_back(self, gradient, past_inputs, past_outputs):
        pass #TODO

def test():
    NUM_CASES = 5
    NUM_INPUTS = 10
    NUM_OUTPUTS = 2
    DIM_IN = 'inputs'
    DIM_OUT = 'neurons'
    DIM_LABEL = 'labels'
    DIM_CASE = 'cases'

    layer0 = InputLayer({DIM_IN: NUM_INPUTS})
    layer1 = FullyConnectedLayer(layer0, {DIM_OUT: NUM_OUTPUTS})
    layer2 = OutputLayer(layer1)

    input_coords = {DIM_CASE: np.arange(NUM_CASES), DIM_IN: np.arange(NUM_INPUTS)}
    inputs = xr.DataArray(np.ones((NUM_CASES,NUM_INPUTS)), dims=[*input_coords], coords=input_coords)
    label_coords = {DIM_CASE: np.arange(NUM_CASES), DIM_LABEL: np.arange(NUM_OUTPUTS)}
    labels = xr.DataArray(np.ones((NUM_CASES, NUM_OUTPUTS)), dims=[*label_coords], coords=label_coords)

    #TODO
    out, outputs = layer2.pass_forward(inputs)
    gradients = layer0.pass_back(labels, outputs)


if __name__ == '__main__':
    test()
Example #9
def hierarchical_ggm(Y,
                     true_labels,
                     rho,
                     l1,
                     l2,
                     Lambda_shift_threshold,
                     loss_shift_threshold,
                     verbose=False):
    p = len(Y.component_y)
    K = len(true_labels.label)

    # get number of observations by class
    N = true_labels.sum(dim="observation_id")

    # get empirical mu
    mu = (Y * true_labels).sum(dim="observation_id") / N
    # get empirical sigma (S)
    centered_observations = Y - mu
    centered_observations_T = xr.DataArray(
        data=centered_observations,
        coords=[centered_observations.observation_id,
                range(p),
                range(K)],
        dims=["observation_id", "component_yT", "label"])

    S = xr.dot(true_labels * centered_observations,
               centered_observations_T,
               dims=['observation_id']) / N

    # penalised optimisation, GGL
    Lambda = xr.DataArray([np.eye(p)] * K, coords=S.coords, dims=S.dims)
    Z = xr.DataArray(np.zeros((K, p, p)), coords=S.coords, dims=S.dims)
    U = xr.DataArray(np.zeros((K, p, p)), coords=S.coords, dims=S.dims)
    Lambda_shift = 1
    loss_shift = 1
    # while loop with convergence check
    while ((Lambda_shift > Lambda_shift_threshold
            or loss_shift > loss_shift_threshold)):
        #and loss_shift > 0):
        # if penalty == "GGL"
        loss = GGL_loss(Lambda, S, N, l1, l2)
        if verbose:
            print(loss)
        # one ADMM update
        #if penalty == "GGL":
        Lambda_prime, Z, U = update_admm_GGL(S, N, Lambda, Z, U, rho, l1, l2)
        loss_prime = GGL_loss(Lambda_prime, S, N, l1, l2)

        loss_shift = (loss - loss_prime) / np.abs(loss)
        Lambda_shift = float(
            np.sum(np.abs(Lambda_prime - Lambda)) / np.sum(np.abs(Lambda)))
        if verbose:
            print(Lambda_shift)
            print(loss_shift)
            print()

        Lambda = Lambda_prime
        loss = loss_prime

    return mu, Lambda
Example #10
    def _compute_wave_coefficient_vector(self):
        """
        Build the wave coefficient vector `r`
        """
        r = xr.dot(self.A_inv, self.b).rename({"_hpoly": "hpoly"})
        r.name = "wave_coefficient_vector"
        return r
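
# A standalone sketch (not from the original class) of the contraction above,
# assuming `A_inv` carries dims ("_hpoly", "hpoly") and `b` carries ("hpoly",):
import numpy as np
import xarray as xr

A_inv = xr.DataArray(np.eye(3), dims=("_hpoly", "hpoly"))
b = xr.DataArray(np.arange(3.0), dims=("hpoly",))
r = xr.dot(A_inv, b)               # contracts the shared "hpoly" dimension
r = r.rename({"_hpoly": "hpoly"})  # rename the surviving dimension back
r.name = "wave_coefficient_vector"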
Example #11
def get_D_KL_from_xarray(da_P_X_Y, da_P_X, da_P_Y):
    """
    base 10 : Mutual information of
    I_matrix = xr.apply_ufunc(func_D_KL, P_X_Y, P_X, P_Y)
    return I_matrix.sum()
    """
    da_log2 = xr.zeros_like(da_P_X_Y)
    import itertools
    str_dim_x = da_P_X.dims[0]
    str_dim_y = da_P_Y.dims[0]
    for realiz_id_x, realiz_id_y in itertools.product(
            da_P_X_Y[str_dim_x].values, da_P_X_Y[str_dim_y].values):
        p_xy = da_P_X_Y.loc[{str_dim_x: realiz_id_x, str_dim_y: realiz_id_y}]
        p_x = da_P_X.loc[{str_dim_x: realiz_id_x}]
        p_y = da_P_Y.loc[{str_dim_y: realiz_id_y}]
        log_p_xy_over_p_x_p_y = ufunc_log_pxy_over_px_py(p_xy, p_x, p_y)
        da_log2.loc[{
            str_dim_x: realiz_id_x,
            str_dim_y: realiz_id_y
        }] = log_p_xy_over_p_x_p_y
        # da_log2.loc[{str_dim_x:realiz_id_x, str_dim_y:realiz_id_y}] =
    # print("da_log2: ", da_log2)
    # print("da_P_X_Y: ", da_P_X_Y)
    mutual_information = xr.dot(da_P_X_Y, da_log2)
    print("mutual_information (", str_dim_x, ", ", str_dim_y, "): ",
          mutual_information.values)
    return mutual_information
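
# An illustrative check of the same quantity (the log-ratio helper
# ufunc_log_pxy_over_px_py is external to this snippet, so the log ratio is
# computed inline here; log base 2 is an assumption):
import numpy as np
import xarray as xr

P_XY = xr.DataArray([[0.25, 0.25], [0.25, 0.25]], dims=("x", "y"))  # independent joint
P_X = P_XY.sum("y")
P_Y = P_XY.sum("x")
log_ratio = np.log2(P_XY / (P_X * P_Y))
print(float(xr.dot(P_XY, log_ratio)))  # 0.0 for independent variables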
Example #12
def sea_ice_area(sic: xarray.DataArray, area: xarray.DataArray, thresh: str = "15 pct"):
    """Total sea ice area.

    Sea ice area measures the total sea ice covered area where sea ice concentration is above a threshold,
    usually set to 15%.

    Parameters
    ----------
    sic : xarray.DataArray
      Sea ice concentration [0,1].
    area : xarray.DataArray
      Grid cell area [m²]
    thresh : str
      Minimum sea ice concentration for a grid cell to contribute to the sea ice extent.

    Returns
    -------
    Sea ice area [m²].

    Notes
    -----
    To compute sea ice area over a subregion, first mask or subset the input sea ice concentration data.

    References
    ----------
    `What is the difference between sea ice area and extent
    <https://nsidc.org/arcticseaicenews/faq/#area_extent>`_

    """
    t = convert_units_to(thresh, sic)
    factor = convert_units_to("100 pct", sic)
    out = xarray.dot(sic.where(sic >= t, 0), area) / factor
    out.attrs["units"] = area.units
    return out
Example #13
def sea_ice_extent(sic, area, thresh="15 pct"):
    """Return the total sea ice extent.

    Sea ice extent measures the *ice-covered* area, where a region is considered ice-covered if its sea ice
    concentration is above a threshold usually set to 15%.

    Parameters
    ----------
    sic : xarray.DataArray
      Sea ice concentration [0,1].
    area : xarray.DataArray
      Grid cell area [m²]
    thresh : str
      Minimum sea ice concentration for a grid cell to contribute to the sea ice extent.

    Returns
    -------
    Sea ice extent [m²].

    Notes
    -----
    To compute sea ice extent over a subregion, first mask or subset the input sea ice concentration data.

    References
    ----------
    `What is the difference between sea ice area and extent
    <https://nsidc.org/arcticseaicenews/faq/#area_extent>`_
    """
    t = utils.convert_units_to(thresh, sic)
    out = xarray.dot(sic >= t, area)
    out.attrs["units"] = area.units
    return out
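
# A minimal standalone sketch (synthetic values, not a call into xclim) of the
# xr.dot pattern used by sea_ice_area/sea_ice_extent: zero cells below the 15%
# threshold, then integrate against grid-cell area.
import numpy as np
import xarray as xr

sic = xr.DataArray([[0.0, 0.2], [0.5, 0.9]], dims=("y", "x"))         # concentration in [0, 1]
cell_area = xr.DataArray(np.full((2, 2), 625.0e6), dims=("y", "x"))   # 25 km x 25 km cells [m2]

ice_area = xr.dot(sic.where(sic >= 0.15, 0), cell_area)  # sum(sic * area) over ice-covered cells
ice_extent = xr.dot(sic >= 0.15, cell_area)              # sum(area) over ice-covered cells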
Example #14
    def micro(self):
        weights_micro = xr.apply_ufunc(
            get_micro_ensemble,
            self._obj.e_vals.groupby(self.outer_dim),
            self.E_0.groupby(self.outer_dim),
            kwargs={"delta_E": self.delta_E},
        )
        return xr.dot(weights_micro, self._obj.eev, dims=self.state_dim)
Example #15
    def canonical(self):
        beta = self.beta
        weights_canonical = xr.apply_ufunc(
            get_weights_canonical,
            self._obj.e_vals.groupby(self.outer_dim),
            beta.groupby(self.outer_dim),
        )
        return xr.dot(weights_canonical, self._obj.eev, dims=self.state_dim)
Example #16
def covariance(x, y, dim=None):
    valid_values = x.notnull() & y.notnull()
    valid_count = valid_values.sum(dim)

    demeaned_x = (x - x.mean(dim)).fillna(0)
    demeaned_y = (y - y.mean(dim)).fillna(0)

    return xr.dot(demeaned_x, demeaned_y, dims=dim) / valid_count
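
# A minimal usage sketch (made-up values): NaNs are excluded from the count via
# notnull() and zeroed out after demeaning, giving a NaN-tolerant covariance.
import numpy as np
import xarray as xr

t = np.arange(5)
x = xr.DataArray([1.0, 2.0, np.nan, 4.0, 5.0], dims="time", coords={"time": t})
y = xr.DataArray([2.0, 1.0, 0.0, np.nan, 3.0], dims="time", coords={"time": t})
print(float(covariance(x, y, dim="time")))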
Example #17
def construct_array_relative_beamvector(maintx: xr.DataArray, mainrx: xr.DataArray, tx_angle: xr.DataArray,
                                        rx_angle: xr.DataArray):
    """
    Given the orientation vectors representing the transmitter/receiver at time of ping/receive (maintx, mainrx) and
    the TX/RX steering angles (tx_angle, rx_angle), determine new 3d beam vector components at the midpoint between
    the TX and RX.  This would be the 'actual' array relative beam vector.

    This is a simplification of the actual scenario: it introduces error in xyz due to the difference in path length/
    direction between the actual tx-to-seafloor and seafloor-to-rx rays and this co-located assumption (the assumption
    being that the tx-to-seafloor and rx-to-seafloor paths are the same).

    x = +FORWARD, y=+STARBOARD, z=+DOWN

    Returns:

    3d beam vector in co-located array ref frame.  Of shape (xyz, time, beam), with 10 times and 200 beams,
    beamvecs shape would be (3, 10, 200)

    | <xarray.DataArray 'tiltangle' (xyz: 3, time: 10, beam: 200)>
    | dask.array<concatenate, shape=(3, 10, 200), dtype=float64, chunksize=(1, 10, 200), chunktype=numpy.ndarray>
    | Coordinates:
    |   * time     (time) float64 1.496e+09 1.496e+09 ...
    |   * beam     (beam) int32 0 1 2 3 4 5 6 7 8 ... 194 195 196 197 198 199 200
    |   * xyz      (xyz) object 'x' 'y' 'z'

    Parameters
    ----------
    maintx
        orientation vector for transmitter at time of transmit, 2dim of shape (time, xyz)
    mainrx
        orientation vector for receiver at time of receive, 2dim of shape (time, xyz)
    tx_angle
        transmitter tiltangle for each ping time
    rx_angle
        receiver beam pointing angle for each ping time

    Returns
    -------
    xr.DataArray
        3d beam vector in co-located array ref frame
    """

    # delta - alignment angle between tx/rx vecs
    delt = np.arccos(xr.dot(maintx, mainrx, dims=['xyz'])) - np.pi / 2
    ysub1 = -np.sin(rx_angle)

    # solve for components of 3d beam vector
    ysub1 = ysub1 / np.cos(delt)
    ysub2 = np.sin(tx_angle) * np.tan(delt)
    radial = np.sqrt((ysub1 + ysub2) ** 2 + np.sin(tx_angle) ** 2)
    x = np.sin(tx_angle)
    y = ysub1 + ysub2
    z = np.sqrt(1 - radial ** 2)

    # generate new dataarray object for beam vectors
    newx, _ = xr.broadcast(x, y)  # broadcast to duplicate x along beam dimension
    beamvecs = xr.concat([newx, y, z], pd.Index(list('xyz'), name='xyz'))
    return beamvecs
Example #18
def _xcentral_moments(
    x,
    mom,
    w=None,
    axis=0,
    last=True,
    mom_dims=None,
):

    assert isinstance(x, xr.DataArray)

    if isinstance(mom, tuple):
        mom = mom[0]

    if mom_dims is None:
        mom_dims = ("mom_0", )
    if isinstance(mom_dims, str):
        mom_dims = (mom_dims, )
    assert len(mom_dims) == 1

    if w is None:
        w = xr.ones_like(x)
    else:
        w = xr.DataArray(w).broadcast_like(x)

    if isinstance(axis, int):
        dim = x.dims[axis]
    else:
        dim = axis

    wsum = w.sum(dim=dim)
    wsum_inv = 1.0 / wsum

    xave = xr.dot(w, x, dims=dim) * wsum_inv

    p = xr.DataArray(np.arange(0, mom + 1), dims=mom_dims)
    dx = (x - xave)**p
    out = xr.dot(w, dx, dims=dim) * wsum_inv

    out.loc[{mom_dims[0]: 0}] = wsum
    out.loc[{mom_dims[0]: 1}] = xave

    if last:
        out = out.transpose(..., *mom_dims)
    return out
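
# A hedged usage sketch (synthetic data): after the in-place assignments above,
# the first two entries along the moment dimension hold the total weight and
# the weighted mean.
import numpy as np
import xarray as xr

x = xr.DataArray(np.random.rand(100), dims="rec")
w = xr.DataArray(np.random.rand(100), dims="rec")
moms = _xcentral_moments(x, mom=3, w=w, axis=0)
print(float(moms.isel(mom_0=0)), float(w.sum()))                 # total weight
print(float(moms.isel(mom_0=1)), float(xr.dot(w, x) / w.sum()))  # weighted mean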
Example #19
    def ProjectCorrsProny(self, ism='First'):
        if not hasattr(self, 'pro_evec'):
            warn(
                'PerformProny needs to be called to project, attempting now with default t0='
                + str(self.t0) + ', dt=' + str(self.dt) + ' and ism=' +
                str(ism))
            self.PerformProny()
        self.pro_proj_corr = xr.dot(self.pro_evec,
                                    self.matrix_corr,
                                    dims='jsm')
        if ism == 'First':
            self.pro_proj_corr = self.pro_proj_corr.isel(ism=0)
        else:
            self.pro_proj_corr = self.pro_proj_corr.sel(ism=ism)
        if hasattr(self, 'booted_mcorr') and self.booted_mcorr is not None:
            self.booted_pro_proj_corr = self.pro_proj_corr.copy()
            self.booted_pro_proj_corr = xr.apply_ufunc(
                lambda x: BootStrap(thisnboot=self.nboot),
                self.booted_pro_proj_corr,
                vectorize=True)
            for iboot in range(self.nboot):
                this_boot_mcorr = xr.apply_ufunc(lambda x: x.bootvals[iboot],
                                                 self.booted_mcorr,
                                                 vectorize=True)
                # this_booted_proj_corr = xr.dot(self.left_evec,this_boot_mcorr,dims='ism')
                if ism == 'First':
                    this_booted_proj_corr = this_boot_mcorr.isel(ism=0)
                else:
                    this_booted_proj_corr = this_boot_mcorr.sel(ism=ism)
                this_booted_proj_corr = xr.dot(self.pro_evec,
                                               this_booted_proj_corr,
                                               dims='jsm')

                def this_fun(itemp, boot_val):
                    itemp.bootvals = itemp.bootvals.append(
                        pa.Series([boot_val]))
                    return itemp

                self.booted_pro_proj_corr = xr.apply_ufunc(
                    this_fun,
                    self.booted_pro_proj_corr,
                    this_booted_proj_corr,
                    vectorize=True)
Example #20
def reduced_Q(Lambda, S, N_t):
    # simply log det - trace, no 2pi, no pi_t

    K = len(Lambda.label)
    # det(Lambda)
    determinants = xr.DataArray([np.linalg.det(Lambda[k]) for k in range(K)],
                                coords=[range(K)],
                                dims=['label'])

    # trace( Lambda.S)
    traces = xr.dot(Lambda, S, dims=["component_y", "component_yT"])

    # reduced Q function (part depending of Lambda only)
    return float(np.sum(0.5 * (np.log(determinants) - traces) * N_t))
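
# A small synthetic check (dimension names assumed to match the functions above):
# with identity matrices, log det = 0 and the trace term equals p, so the result
# should be 0.5 * (0 - p) * sum(N_t) = -45.
import numpy as np
import xarray as xr

K, p = 2, 3
coords = {"label": range(K), "component_y": range(p), "component_yT": range(p)}
dims = ("label", "component_y", "component_yT")
Lambda = xr.DataArray(np.stack([np.eye(p)] * K), coords=coords, dims=dims)
S = xr.DataArray(np.stack([np.eye(p)] * K), coords=coords, dims=dims)
N_t = xr.DataArray([10.0, 20.0], coords={"label": range(K)}, dims=("label",))
print(reduced_Q(Lambda, S, N_t))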
Example #21
    def pass_forward(self, inputs, func_normalize=lambda x: x):
        """Applies NeuralNet to inputs

        Arguments:
            inputs {xarray[dims: DIM_IN]} -- xarray with dimension
                DIM_IN, same size as DIM_IN in self.matrices[mkey(0, KEY_WEIGHT)]

        Keyword Arguments:
            func_normalize {function(np_array): np_array} -- function to
                apply to inputs before passing through the neural network (default: {lambda x: x})

        Raises:
            ValueError -- Missing dimension DIM_IN in inputs
            ValueError -- Size of inputs dimension DIM_IN does not match layer 0 size

        Returns:
            dict(xarray[dims: KEY_OUT_PRE or KEY_OUT_POST]) -- Dictionary containing
                (num_layers + 1) xarrays of intermediate layer outputs,
                with 2 matrices: KEY_OUT_PRE (without activation function applied)
                and KEY_OUT_POST (with activation function applied)
        """

        if DIM_IN not in inputs.dims:
            raise ValueError('Missing dimension \'' + DIM_IN + '\' in inputs')
        tsize = inputs.sizes[DIM_IN]
        msize = self.matrices[mkey(0, KEY_WEIGHT)].sizes[DIM_IN]
        if tsize != msize:
            raise ValueError('Size of \'' + DIM_IN + '\'=' + str(tsize) +
                             ' does not match layer 0 size: ' + str(msize))

        # ugly hack: remove coordinates for dimension 'inputs' if coordinates present
        if DIM_IN in inputs.coords:
            inputs = inputs.reset_index(DIM_IN, drop=True)
        activations = {
            mkey(0, KEY_OUT_PRE): inputs,
            mkey(0, KEY_OUT_POST): func_normalize(inputs)
        }
        for i in range(self.num_layers):
            pre_activation = np.add(
                xr.dot(activations[mkey(i, KEY_OUT_POST)],
                       self.matrices[mkey(i, KEY_WEIGHT)],
                       dims=(DIM_IN)), self.matrices[mkey(i, KEY_BIAS)])
            activations[mkey(i + 1, KEY_OUT_PRE)] = pre_activation.rename(
                {DIM_OUT: DIM_IN})
            activations[mkey(
                i + 1,
                KEY_OUT_POST)] = self.func_activation(pre_activation).rename(
                    {DIM_OUT: DIM_IN})
        return activations
Example #22
def test_dot_align_coords(use_dask):
    # GH 3694

    if use_dask:
        if not has_dask:
            pytest.skip("test for dask.")

    a = np.arange(30 * 4).reshape(30, 4)
    b = np.arange(30 * 4 * 5).reshape(30, 4, 5)

    # use partially overlapping coords
    coords_a = {"a": np.arange(30), "b": np.arange(4)}
    coords_b = {"a": np.arange(5, 35), "b": np.arange(1, 5)}

    da_a = xr.DataArray(a, dims=["a", "b"], coords=coords_a)
    da_b = xr.DataArray(b, dims=["a", "b", "c"], coords=coords_b)

    if use_dask:
        da_a = da_a.chunk({"a": 3})
        da_b = da_b.chunk({"a": 3})

    # join="inner" is the default
    actual = xr.dot(da_a, da_b)
    # `dot` sums over the common dimensions of the arguments
    expected = (da_a * da_b).sum(["a", "b"])
    xr.testing.assert_allclose(expected, actual)

    actual = xr.dot(da_a, da_b, dims=...)
    expected = (da_a * da_b).sum()
    xr.testing.assert_allclose(expected, actual)

    with xr.set_options(arithmetic_join="exact"):
        with raises_regex(ValueError, "indexes along dimension"):
            xr.dot(da_a, da_b)

    # NOTE: dot always uses `join="inner"` because `(a * b).sum()` yields the same
    # result for all join methods (except "exact")
    with xr.set_options(arithmetic_join="left"):
        actual = xr.dot(da_a, da_b)
        expected = (da_a * da_b).sum(["a", "b"])
        xr.testing.assert_allclose(expected, actual)

    with xr.set_options(arithmetic_join="right"):
        actual = xr.dot(da_a, da_b)
        expected = (da_a * da_b).sum(["a", "b"])
        xr.testing.assert_allclose(expected, actual)

    with xr.set_options(arithmetic_join="outer"):
        actual = xr.dot(da_a, da_b)
        expected = (da_a * da_b).sum(["a", "b"])
        xr.testing.assert_allclose(expected, actual)
Example #23
def aggregate_clustersum(ds, cluster, clusterdim):
    """Aggregate a 3-dimensional array over certain points (latitude, longitude).

    Parameters
    ----------
    ds : xr.Dataset
        the array to aggregate (collapse) spatially
    cluster : xr.DataArray
        3-dimensional array (clusterdim, latitude, longitude),
        `clusterdim` contains the True/False mask of points to aggregate over
        e.g. len(clusterdim)=4 means you have 4 clusters
    clusterdim : str
        dimension name to access the different True/False masks

    Returns
    -------
    xr.DataArray
        1-dimensional
    """
    out = xr.Dataset()

    # enforce same coordinates
    interp = True
    if (len(ds.latitude.values) == len(cluster.latitude.values)
            and len(ds.longitude.values) == len(cluster.longitude.values)):
        if (np.allclose(ds.latitude.values, cluster.latitude.values) and
                np.allclose(ds.longitude.values, cluster.longitude.values)):
            interp = False
    if interp:
        ds = ds.interp(latitude=cluster.latitude, longitude=cluster.longitude)
    area_per_gridpoint = calc_area(ds.isel(time=0))

    if isinstance(ds, xr.DataArray):
        ds = ds.to_dataset()

    for var in ds:
        for cl in cluster.coords[clusterdim]:
            newname = var + '_cluster' + str(cl.values)
            this_cluster = cluster.sel({clusterdim: cl})

            da = ds[var].where(this_cluster,
                               0.)  # no contribution from outside cluster
            out[newname] = xr.dot(da, area_per_gridpoint)
    return out.drop(clusterdim)
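
# A standalone sketch of the aggregation pattern above (synthetic mask and area;
# calc_area is external to this snippet): zero out points outside the cluster,
# then take an area-weighted sum with xr.dot.
import numpy as np
import xarray as xr

field = xr.DataArray(np.ones((3, 4)), dims=("latitude", "longitude"))
in_cluster = xr.DataArray(np.random.rand(3, 4) > 0.5, dims=("latitude", "longitude"))
area_per_gridpoint = xr.DataArray(np.full((3, 4), 100.0), dims=("latitude", "longitude"))
cluster_sum = xr.dot(field.where(in_cluster, 0.0), area_per_gridpoint)  # scalar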
Example #24
def project_onto_eof(field, eofs, sensor_dims, weight=None):
    """Project a field onto a set of provided EOFs to generate a corresponding set of
    pseudo-PCs

    Parameters
    ----------
    field : xarray DataArray
        Array containing the data to project onto the EOFs
    eofs : xarray DataArray
        Array containing the set of EOFs to project onto.
    sensor_dims : str, optional
        EOFs sensor dimension.
    weight : str or xarray DataArray or Dataset
        Weighting to apply prior to projection. This should match the weighting
        used to calculate the eofs (see xeof.eof)

    Returns
    -------
    projection : xarray DataArray
        Array containing the pseudo-PCs

    Examples
    --------
    >>> A = xr.DataArray(np.random.normal(size=(6,4,40)),
    ...                  coords=[('lat', np.arange(-75,76,30)), ('lon', np.arange(45,316,90)),
    ...                          ('time', pd.date_range('2000-01-01', periods=40, freq='M'))])
    >>> eofs = xeof.eof(A, sensor_dims=['lat','lon'])
    >>> project_onto_eof(A, eofs['eof'], sensor_dims=['lat','lon'])
    <xarray.DataArray (mode: 20, time: 40)>
    array([[ ... ]])
    Coordinates:
      * mode     (mode) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
      * time     (time) datetime64[ns] 2000-01-31 2000-02-29 ... 2003-04-30

    """

    if not isinstance(field, xr.DataArray):
        raise ValueError("field must be a xarray DataArray")
    if not isinstance(eofs, xr.DataArray):
        raise ValueError("eofs must be a xarray DataArray")

    field_weighted = _apply_weights(field, weight)

    return xr.dot(eofs, field_weighted, dims=sensor_dims)
Example #25
    def reconstructed_fields(self, mode=slice(1, None)):
        '''Reconstruct original input fields based on specified `mode`s.

        Parameters
        ----------
        mode : int, slice
            Modes to be considered for reconstructing the original fields.
            The default is `slice(1, None)` which returns the original fields
            based on *all* modes.

        Returns
        -------
        dict[DataArray, DataArray]
            Left and right reconstructed fields.
        '''
        eofs = self.eofs(scaling=None)
        pcs = self.pcs(scaling='eigen')
        coords = self._field_coords
        std = self._field_stds
        mean = self._field_means

        rec_fields = {}
        for key in self._fields.keys():
            eofs[key] = eofs[key].sel(mode=mode)
            pcs[key] = pcs[key].sel(mode=mode)
            rec_fields[key] = xr.dot(pcs[key],
                                     eofs[key].conjugate(),
                                     dims=['mode'])
            rec_fields[key] = rec_fields[key].real

            if self._analysis['is_coslat_corrected']:
                coslat = np.cos(np.deg2rad(coords[key]['lat']))
                rec_fields[key] /= np.sqrt(coslat)

            if self._analysis['is_normalized']:
                rec_fields[key] *= std[key]

            # add mean fields
            rec_fields[key] += mean[key]

        return rec_fields
Example #26
    def pass_back(
        self,
        activations,
        goal_label,
        func_loss_d=lambda output_v, goal_v: np.subtract(goal_v, output_v)):
        """Backpropagates activations to get gradients

        Arguments:
            activations {dict(xarray[dims: KEY_OUT_PRE or KEY_OUT_POST])} -- dict which is
                the return value of self.pass_forward()
            goal_label {xarray[dims: DIM_LABEL]} -- array of onehot vectors encoded
                along dim=DIM_LABEL

        Keyword Arguments:
            func_loss_d {function(xarray, xarray)} -- derivative of loss function,
                returns gradients of dim=DIM_OUT same size as final layer outputs
                (default: {lambda output_v, goal_v: np.subtract(goal_v, output_v)})

        Returns:
            dict(xarray[dims: DIM_IN, DIM_OUT]) -- dict of gradients, containing
                xarrays in same format as self.matrices
        """

        gradients = {}
        partial_d = func_loss_d(
            activations[mkey(self.num_layers, KEY_OUT_POST)],
            goal_label.rename({DIM_LABEL: DIM_IN}))
        for i in reversed(range(self.num_layers)):
            partial_d = np.multiply(
                partial_d,
                self.func_activation_d(activations[mkey(
                    i + 1, KEY_OUT_PRE)])).rename({DIM_IN: DIM_OUT})
            # times 1, the bias's derivative
            gradients[mkey(i, KEY_BIAS)] = partial_d
            gradients[mkey(i, KEY_WEIGHT)] = np.multiply(
                partial_d, activations[mkey(i, KEY_OUT_POST)])  # times input
            partial_d = xr.dot(partial_d,
                               self.matrices[mkey(i, KEY_WEIGHT)],
                               dims=(DIM_OUT))
        return gradients
Example #27
def build_geographic_beam_vectors(rotgeo: xr.DataArray, beamvecs: xr.DataArray):
    """
    Apply rotation matrix to bring transducer rel. beam vectors to geographic ref frame

    Parameters
    ----------
    rotgeo
        rotation matrices at each time/beam, of shape (beam, rot_i, time, xyz), see return_array_geographic_rotation
    beamvecs
        3d beam vector in co-located array ref frame (xyz, time, beam), see construct_array_relative_beamvector

    Returns
    -------
    xr.DataArray
        beam vectors in geographic ref frame, of shape (time, beam, bv_xyz)
    """

    bv_geo = xr.dot(rotgeo, beamvecs, dims='xyz')
    bv_geo = bv_geo.rename({'rot_i': 'bv_xyz'})
    bv_geo.coords['bv_xyz'] = ['x', 'y', 'z']
    bv_geo = bv_geo.transpose('time', 'beam', 'bv_xyz')
    return bv_geo
Example #28
def predict_xr(result_ds, regressors):
    """input: result_ds as it came out of the MLR and was saved to file, plus the regressors dataset"""
    # if produce_RI isn't called on the data, you should explicitly put the time info
    import xarray as xr
    import aux_functions_strat as aux
    rds = result_ds
    regressors = regressors.sel(time=rds.time)  # slice
    regressors = regressors.apply(aux.normalize_xr, norm=1,
                                  verbose=False)  # normalize
    reg_dict = dict(zip(rds.regressors.values, regressors.data_vars.values()))
    # make sure that all the regressors names are linking to their respective dataarrays
    for key, value in reg_dict.items():
        # print(key, value)
        assert value.name == key
    reg_da = xr.concat(reg_dict.values(), dim='regressors')
    reg_da['regressors'] = list(reg_dict.keys())
    reg_da.name = 'regressors_time_series'
    rds['predicted'] = xr.dot(rds.params, reg_da) + rds.intercept
    rds = aux.xr_order(rds)
    # returns the same dataset but with the total predicted reconstructed geo-time-series field
    result_ds = rds
    return result_ds
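
# A generic sketch of the reconstruction step above (made-up dims and sizes):
# multiply the fitted params by the regressor time series, summing over the
# shared 'regressors' dimension, then add the intercept.
import numpy as np
import xarray as xr

params = xr.DataArray(np.random.rand(3, 10), dims=("regressors", "lat"))
reg_da = xr.DataArray(np.random.rand(3, 50), dims=("regressors", "time"))
intercept = xr.DataArray(np.zeros(10), dims=("lat",))
predicted = xr.dot(params, reg_da) + intercept  # dims: 'lat' and 'time'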
Example #29
def RMSE_persistence(pred, obs):
    """Calculate the RMSE for persistence forecasts.

    Parameters
    ----------
    pred : xr.DataArray
        the prediction array, from which the timestamps are taken
        to verify the persistence forecast on
    obs : xr.DataArray
        the true values to compare with

    TODO: now assuming pred.forecast_day.values is scalar
    """
    inits = pred.init_time.values

    err = np.zeros(len(inits))
    for i, init in enumerate(inits):
        valid_time = pred.sel(init_time=init).time

        persistence = obs.sel(time=init)
        d = persistence - obs.sel(time=valid_time)
        err[i] = float(xr.dot(d, d))

    return np.sqrt(np.mean(err))
Example #30
    def reconstructed_fields(self, mode=slice(1,None)):
        eofs    = self.eofs(scaling=None)
        pcs     = self.pcs(scaling='eigen')
        coords  = self._field_coords
        std     = self._field_stds
        mean    = self._field_means

        rec_fields = {}
        for key in self._fields.keys():
            eofs[key]   = eofs[key].sel(mode=mode)
            pcs[key]    = pcs[key].sel(mode=mode)
            rec_fields[key] = xr.dot(pcs[key],eofs[key].conjugate(),dims=['mode'])
            rec_fields[key] = rec_fields[key].real

            if self._analysis['is_coslat_corrected']:
                rec_fields[key] /= np.sqrt(np.cos(np.deg2rad(coords[key]['lat'])))

            if self._analysis['is_normalized']:
                rec_fields[key] *= std[key]

            # add mean fields
            rec_fields[key]  += mean[key]

        return rec_fields
Example #31
def test_dot(use_dask):
    if use_dask:
        if not has_dask:
            pytest.skip('test for dask.')

    a = np.arange(30 * 4).reshape(30, 4)
    b = np.arange(30 * 4 * 5).reshape(30, 4, 5)
    c = np.arange(5 * 60).reshape(5, 60)
    da_a = xr.DataArray(a, dims=['a', 'b'],
                        coords={'a': np.linspace(0, 1, 30)})
    da_b = xr.DataArray(b, dims=['a', 'b', 'c'],
                        coords={'a': np.linspace(0, 1, 30)})
    da_c = xr.DataArray(c, dims=['c', 'e'])
    if use_dask:
        da_a = da_a.chunk({'a': 3})
        da_b = da_b.chunk({'a': 3})
        da_c = da_c.chunk({'c': 3})

    actual = xr.dot(da_a, da_b, dims=['a', 'b'])
    assert actual.dims == ('c', )
    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
    assert isinstance(actual.variable.data, type(da_a.variable.data))

    actual = xr.dot(da_a, da_b)
    assert actual.dims == ('c', )
    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
    assert isinstance(actual.variable.data, type(da_a.variable.data))

    if use_dask:
        import dask
        if LooseVersion(dask.__version__) < LooseVersion('0.17.3'):
            pytest.skip("needs dask.array.einsum")

    # if only a single array is passed without a dims argument, just return
    # it as is
    actual = xr.dot(da_a)
    assert da_a.identical(actual)

    # test for variable
    actual = xr.dot(da_a.variable, da_b.variable)
    assert actual.dims == ('c', )
    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
    assert isinstance(actual.data, type(da_a.variable.data))

    if use_dask:
        da_a = da_a.chunk({'a': 3})
        da_b = da_b.chunk({'a': 3})
        actual = xr.dot(da_a, da_b, dims=['b'])
        assert actual.dims == ('a', 'c')
        assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
        assert isinstance(actual.variable.data, type(da_a.variable.data))

    actual = xr.dot(da_a, da_b, dims=['b'])
    assert actual.dims == ('a', 'c')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()

    actual = xr.dot(da_a, da_b, dims='b')
    assert actual.dims == ('a', 'c')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()

    actual = xr.dot(da_a, da_b, dims='a')
    assert actual.dims == ('b', 'c')
    assert (actual.data == np.einsum('ij,ijk->jk', a, b)).all()

    actual = xr.dot(da_a, da_b, dims='c')
    assert actual.dims == ('a', 'b')
    assert (actual.data == np.einsum('ij,ijk->ij', a, b)).all()

    actual = xr.dot(da_a, da_b, da_c, dims=['a', 'b'])
    assert actual.dims == ('c', 'e')
    assert (actual.data == np.einsum('ij,ijk,kl->kl ', a, b, c)).all()

    # should work with tuple
    actual = xr.dot(da_a, da_b, dims=('c', ))
    assert actual.dims == ('a', 'b')
    assert (actual.data == np.einsum('ij,ijk->ij', a, b)).all()

    # default dims
    actual = xr.dot(da_a, da_b, da_c)
    assert actual.dims == ('e', )
    assert (actual.data == np.einsum('ij,ijk,kl->l ', a, b, c)).all()

    # 1 array summation
    actual = xr.dot(da_a, dims='a')
    assert actual.dims == ('b', )
    assert (actual.data == np.einsum('ij->j ', a)).all()

    # empty dim
    actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims='a')
    assert actual.dims == ('b', )
    assert (actual.data == np.zeros(actual.shape)).all()

    # Invalid cases
    if not use_dask or LooseVersion(dask.__version__) > LooseVersion('0.17.4'):
        with pytest.raises(TypeError):
            xr.dot(da_a, dims='a', invalid=None)
    with pytest.raises(TypeError):
        xr.dot(da_a.to_dataset(name='da'), dims='a')
    with pytest.raises(TypeError):
        xr.dot(dims='a')

    # einsum parameters
    actual = xr.dot(da_a, da_b, dims=['b'], order='C')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
    assert actual.values.flags['C_CONTIGUOUS']
    assert not actual.values.flags['F_CONTIGUOUS']
    actual = xr.dot(da_a, da_b, dims=['b'], order='F')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
    # dask converts Fortran arrays to C order when merging the final array
    if not use_dask:
        assert not actual.values.flags['C_CONTIGUOUS']
        assert actual.values.flags['F_CONTIGUOUS']

    # einsum has a constant string as the first parameter, which makes
    # it hard to pass to xarray.apply_ufunc.
    # make sure dot() uses functools.partial(einsum, subscripts), which
    # can be pickled, and not a lambda, which can't.
    pickle.loads(pickle.dumps(xr.dot(da_a)))