示例#1
0
def ttest_ind(a, b, axis=0, equal_var=True):
    v1 = da.var(a, axis, ddof=1)  # XXX: np -> da
    v2 = da.var(b, axis, ddof=1)  # XXX: np -> da
    n1 = a.shape[axis]
    n2 = b.shape[axis]

    if equal_var:
        df, denom = _equal_var_ttest_denom(v1, n1, v2, n2)
    else:
        df, denom = _unequal_var_ttest_denom(v1, n1, v2, n2)

    res = _ttest_ind_from_stats(da.mean(a, axis), da.mean(b, axis), denom, df)

    return delayed(Ttest_indResult, nout=2)(*res)
示例#2
0
def ttest_ind(a, b, axis=0, equal_var=True):
    v1 = da.var(a, axis, ddof=1)  # XXX: np -> da
    v2 = da.var(b, axis, ddof=1)  # XXX: np -> da
    n1 = a.shape[axis]
    n2 = b.shape[axis]

    if equal_var:
        df, denom = _equal_var_ttest_denom(v1, n1, v2, n2)
    else:
        df, denom = _unequal_var_ttest_denom(v1, n1, v2, n2)

    res = _ttest_ind_from_stats(da.mean(a, axis), da.mean(b, axis), denom, df)

    return delayed(Ttest_indResult, nout=2)(*res)
示例#3
0
def ttest_1samp(a, popmean, axis=0, nan_policy="propagate"):
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented.")
    n = a.shape[axis]
    df = n - 1

    d = da.mean(a, axis) - popmean
    v = da.var(a, axis, ddof=1)
    denom = da.sqrt(v / float(n))

    with np.errstate(divide="ignore", invalid="ignore"):
        t = da.divide(d, denom)
    t, prob = _ttest_finish(df, t)
    return delayed(Ttest_1sampResult, nout=2)(t, prob)
示例#4
0
def ttest_1samp(a, popmean, axis=0, nan_policy='propagate'):
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")
    n = a.shape[axis]
    df = n - 1

    d = da.mean(a, axis) - popmean
    v = da.var(a, axis, ddof=1)
    denom = da.sqrt(v / float(n))

    with np.errstate(divide='ignore', invalid='ignore'):
        t = da.divide(d, denom)
    t, prob = _ttest_finish(df, t)
    return delayed(Ttest_1sampResult, nout=2)(t, prob)
示例#5
0
def ttest_rel(a, b, axis=0, nan_policy="propagate"):
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented.")

    n = a.shape[axis]
    df = float(n - 1)

    d = (a - b).astype(np.float64)
    v = da.var(d, axis, ddof=1)
    dm = da.mean(d, axis)
    denom = da.sqrt(v / float(n))

    with np.errstate(divide="ignore", invalid="ignore"):
        t = da.divide(dm, denom)
    t, prob = _ttest_finish(df, t)

    return delayed(Ttest_relResult, nout=2)(t, prob)
示例#6
0
def ttest_rel(a, b, axis=0, nan_policy='propagate'):
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")

    n = a.shape[axis]
    df = float(n - 1)

    d = (a - b).astype(np.float64)
    v = da.var(d, axis, ddof=1)
    dm = da.mean(d, axis)
    denom = da.sqrt(v / float(n))

    with np.errstate(divide='ignore', invalid='ignore'):
        t = da.divide(dm, denom)
    t, prob = _ttest_finish(df, t)

    return delayed(Ttest_relResult, nout=2)(t, prob)
示例#7
0
def calc_eofs(data, num_eigs, ret_pcs=False, var_stats_dict=None):
    """
    Method to calculate the EOFs of given  dataset.  This assumes data comes in as
    an m x n matrix where m is the temporal dimension and n is the spatial
    dimension.

    Parameters
    ----------
    data: ndarray
        Dataset to calculate EOFs from
    num_eigs: int
        Number of eigenvalues/vectors to return.  Must be less than min(m, n).
    ret_pcs: bool, optional
        Return principal component matrix along with EOFs
    var_stats_dict: dict, optional
        Dictionary target to star some simple statistics about the EOF
        calculation.  Note: if this is provided for a dask array it prompts two
        SVD calculations for both the compressed and full singular values.

    Returns
    -------
    eofs: ndarray
        The eofs (as column vectors) of the data with dimensions n x k where
        k is the num_eigs.
    svals: ndarray
        Singular values from the svd decomposition.  Returned as a row vector
        in order from largest to smallest.
    """

    if is_dask_array(data):
        pcs, full_svals, eofs = da.linalg.svd_compressed(data, num_eigs)
        var = da.var(data, axis=0)

        out_svals = np.zeros(num_eigs)
        out_eofs = np.zeros((num_eigs, data.shape[1]))
        out_pcs = np.zeros((data.shape[0], num_eigs))
        out_var = np.zeros((data.shape[1]))
        da.store([eofs, full_svals, pcs, var],
                 [out_eofs, out_svals, out_pcs, out_var])

        out_eofs = out_eofs.T
        out_pcs = out_pcs.T

    else:
        eofs, full_svals, pcs = svd(data[:].T, full_matrices=False)
        out_eofs = eofs[:, :num_eigs]
        out_svals = full_svals[:num_eigs]
        out_pcs = pcs[:num_eigs]
        out_var = data[:].var(ddof=1, axis=0)

    # variance stats
    if var_stats_dict is not None:
        try:
            nt = data.shape[0]
            ns = data.shape[1]
            eig_vals = (out_svals**2) / nt
            total_var = out_var.sum()
            var_expl_by_mode = eig_vals / total_var
            var_expl_by_retained = var_expl_by_mode[0:num_eigs].sum()

            var_stats_dict['nt'] = nt
            var_stats_dict['ns'] = ns
            var_stats_dict['eigvals'] = eig_vals
            var_stats_dict['num_ret_modes'] = num_eigs
            var_stats_dict['total_var'] = total_var
            var_stats_dict['var_expl_by_mode'] = var_expl_by_mode
            var_stats_dict['var_expl_by_ret'] = var_expl_by_retained
        except TypeError as e:
            print('Must past dictionary type to var_stats_dict in order to ' \
                  'output variance statistics.')
            print(e)

    if ret_pcs:
        return out_eofs, out_svals, out_pcs
    else:
        return out_eofs, out_svals