def ttest_ind(a, b, axis=0, equal_var=True):
    """Two-sample t-test for independent samples, built on dask arrays.

    Mirrors ``scipy.stats.ttest_ind`` but performs the reductions lazily
    with ``da.var``/``da.mean`` (the ``np -> da`` port noted in the
    original) and wraps the result in a delayed ``Ttest_indResult``.

    Parameters
    ----------
    a, b : dask arrays
        The two samples to compare.
    axis : int, optional
        Axis along which to compute the test (default 0).
    equal_var : bool, optional
        If True, assume equal population variances (pooled denominator);
        otherwise use Welch's correction.

    Returns
    -------
    delayed
        A delayed ``Ttest_indResult`` (statistic, pvalue) pair.
    """
    # Sample variances with ddof=1 (unbiased), as scipy does.
    var_a = da.var(a, axis, ddof=1)
    var_b = da.var(b, axis, ddof=1)
    size_a = a.shape[axis]
    size_b = b.shape[axis]

    denom_fn = _equal_var_ttest_denom if equal_var else _unequal_var_ttest_denom
    df, denom = denom_fn(var_a, size_a, var_b, size_b)

    stats = _ttest_ind_from_stats(da.mean(a, axis), da.mean(b, axis), denom, df)
    return delayed(Ttest_indResult, nout=2)(*stats)
def ttest_ind(a, b, axis=0, equal_var=True):
    """Dask-backed independent two-sample t-test.

    Lazily computes the t statistic and p-value for the means of two
    independent samples along ``axis``, following the scipy algorithm
    (``np`` reductions replaced with ``da`` equivalents).

    Parameters
    ----------
    a, b : dask arrays
        Samples to compare.
    axis : int, optional
        Reduction axis (default 0).
    equal_var : bool, optional
        True for the pooled-variance test, False for Welch's t-test.

    Returns
    -------
    delayed
        Delayed ``Ttest_indResult`` (statistic, pvalue).
    """
    v1 = da.var(a, axis, ddof=1)  # unbiased sample variance of a
    v2 = da.var(b, axis, ddof=1)  # unbiased sample variance of b
    n1, n2 = a.shape[axis], b.shape[axis]

    if equal_var:
        df, denom = _equal_var_ttest_denom(v1, n1, v2, n2)
    else:
        df, denom = _unequal_var_ttest_denom(v1, n1, v2, n2)

    mean_a = da.mean(a, axis)
    mean_b = da.mean(b, axis)
    res = _ttest_ind_from_stats(mean_a, mean_b, denom, df)
    return delayed(Ttest_indResult, nout=2)(*res)
def ttest_1samp(a, popmean, axis=0, nan_policy="propagate"):
    """One-sample t-test against a known population mean, on dask arrays.

    Parameters
    ----------
    a : dask array
        Sample observations.
    popmean : float or array-like
        Expected population mean under the null hypothesis.
    axis : int, optional
        Reduction axis (default 0).
    nan_policy : str, optional
        Only ``"propagate"`` is supported.

    Returns
    -------
    delayed
        Delayed ``Ttest_1sampResult`` (statistic, pvalue).

    Raises
    ------
    NotImplementedError
        If ``nan_policy`` is anything other than ``"propagate"``.
    """
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented.")

    n = a.shape[axis]
    df = n - 1

    # t = (mean(a) - popmean) / sqrt(var(a) / n)
    diff = da.mean(a, axis) - popmean
    denom = da.sqrt(da.var(a, axis, ddof=1) / float(n))

    # Suppress divide-by-zero / invalid warnings, as scipy does.
    with np.errstate(divide="ignore", invalid="ignore"):
        t = da.divide(diff, denom)

    t, prob = _ttest_finish(df, t)
    return delayed(Ttest_1sampResult, nout=2)(t, prob)
def ttest_1samp(a, popmean, axis=0, nan_policy='propagate'):
    """Dask port of scipy's one-sample t-test.

    Tests whether the mean of ``a`` along ``axis`` differs from
    ``popmean``; all reductions are lazy dask operations.

    Parameters
    ----------
    a : dask array
        Sample data.
    popmean : float or array-like
        Null-hypothesis mean.
    axis : int, optional
        Axis of the reduction (default 0).
    nan_policy : str, optional
        Only ``'propagate'`` is implemented.

    Returns
    -------
    delayed
        Delayed ``Ttest_1sampResult`` (statistic, pvalue).
    """
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")

    count = a.shape[axis]
    dof = count - 1

    numerator = da.mean(a, axis) - popmean
    variance = da.var(a, axis, ddof=1)
    scale = da.sqrt(variance / float(count))

    # Matches scipy: silence 0/0 and x/0 warnings during the division.
    with np.errstate(divide='ignore', invalid='ignore'):
        statistic = da.divide(numerator, scale)

    statistic, pvalue = _ttest_finish(dof, statistic)
    return delayed(Ttest_1sampResult, nout=2)(statistic, pvalue)
def ttest_rel(a, b, axis=0, nan_policy="propagate"):
    """Paired (related-samples) t-test on dask arrays.

    Computes the t statistic of the pairwise differences ``a - b``
    along ``axis``, as in ``scipy.stats.ttest_rel``.

    Parameters
    ----------
    a, b : dask arrays
        Paired samples; must have matching shapes along ``axis``.
    axis : int, optional
        Reduction axis (default 0).
    nan_policy : str, optional
        Only ``"propagate"`` is supported.

    Returns
    -------
    delayed
        Delayed ``Ttest_relResult`` (statistic, pvalue).

    Raises
    ------
    NotImplementedError
        If ``nan_policy`` is anything other than ``"propagate"``.
    """
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented.")

    n = a.shape[axis]
    df = float(n - 1)

    # Work on the pairwise differences, promoted to float64 as scipy does.
    diff = (a - b).astype(np.float64)
    mean_diff = da.mean(diff, axis)
    denom = da.sqrt(da.var(diff, axis, ddof=1) / float(n))

    with np.errstate(divide="ignore", invalid="ignore"):
        t = da.divide(mean_diff, denom)

    t, prob = _ttest_finish(df, t)
    return delayed(Ttest_relResult, nout=2)(t, prob)
def ttest_rel(a, b, axis=0, nan_policy='propagate'):
    """Dask port of scipy's related-samples t-test.

    Forms the differences ``a - b`` and runs a one-sample test of their
    mean against zero; all heavy reductions are lazy dask calls.

    Parameters
    ----------
    a, b : dask arrays
        Paired observations.
    axis : int, optional
        Reduction axis (default 0).
    nan_policy : str, optional
        Only ``'propagate'`` is implemented.

    Returns
    -------
    delayed
        Delayed ``Ttest_relResult`` (statistic, pvalue).
    """
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")

    count = a.shape[axis]
    dof = float(count - 1)

    deltas = (a - b).astype(np.float64)  # float64 for stable variance
    delta_var = da.var(deltas, axis, ddof=1)
    delta_mean = da.mean(deltas, axis)
    scale = da.sqrt(delta_var / float(count))

    # Matches scipy: ignore divide/invalid warnings from zero variance.
    with np.errstate(divide='ignore', invalid='ignore'):
        statistic = da.divide(delta_mean, scale)

    statistic, pvalue = _ttest_finish(dof, statistic)
    return delayed(Ttest_relResult, nout=2)(statistic, pvalue)
def calc_eofs(data, num_eigs, ret_pcs=False, var_stats_dict=None):
    """
    Method to calculate the EOFs of given dataset.

    This assumes data comes in as an m x n matrix where m is the temporal
    dimension and n is the spatial dimension.

    Parameters
    ----------
    data: ndarray
        Dataset to calculate EOFs from
    num_eigs: int
        Number of eigenvalues/vectors to return.  Must be less than min(m, n).
    ret_pcs: bool, optional
        Return principal component matrix along with EOFs
    var_stats_dict: dict, optional
        Dictionary target to store some simple statistics about the EOF
        calculation.  Note: if this is provided for a dask array it prompts
        two SVD calculations for both the compressed and full singular
        values.

    Returns
    -------
    eofs: ndarray
        The eofs (as column vectors) of the data with dimensions n x k where
        k is the num_eigs.
    svals: ndarray
        Singular values from the svd decomposition. Returned as a row vector
        in order from largest to smallest.
    """
    if is_dask_array(data):
        # Compressed (truncated) SVD keeps only the leading num_eigs modes.
        pcs, full_svals, eofs = da.linalg.svd_compressed(data, num_eigs)
        # ddof=1 to match the in-memory branch below (unbiased variance);
        # the original used the dask default ddof=0, making the variance
        # statistics backend-dependent.
        var = da.var(data, axis=0, ddof=1)

        # Pre-allocate numpy targets and materialize all four dask results
        # in a single pass over the data.
        out_svals = np.zeros(num_eigs)
        out_eofs = np.zeros((num_eigs, data.shape[1]))
        out_pcs = np.zeros((data.shape[0], num_eigs))
        out_var = np.zeros((data.shape[1]))
        da.store([eofs, full_svals, pcs, var],
                 [out_eofs, out_svals, out_pcs, out_var])
        out_eofs = out_eofs.T
        out_pcs = out_pcs.T
    else:
        # Full SVD of the transposed data, then truncate to num_eigs modes.
        eofs, full_svals, pcs = svd(data[:].T, full_matrices=False)
        out_eofs = eofs[:, :num_eigs]
        out_svals = full_svals[:num_eigs]
        out_pcs = pcs[:num_eigs]
        out_var = data[:].var(ddof=1, axis=0)

    # variance stats
    if var_stats_dict is not None:
        try:
            nt = data.shape[0]
            ns = data.shape[1]
            # Eigenvalues of the covariance relate to singular values via
            # lambda_i = s_i**2 / nt.
            eig_vals = (out_svals ** 2) / nt
            total_var = out_var.sum()
            var_expl_by_mode = eig_vals / total_var
            var_expl_by_retained = var_expl_by_mode[0:num_eigs].sum()

            var_stats_dict['nt'] = nt
            var_stats_dict['ns'] = ns
            var_stats_dict['eigvals'] = eig_vals
            var_stats_dict['num_ret_modes'] = num_eigs
            var_stats_dict['total_var'] = total_var
            var_stats_dict['var_expl_by_mode'] = var_expl_by_mode
            var_stats_dict['var_expl_by_ret'] = var_expl_by_retained
        except TypeError as e:
            # Best effort: a non-dict target should not abort the EOF
            # computation itself.
            print('Must pass dictionary type to var_stats_dict in order to '
                  'output variance statistics.')
            print(e)

    if ret_pcs:
        return out_eofs, out_svals, out_pcs
    else:
        return out_eofs, out_svals