Exemplo n.º 1
0
def _mat_mat_corr_sparse(
    X: csr_matrix,
    Y: np.ndarray,
) -> np.ndarray:
    """Correlate every row of a sparse matrix with every column of a dense one.

    This function is borrowed from cellrank.

    Parameters
    ----------
    X
        Sparse matrix. Each row is treated as one variable observed over
        ``X.shape[1]`` positions.
    Y
        Dense array whose columns are correlated against the rows of ``X``.
        It must be compatible with ``X @ Y``; the column statistics are
        reshaped to ``(1, -1)``, so a 2-D array is assumed.

    Returns
    -------
    Array whose ``[i, j]`` entry relates row ``i`` of ``X`` to column ``j``
    of ``Y``. A row or column with zero standard deviation makes the
    denominator zero, so the corresponding entries are not finite.
    """
    n = X.shape[1]

    # Row-wise mean and standard deviation of X, computed without densifying:
    # std = sqrt(E[x^2] - E[x]^2).
    X_bar = np.reshape(np.array(X.mean(axis=1)), (-1, 1))
    X_std = np.reshape(np.sqrt(np.array(X.power(2).mean(axis=1)) - (X_bar**2)),
                       (-1, 1))

    # Column-wise mean and standard deviation of Y (np.std default, ddof=0).
    y_bar = np.reshape(np.mean(Y, axis=0), (1, -1))
    y_std = np.reshape(np.std(Y, axis=0), (1, -1))

    # NOTE(review): both standard deviations use ddof=0 while the denominator
    # uses (n - 1), so the result equals Pearson's r scaled by n / (n - 1).
    # Kept as-is to preserve the values callers currently receive.

    # `np.warnings` is not available in recent NumPy releases, and the text of
    # the "invalid value" warning depends on the ufunc name. `np.errstate`
    # silences the same 0/0 condition without relying on either.
    with np.errstate(invalid="ignore"):
        return (X @ Y - (n * X_bar * y_bar)) / ((n - 1) * X_std * y_std)
Exemplo n.º 2
0
def t_test(
    X: csr_matrix,
    cluster_labels: List[str],
    cond_labels: List[str],
    gene_names: List[str],
    n_jobs: int,
    temp_folder: str,
    verbose: bool,
) -> List[pd.DataFrame]:
    """Run Welch's t-test by dispatching ``calc_t`` once per cluster in parallel.

    Parameters
    ----------
    X
        Sparse matrix; its ``data``, ``indices``, ``indptr`` and ``shape``
        are handed to every ``calc_t`` job.
    cluster_labels
        Cluster assignment. One job is created per entry of
        ``cluster_labels.categories``, so the object must expose that
        attribute (presumably a pandas Categorical -- confirm with callers).
    cond_labels
        Condition labels forwarded to ``calc_t``. When ``None``, the
        column-wise sum and sum of squares of ``X`` are precomputed and
        forwarded; otherwise ``None`` is forwarded for both.
    gene_names
        Forwarded unchanged to ``calc_t``.
    n_jobs
        Number of parallel jobs given to ``Parallel``.
    temp_folder
        Temporary folder given to ``Parallel``.
    verbose
        Forwarded to ``calc_t``; also enables the timing log message.

    Returns
    -------
    The collected ``calc_t`` results, one per cluster category.
    """
    start = time.time()

    # Whole-matrix column statistics are only needed when there is no
    # condition split; otherwise the workers receive None for both.
    if cond_labels is None:
        sum_vec = X.sum(axis=0).A1
        sum2_vec = X.power(2).sum(axis=0).A1
    else:
        sum_vec = None
        sum2_vec = None

    runner = Parallel(n_jobs=n_jobs, max_nbytes=1e7, temp_folder=temp_folder)
    job = delayed(calc_t)
    # The CSR components are passed separately rather than as the matrix.
    pending = (
        job(
            clust_id,
            X.data,
            X.indices,
            X.indptr,
            X.shape,
            cluster_labels,
            cond_labels,
            gene_names,
            sum_vec,
            sum2_vec,
            verbose,
        )
        for clust_id in cluster_labels.categories
    )
    result_list = runner(pending)

    end = time.time()
    if verbose:
        elapsed = end - start
        logger.info(
            "Welch's t-test is done. Time spent = {:.2f}s.".format(elapsed))

    return result_list
Exemplo n.º 3
0
def axis_norms(X: sparse.csr_matrix,
               norm: str = "l1",
               axis: int = 1) -> np.ndarray:
    """Return the vector norm of a sparse matrix along one axis.

    Parameters
    ----------
    X
        Sparse matrix to reduce.
    norm
        ``"l1"`` gives the sum of absolute values. Any other value is
        treated as the L2 (Euclidean) norm.
    axis
        Axis that is summed over: ``1`` yields one norm per row, ``0`` one
        norm per column.

    Returns
    -------
    1-D array of norms.
    """
    if norm == "l1":
        # Take absolute values first: a plain sum lets negative entries cancel
        # and is not a norm. For non-negative data the result is unchanged.
        return np.asarray(abs(X).sum(axis=axis)).reshape(-1)
    return np.sqrt(np.asarray(X.power(2).sum(axis=axis)).reshape(-1))