def call(meta: pd.DataFrame, counts: pd.DataFrame, interactions: pd.DataFrame, genes: pd.DataFrame,
         complexes: pd.DataFrame, complex_compositions: pd.DataFrame, iterations: int = 1000, threshold: float = 0.1,
         threads: int = 4, debug_seed=False, round_decimals: int = 1) -> (
        pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Run the cluster statistical analysis for complex interactions.

    The inputs are prefiltered, the real mean/percent analyses are computed
    per cluster combination, a shuffled analysis is run ``iterations`` times,
    and the final result tables are assembled by ``build_results``.

    :param meta: cell metadata, forwarded to the cluster builder and the
        shuffled analysis.
    :param counts: counts matrix; its sorted column labels are used as the
        cell names.
    :param interactions: candidate interactions.
    :param genes: gene table forwarded to ``prefilters`` and ``build_results``.
    :param complexes: complex table forwarded to ``prefilters``.
    :param complex_compositions: complex composition table.
    :param iterations: number of iterations passed to the shuffled analysis.
    :param threshold: threshold passed to the percent analysis.
    :param threads: thread count passed to the shuffled analysis.
    :param debug_seed: ``False`` (default) or a negative number leaves the
        random generator untouched; a value ``>= 0`` seeds numpy's global
        random generator with it.
    :param round_decimals: rounding precision passed to ``build_results``.
    :return: ``(pvalues, means, significant_means, mean_pvalue,
        deconvoluted)``; five empty DataFrames when no interaction survives
        the prefilters.
    """
    # Imported locally because the original relied on ``pd.np``, an alias that
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    import numpy as np

    core_logger.info(
        '[Cluster Statistical Analysis Complex] Threshold:{} Iterations:{} Debug-seed:{} Threads:{}'.format(
            threshold, iterations, debug_seed, threads))

    # ``False >= 0`` is true in Python, so the default must be excluded
    # explicitly; otherwise every run would silently be seeded with 0.
    if debug_seed is not False and debug_seed >= 0:
        np.random.seed(debug_seed)
        core_logger.warning('Debug random seed enabled. Setted to {}'.format(debug_seed))

    cells_names = sorted(counts.columns)

    interactions_filtered, counts_filtered, complex_in_counts = prefilters(interactions, counts, genes, complexes,
                                                                           complex_compositions)
    if interactions_filtered.empty:
        # Nothing left to analyse: return one empty frame per result slot.
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    complex_significative_protein = get_complex_significative(complex_in_counts, counts_filtered, complex_compositions,
                                                              cells_names)

    clusters = cpdb_statistical_analysis_helper.build_clusters(meta, counts_filtered)
    core_logger.info('Running Real Complex Analysis')

    cluster_interactions = cpdb_statistical_analysis_helper.get_cluster_combinations(clusters['names'])
    interactions_processed = get_interactions_processed(interactions_filtered, complex_significative_protein)

    base_result = cpdb_statistical_analysis_helper.build_result_matrix(interactions_processed, cluster_interactions)

    real_mean_analysis = cpdb_statistical_analysis_helper.mean_analysis(interactions_processed, clusters,
                                                                        cluster_interactions, base_result)

    real_percents_analysis = cpdb_statistical_analysis_helper.percent_analysis(clusters, threshold,
                                                                               interactions_processed,
                                                                               cluster_interactions,
                                                                               base_result)

    statistical_mean_analysis = cpdb_statistical_analysis_helper.shuffled_analysis(iterations, meta, counts_filtered,
                                                                                   interactions_processed,
                                                                                   cluster_interactions, base_result,
                                                                                   threads)

    result_percent = cpdb_statistical_analysis_helper.build_percent_result(real_mean_analysis,
                                                                           real_percents_analysis,
                                                                           statistical_mean_analysis,
                                                                           interactions_processed,
                                                                           cluster_interactions, base_result)
    pvalues_result, means_result, significant_means, mean_pvalue_result, deconvoluted_result = build_results(
        interactions_filtered,
        real_mean_analysis,
        result_percent,
        clusters['means'],
        complex_compositions,
        counts,
        genes,
        round_decimals
    )
    return pvalues_result, means_result, significant_means, mean_pvalue_result, deconvoluted_result
def call(
    meta: pd.DataFrame,
    counts: pd.DataFrame,
    counts_data: str,
    interactions: pd.DataFrame,
    pvalue: float,
    separator: str,
    iterations: int = 1000,
    threshold: float = 0.1,
    threads: int = 4,
    debug_seed: int = -1,
    result_precision: int = 3,
) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Run the cluster statistical analysis for simple interactions.

    The inputs are prefiltered, the real mean/percent analyses are computed
    per cluster combination, a shuffled analysis is run ``iterations`` times,
    and the final result tables are assembled by ``build_results``.

    :param meta: cell metadata, forwarded to the cluster builder and the
        shuffled analysis.
    :param counts: counts matrix.
    :param counts_data: identifier forwarded to the prefilters, the analysis
        helpers and ``build_results``.
    :param interactions: candidate interactions.
    :param pvalue: p-value forwarded to ``build_results``.
    :param separator: separator forwarded to the helper functions.
    :param iterations: number of iterations passed to the shuffled analysis.
    :param threshold: threshold passed to the percent analysis.
    :param threads: thread count passed to the shuffled analysis.
    :param debug_seed: a value ``>= 0`` seeds numpy's global random
        generator; the default ``-1`` leaves it untouched.
    :param result_precision: precision forwarded to ``build_results``.
    :return: ``(pvalues, means, significant_means, deconvoluted)``; four
        empty DataFrames when the prefilters leave no interactions or no
        counts.
    """
    # Imported locally because the original relied on ``pd.np``, an alias that
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    import numpy as np

    core_logger.info(
        '[Cluster Statistical Analysis Simple] '
        'Threshold:{} Iterations:{} Debug-seed:{} Threads:{} Precision:{}'.
        format(threshold, iterations, debug_seed, threads, result_precision))

    if debug_seed >= 0:
        np.random.seed(debug_seed)
        core_logger.warning(
            'Debug random seed enabled. Setted to {}'.format(debug_seed))

    interactions_filtered, counts_filtered = prefilters(
        counts, interactions, counts_data)

    if interactions_filtered.empty or counts_filtered.empty:
        # Nothing left to analyse: return one empty frame per result slot.
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    clusters = cpdb_statistical_analysis_helper.build_clusters(
        meta, counts_filtered)
    core_logger.info('Running Real Simple Analysis')
    cluster_interactions = cpdb_statistical_analysis_helper.get_cluster_combinations(
        clusters['names'])

    base_result = cpdb_statistical_analysis_helper.build_result_matrix(
        interactions_filtered, cluster_interactions, separator)

    real_mean_analysis = cpdb_statistical_analysis_helper.mean_analysis(
        interactions_filtered,
        clusters,
        cluster_interactions,
        base_result,
        separator,
        suffixes=('_1', '_2'),
        counts_data=counts_data)

    real_percent_analysis = cpdb_statistical_analysis_helper.percent_analysis(
        clusters,
        threshold,
        interactions_filtered,
        cluster_interactions,
        base_result,
        separator,
        suffixes=('_1', '_2'),
        counts_data=counts_data)

    statistical_mean_analysis = cpdb_statistical_analysis_helper.shuffled_analysis(
        iterations,
        meta,
        counts_filtered,
        interactions_filtered,
        cluster_interactions,
        base_result,
        threads,
        separator,
        suffixes=('_1', '_2'),
        counts_data=counts_data)

    result_percent = cpdb_statistical_analysis_helper.build_percent_result(
        real_mean_analysis, real_percent_analysis, statistical_mean_analysis,
        interactions_filtered, cluster_interactions, base_result, separator)

    pvalues_result, means_result, significant_means, deconvoluted_result = build_results(
        interactions_filtered, real_mean_analysis, result_percent,
        clusters['means'], result_precision, pvalue, counts_data)

    return pvalues_result, means_result, significant_means, deconvoluted_result
# Example #3
# 0
def call(meta: pd.DataFrame,
         counts: pd.DataFrame,
         counts_data: str,
         interactions: pd.DataFrame,
         genes: pd.DataFrame,
         complexes: pd.DataFrame,
         complex_compositions: pd.DataFrame,
         pvalue: float,
         separator: str,
         iterations: int = 1000,
         threshold: float = 0.1,
         threads: int = 4,
         debug_seed: int = -1,
         result_precision: int = 3,
         ) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Run the cluster statistical analysis.

    Counts are joined with ``genes`` on the ``counts_data`` column, re-indexed
    by ``id_multidata`` and averaged per ``id_multidata``. The inputs are then
    prefiltered, the real mean/percent analyses are computed per cluster
    combination, a shuffled analysis is run ``iterations`` times, and the
    final result tables are assembled by ``build_results``.

    The ``interactions`` and ``complex_compositions`` frames passed by the
    caller are not modified; indexed copies are used internally.

    :param meta: cell metadata, forwarded to the cluster builder and the
        shuffled analysis.
    :param counts: counts matrix; its index is matched against the
        ``counts_data`` column of ``genes`` and its sorted column labels are
        used as the cell names.
    :param counts_data: name of the ``genes`` column used to join the counts.
    :param interactions: interactions table with an ``id_interaction`` column
        plus ``multidata_1_id`` / ``multidata_2_id``.
    :param genes: gene table providing ``id_multidata``, ``ensembl``,
        ``gene_name`` and ``hgnc_symbol``.
    :param complexes: complex table forwarded to the prefilters.
    :param complex_compositions: complex composition table with an
        ``id_complex_composition`` column.
    :param pvalue: p-value forwarded to ``build_results``.
    :param separator: separator forwarded to the helper functions.
    :param iterations: number of iterations passed to the shuffled analysis.
    :param threshold: threshold passed to the percent analysis.
    :param threads: thread count passed to the shuffled analysis.
    :param debug_seed: a value ``>= 0`` seeds numpy's global random
        generator; the default ``-1`` leaves it untouched.
    :param result_precision: precision forwarded to ``build_results``.
    :return: ``(pvalues, means, significant_means, deconvoluted)``.
    :raises AllCountsFilteredException: if no counts remain after joining
        with ``genes``.
    :raises NoInteractionsFound: if no interaction survives the prefilters.
    """
    core_logger.info(
        '[Cluster Statistical Analysis] '
        'Threshold:{} Iterations:{} Debug-seed:{} Threads:{} Precision:{}'.format(threshold,
                                                                                  iterations,
                                                                                  debug_seed,
                                                                                  threads,
                                                                                  result_precision))
    if debug_seed >= 0:
        np.random.seed(debug_seed)
        core_logger.warning('Debug random seed enabled. Setted to {}'.format(debug_seed))
    cells_names = sorted(counts.columns)

    # Rebind to indexed copies instead of using ``inplace=True``: mutating the
    # caller's frames would remove the id columns and make a second call with
    # the same inputs fail with a KeyError.
    interactions = interactions.set_index('id_interaction', drop=True)
    interactions_reduced = interactions[['multidata_1_id', 'multidata_2_id']].drop_duplicates()

    complex_compositions = complex_compositions.set_index('id_complex_composition', drop=True)

    # Add id multidata to counts input
    counts = counts.merge(genes[['id_multidata', 'ensembl', 'gene_name', 'hgnc_symbol']],
                          left_index=True, right_on=counts_data)
    counts_relations = counts[['id_multidata', 'ensembl', 'gene_name', 'hgnc_symbol']].copy()

    counts = counts.set_index('id_multidata', drop=True)
    counts = counts[cells_names]
    counts = counts.astype('float32')
    # Several rows can share one id_multidata; collapse them to their mean.
    counts = counts.groupby(counts.index).mean()

    if counts.empty:
        raise AllCountsFilteredException(hint='Are you using human data?')
    # End add id multidata

    interactions_filtered, counts_filtered, complex_composition_filtered = \
        cpdb_statistical_analysis_helper.prefilters(interactions_reduced,
                                                    counts,
                                                    complexes,
                                                    complex_compositions)

    if interactions_filtered.empty:
        raise NoInteractionsFound()

    clusters = cpdb_statistical_analysis_helper.build_clusters(meta, counts_filtered, complex_composition_filtered)
    core_logger.info('Running Real Analysis')

    cluster_interactions = cpdb_statistical_analysis_helper.get_cluster_combinations(clusters['names'])

    base_result = cpdb_statistical_analysis_helper.build_result_matrix(interactions_filtered,
                                                                       cluster_interactions,
                                                                       separator)

    real_mean_analysis = cpdb_statistical_analysis_helper.mean_analysis(interactions_filtered,
                                                                        clusters,
                                                                        cluster_interactions,
                                                                        base_result,
                                                                        separator)

    real_percents_analysis = cpdb_statistical_analysis_helper.percent_analysis(clusters,
                                                                               threshold,
                                                                               interactions_filtered,
                                                                               cluster_interactions,
                                                                               base_result,
                                                                               separator)

    core_logger.info('Running Statistical Analysis')
    statistical_mean_analysis = cpdb_statistical_analysis_helper.shuffled_analysis(iterations,
                                                                                   meta,
                                                                                   counts_filtered,
                                                                                   interactions_filtered,
                                                                                   cluster_interactions,
                                                                                   complex_composition_filtered,
                                                                                   base_result,
                                                                                   threads,
                                                                                   separator)

    result_percent = cpdb_statistical_analysis_helper.build_percent_result(real_mean_analysis,
                                                                           real_percents_analysis,
                                                                           statistical_mean_analysis,
                                                                           interactions_filtered,
                                                                           cluster_interactions,
                                                                           base_result,
                                                                           separator)

    pvalues_result, means_result, significant_means, deconvoluted_result = build_results(
        interactions_filtered,
        interactions,
        counts_relations,
        real_mean_analysis,
        result_percent,
        clusters['means'],
        complex_composition_filtered,
        counts,
        genes,
        result_precision,
        pvalue,
        counts_data
    )
    return pvalues_result, means_result, significant_means, deconvoluted_result