def call(meta: pd.DataFrame, counts: pd.DataFrame, interactions: pd.DataFrame, genes: pd.DataFrame,
         complexes: pd.DataFrame, complex_compositions: pd.DataFrame, iterations: int = 1000,
         threshold: float = 0.1, threads: int = 4, debug_seed=False, round_decimals: int = 1) -> (
        pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Run the cluster statistical analysis for complex interactions.

    The inputs are prefiltered, the real mean/percent analyses are computed,
    a shuffled analysis is run ``iterations`` times, and everything is handed
    to ``build_results`` to produce the output tables.

    Args:
        meta: Passed to the cluster builder and to the shuffled analysis.
        counts: Counts table; its sorted column labels are used as cell names.
        interactions: Interactions to analyse (prefiltered before use).
        genes: Forwarded to ``prefilters`` and ``build_results``.
        complexes: Forwarded to ``prefilters``.
        complex_compositions: Forwarded to ``prefilters``,
            ``get_complex_significative`` and ``build_results``.
        iterations: Number of iterations handed to the shuffled analysis.
        threshold: Threshold handed to the percent analysis.
        threads: Thread count handed to the shuffled analysis.
        debug_seed: Non-negative value seeds NumPy's global RNG so runs are
            reproducible. ``False`` (the default) or a negative number leaves
            the RNG untouched.
        round_decimals: Forwarded to ``build_results``.

    Returns:
        ``(pvalues_result, means_result, significant_means,
        mean_pvalue_result, deconvoluted_result)`` as returned by
        ``build_results``, or five empty DataFrames when prefiltering leaves
        no interactions.
    """
    # `pd.np` was deprecated in pandas 1.0 and removed in 2.0; use numpy directly.
    import numpy as np

    core_logger.info(
        '[Cluster Statistical Analysis Complex] Threshold:{} Iterations:{} Debug-seed:{} Threads:{}'.format(
            threshold, iterations, debug_seed, threads))

    # bool is a subclass of int, so a bare `debug_seed >= 0` is true for the
    # default `False` and would silently seed the RNG with 0 on every call.
    # Treat `False` as "seeding disabled"; any other non-negative value seeds.
    if debug_seed is not False and debug_seed >= 0:
        np.random.seed(debug_seed)
        core_logger.warning('Debug random seed enabled. Setted to {}'.format(debug_seed))

    cells_names = sorted(counts.columns)

    interactions_filtered, counts_filtered, complex_in_counts = prefilters(
        interactions, counts, genes, complexes, complex_compositions)
    if interactions_filtered.empty:
        # Nothing survived prefiltering: return one empty frame per result.
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    complex_significative_protein = get_complex_significative(
        complex_in_counts, counts_filtered, complex_compositions, cells_names)

    clusters = cpdb_statistical_analysis_helper.build_clusters(meta, counts_filtered)
    core_logger.info('Running Real Complex Analysis')

    cluster_interactions = cpdb_statistical_analysis_helper.get_cluster_combinations(clusters['names'])
    interactions_processed = get_interactions_processed(interactions_filtered, complex_significative_protein)
    base_result = cpdb_statistical_analysis_helper.build_result_matrix(interactions_processed, cluster_interactions)

    real_mean_analysis = cpdb_statistical_analysis_helper.mean_analysis(
        interactions_processed, clusters, cluster_interactions, base_result)
    real_percents_analysis = cpdb_statistical_analysis_helper.percent_analysis(
        clusters, threshold, interactions_processed, cluster_interactions, base_result)

    statistical_mean_analysis = cpdb_statistical_analysis_helper.shuffled_analysis(
        iterations, meta, counts_filtered, interactions_processed, cluster_interactions, base_result, threads)

    result_percent = cpdb_statistical_analysis_helper.build_percent_result(
        real_mean_analysis, real_percents_analysis, statistical_mean_analysis,
        interactions_processed, cluster_interactions, base_result)

    pvalues_result, means_result, significant_means, mean_pvalue_result, deconvoluted_result = build_results(
        interactions_filtered,
        real_mean_analysis,
        result_percent,
        clusters['means'],
        complex_compositions,
        counts,
        genes,
        round_decimals,
    )
    return pvalues_result, means_result, significant_means, mean_pvalue_result, deconvoluted_result
def call(
        meta: pd.DataFrame,
        counts: pd.DataFrame,
        counts_data: str,
        interactions: pd.DataFrame,
        pvalue: float,
        separator: str,
        iterations: int = 1000,
        threshold: float = 0.1,
        threads: int = 4,
        debug_seed: int = -1,
        result_precision: int = 3,
) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Run the cluster statistical analysis for simple interactions.

    The inputs are prefiltered, the real mean/percent analyses are computed,
    a shuffled analysis is run ``iterations`` times, and everything is handed
    to ``build_results`` to produce the output tables.

    Args:
        meta: Passed to the cluster builder and to the shuffled analysis.
        counts: Counts table (prefiltered before use).
        counts_data: Forwarded to ``prefilters``, the analysis helpers and
            ``build_results``.
        interactions: Interactions to analyse (prefiltered before use).
        pvalue: Forwarded to ``build_results``.
        separator: Forwarded to the result-matrix and analysis helpers.
        iterations: Number of iterations handed to the shuffled analysis.
        threshold: Threshold handed to the percent analysis.
        threads: Thread count handed to the shuffled analysis.
        debug_seed: When ``>= 0``, seeds NumPy's global RNG with this value;
            negative values (the default) leave the RNG untouched.
        result_precision: Forwarded to ``build_results``.

    Returns:
        ``(pvalues_result, means_result, significant_means,
        deconvoluted_result)`` as returned by ``build_results``, or four
        empty DataFrames when prefiltering leaves no interactions or no
        counts.
    """
    # `pd.np` was deprecated in pandas 1.0 and removed in 2.0; use numpy directly.
    import numpy as np

    core_logger.info(
        '[Cluster Statistical Analysis Simple] '
        'Threshold:{} Iterations:{} Debug-seed:{} Threads:{} Precision:{}'.format(
            threshold, iterations, debug_seed, threads, result_precision))

    if debug_seed >= 0:
        np.random.seed(debug_seed)
        core_logger.warning('Debug random seed enabled. Setted to {}'.format(debug_seed))

    interactions_filtered, counts_filtered = prefilters(counts, interactions, counts_data)
    if interactions_filtered.empty or counts_filtered.empty:
        # Nothing survived prefiltering: return one empty frame per result.
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    clusters = cpdb_statistical_analysis_helper.build_clusters(meta, counts_filtered)
    core_logger.info('Running Real Simple Analysis')

    cluster_interactions = cpdb_statistical_analysis_helper.get_cluster_combinations(clusters['names'])
    base_result = cpdb_statistical_analysis_helper.build_result_matrix(
        interactions_filtered, cluster_interactions, separator)

    real_mean_analysis = cpdb_statistical_analysis_helper.mean_analysis(
        interactions_filtered, clusters, cluster_interactions, base_result, separator,
        suffixes=('_1', '_2'), counts_data=counts_data)

    real_percent_analysis = cpdb_statistical_analysis_helper.percent_analysis(
        clusters, threshold, interactions_filtered, cluster_interactions, base_result, separator,
        suffixes=('_1', '_2'), counts_data=counts_data)

    statistical_mean_analysis = cpdb_statistical_analysis_helper.shuffled_analysis(
        iterations, meta, counts_filtered, interactions_filtered, cluster_interactions, base_result,
        threads, separator, suffixes=('_1', '_2'), counts_data=counts_data)

    result_percent = cpdb_statistical_analysis_helper.build_percent_result(
        real_mean_analysis, real_percent_analysis, statistical_mean_analysis,
        interactions_filtered, cluster_interactions, base_result, separator)

    pvalues_result, means_result, significant_means, deconvoluted_result = build_results(
        interactions_filtered,
        real_mean_analysis,
        result_percent,
        clusters['means'],
        result_precision,
        pvalue,
        counts_data,
    )
    return pvalues_result, means_result, significant_means, deconvoluted_result
def call(meta: pd.DataFrame,
         counts: pd.DataFrame,
         counts_data: str,
         interactions: pd.DataFrame,
         genes: pd.DataFrame,
         complexes: pd.DataFrame,
         complex_compositions: pd.DataFrame,
         pvalue: float,
         separator: str,
         iterations: int = 1000,
         threshold: float = 0.1,
         threads: int = 4,
         debug_seed: int = -1,
         result_precision: int = 3,
         ) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """Run the cluster statistical analysis over simple and complex interactions.

    The counts are re-indexed by ``id_multidata`` through a merge with
    ``genes``, the inputs are prefiltered, the real mean/percent analyses are
    computed, a shuffled analysis is run ``iterations`` times, and everything
    is handed to ``build_results`` to produce the output tables.

    The caller's ``interactions`` and ``complex_compositions`` frames are not
    modified; indexed copies are used internally.

    Args:
        meta: Passed to the cluster builder and to the shuffled analysis.
        counts: Counts table; its sorted column labels are used as cell
            names and its index is matched against ``genes[counts_data]``.
        counts_data: Name of the ``genes`` column that the counts index is
            merged on; also forwarded to ``build_results``.
        interactions: Must contain ``id_interaction``, ``multidata_1_id`` and
            ``multidata_2_id`` columns.
        genes: Must contain ``id_multidata``, ``ensembl``, ``gene_name`` and
            ``hgnc_symbol`` columns.
        complexes: Forwarded to the helper's ``prefilters``.
        complex_compositions: Must contain an ``id_complex_composition``
            column.
        pvalue: Forwarded to ``build_results``.
        separator: Forwarded to the result-matrix and analysis helpers.
        iterations: Number of iterations handed to the shuffled analysis.
        threshold: Threshold handed to the percent analysis.
        threads: Thread count handed to the shuffled analysis.
        debug_seed: When ``>= 0``, seeds NumPy's global RNG with this value;
            negative values (the default) leave the RNG untouched.
        result_precision: Forwarded to ``build_results``.

    Returns:
        ``(pvalues_result, means_result, significant_means,
        deconvoluted_result)`` as returned by ``build_results``.

    Raises:
        AllCountsFilteredException: If no counts remain after mapping them to
            ``id_multidata``.
        NoInteractionsFound: If prefiltering leaves no interactions.
    """
    core_logger.info(
        '[Cluster Statistical Analysis] '
        'Threshold:{} Iterations:{} Debug-seed:{} Threads:{} Precision:{}'.format(threshold,
                                                                                 iterations,
                                                                                 debug_seed,
                                                                                 threads,
                                                                                 result_precision))
    if debug_seed >= 0:
        np.random.seed(debug_seed)
        core_logger.warning('Debug random seed enabled. Setted to {}'.format(debug_seed))

    cells_names = sorted(counts.columns)

    # Rebind to indexed copies instead of `set_index(..., inplace=True)`:
    # the in-place form altered the caller's frames, so a second call with the
    # same inputs failed with KeyError (the key column had become the index).
    interactions = interactions.set_index('id_interaction', drop=True)
    interactions_reduced = interactions[['multidata_1_id', 'multidata_2_id']].drop_duplicates()

    complex_compositions = complex_compositions.set_index('id_complex_composition', drop=True)

    # Add id multidata to counts input
    gene_columns = ['id_multidata', 'ensembl', 'gene_name', 'hgnc_symbol']
    counts = counts.merge(genes[gene_columns], left_index=True, right_on=counts_data)
    counts_relations = counts[gene_columns].copy()

    counts = counts.set_index('id_multidata', drop=True)
    counts = counts[cells_names]
    counts = counts.astype('float32')
    # Rows that share an id_multidata are collapsed to their mean.
    counts = counts.groupby(counts.index).mean()

    if counts.empty:
        raise AllCountsFilteredException(hint='Are you using human data?')
    # End add id multidata

    interactions_filtered, counts_filtered, complex_composition_filtered = \
        cpdb_statistical_analysis_helper.prefilters(interactions_reduced,
                                                    counts,
                                                    complexes,
                                                    complex_compositions)

    if interactions_filtered.empty:
        raise NoInteractionsFound()

    clusters = cpdb_statistical_analysis_helper.build_clusters(meta, counts_filtered, complex_composition_filtered)
    core_logger.info('Running Real Analysis')

    cluster_interactions = cpdb_statistical_analysis_helper.get_cluster_combinations(clusters['names'])

    base_result = cpdb_statistical_analysis_helper.build_result_matrix(interactions_filtered,
                                                                       cluster_interactions,
                                                                       separator)

    real_mean_analysis = cpdb_statistical_analysis_helper.mean_analysis(interactions_filtered,
                                                                        clusters,
                                                                        cluster_interactions,
                                                                        base_result,
                                                                        separator)

    real_percents_analysis = cpdb_statistical_analysis_helper.percent_analysis(clusters,
                                                                               threshold,
                                                                               interactions_filtered,
                                                                               cluster_interactions,
                                                                               base_result,
                                                                               separator)

    core_logger.info('Running Statistical Analysis')
    statistical_mean_analysis = cpdb_statistical_analysis_helper.shuffled_analysis(iterations,
                                                                                   meta,
                                                                                   counts_filtered,
                                                                                   interactions_filtered,
                                                                                   cluster_interactions,
                                                                                   complex_composition_filtered,
                                                                                   base_result,
                                                                                   threads,
                                                                                   separator)

    result_percent = cpdb_statistical_analysis_helper.build_percent_result(real_mean_analysis,
                                                                           real_percents_analysis,
                                                                           statistical_mean_analysis,
                                                                           interactions_filtered,
                                                                           cluster_interactions,
                                                                           base_result,
                                                                           separator)

    pvalues_result, means_result, significant_means, deconvoluted_result = build_results(
        interactions_filtered,
        interactions,
        counts_relations,
        real_mean_analysis,
        result_percent,
        clusters['means'],
        complex_composition_filtered,
        counts,
        genes,
        result_precision,
        pvalue,
        counts_data
    )
    return pvalues_result, means_result, significant_means, deconvoluted_result