def find_and_save_cc_net_nmf_clusters_parallel(network_mat, spreadsheet_mat, lap_diag, lap_pos, run_parameters, local_parallelism):
    """ central loop: compute components for the consensus matrix from the input
        network and spreadsheet matrices and save them to temp files.

    Args:
        network_mat: genes x genes symmetric matrix.
        spreadsheet_mat: genes x samples matrix.
        lap_diag: laplacian matrix component, L = lap_diag - lap_pos.
        lap_pos: laplacian matrix component, L = lap_diag - lap_pos.
        run_parameters: dictionary of run-time parameters.
        local_parallelism: number of worker jobs to run in parallel.
    """
    # one job id per requested worker; each id seeds one clustering bootstrap
    jobs_id = range(0, local_parallelism)
    zipped_arguments = dstutil.zip_parameters(network_mat, spreadsheet_mat, lap_diag, lap_pos, run_parameters, jobs_id)

    # an explicit 'parallelism' run parameter caps the locally determined value
    if 'parallelism' in run_parameters:
        parallelism = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        parallelism = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_net_nmf_clusters_worker, zipped_arguments, parallelism)
def find_and_save_cc_similarity_parallel(expression_df, signature_df, run_parameters, local_parallelism):
    """ central loop: compute components for the similarity matrix in parallel.

    Args:
        expression_df : genes x samples
        signature_df : genes x samples
        run_parameters : dictionary of run-time parameters
        local_parallelism: parallelism option
    """
    import knpackage.distributed_computing_utils as dstutil

    # one argument bundle per worker job
    worker_ids = range(0, local_parallelism)
    arg_bundles = dstutil.zip_parameters(expression_df, signature_df, run_parameters, worker_ids)

    # honor an explicit 'parallelism' override from the run parameters when present
    if 'parallelism' in run_parameters:
        num_processes = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        num_processes = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_similarity_signature_worker, arg_bundles, num_processes)
def find_and_save_cc_link_hclust_clusters_parallel(spreadsheet_mat, run_parameters, local_parallelism):
    """ central loop: compute components for the consensus matrix by hclust.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters.
        local_parallelism: number of processes to be running in parallel
    """
    import knpackage.distributed_computing_utils as dstutil

    # one argument bundle per worker job
    worker_ids = range(0, local_parallelism)
    arg_bundles = dstutil.zip_parameters(spreadsheet_mat, run_parameters, worker_ids)

    # honor an explicit 'parallelism' override from the run parameters when present
    if 'parallelism' in run_parameters:
        num_processes = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        num_processes = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_link_hclust_clusters_worker, arg_bundles, num_processes)
def find_and_save_cc_nmf_clusters_parallel(spreadsheet_mat, run_parameters, local_parallelism):
    """ central loop: compute components for the consensus matrix by
        non-negative matrix factorization.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters.
        local_parallelism: number of processes to be running in parallel
    """
    # local import for consistency with the sibling *_parallel functions,
    # which do not rely on a module-level dstutil binding
    import knpackage.distributed_computing_utils as dstutil

    # one job id per requested worker; each id seeds one clustering bootstrap
    jobs_id = range(0, local_parallelism)
    zipped_arguments = dstutil.zip_parameters(spreadsheet_mat, run_parameters, jobs_id)

    # an explicit 'parallelism' run parameter caps the locally determined value
    if 'parallelism' in run_parameters:
        parallelism = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        parallelism = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_nmf_clusters_worker, zipped_arguments, parallelism)
def get_fisher_exact_test(prop_gene_network_sparse, sparse_dict, spreadsheet_df, max_cpu):
    """ central loop: compute components for fisher exact test.

    Args:
        prop_gene_network_sparse: sparse matrix of network gene set.
        sparse_dict: look up table of sparse matrix.
        spreadsheet_df: the dataframe of user gene set.
        max_cpu: the maximum number of processors to use.

    Returns:
        fisher_contingency_pval: list of seven items lists.

    Raises:
        OSError: if the parallel fisher-exact computation fails.
    """
    universe_count = spreadsheet_df.shape[0]
    overlap_count = prop_gene_network_sparse.T.dot(spreadsheet_df.values)
    user_count = np.sum(spreadsheet_df.values, axis=0)
    gene_count = prop_gene_network_sparse.sum(axis=0)
    set_list = spreadsheet_df.columns.values

    # one (row, column) job per cell of the overlap matrix
    dimension = [range(overlap_count.shape[0]), range(overlap_count.shape[1])]
    combinations = list(itertools.product(*dimension))
    parallelism = dstutil.determine_parallelism_locally(min(max_cpu, len(combinations)))

    try:
        # context manager guarantees the pool is terminated even if
        # starmap_async itself raises (the original leaked the pool here)
        with multiprocessing.Pool(processes=parallelism) as pool:
            pool.starmap_async(fisher_exact_worker,
                               zip(itertools.repeat(sparse_dict),
                                   itertools.repeat(overlap_count),
                                   itertools.repeat(user_count),
                                   itertools.repeat(gene_count),
                                   itertools.repeat(universe_count),
                                   itertools.repeat(set_list),
                                   combinations),
                               callback=callback_extend_list)
            pool.close()
            pool.join()
        # populated by callback_extend_list when the async map completes
        return fisher_contingency_pval_parallel_insertion
    except Exception as exc:
        # narrow except: a bare `except:` would also swallow KeyboardInterrupt/SystemExit
        raise OSError("Failed running parallel processing:{}".format(sys.exc_info())) from exc