def standard_size_converter(total_matrix, chrom):
    """Scatter a reduced pairwise matrix back onto the full chromosome grid.

    The grid is square with one row/column per promoter followed by one per
    enhancer of ``chrom`` (enhancer positions are offset by the number of
    promoters). Rows/columns of elements that must be skipped are masked,
    the mask is made symmetric, and the values of ``total_matrix`` are
    written, in row-major order, into the cells that remain unmasked.
    Masked cells stay 0.

    Args:
        total_matrix: array whose flattened size equals the number of
            unmasked cells (NumPy raises on a mismatch). Values are stored
            into an integer array.
        chrom: chromosome key used for the ``config_variables`` look-ups and
            for ``initialise_variables``.

    Returns:
        Integer array of shape ``(n_promoters + n_enhancers,) * 2``.
    """
    promoter_idx, enhancer_idx, promoter_offset, enhancer_offset = (
        chrom_specific_negative_interactions.initialise_variables(chrom)[2:]
    )
    n_promoters = len(promoter_idx)
    side = n_promoters + len(enhancer_idx)
    excluded = np.zeros((side, side), bool)

    dropped_promoters = config_variables.dict_chrom_pro_not_survived[chrom]
    dropped_enhancers = config_variables.dict_chrom_enh_not_survived[chrom]
    proximal_by_chrom = config_variables.dict_chrom_proximal

    # Subtracting the offsets turns the stored indices into positions on this
    # chromosome's grid (presumably they are genome-wide indices -- confirm).
    if len(dropped_promoters):
        excluded[dropped_promoters - promoter_offset, :] = True

    if len(dropped_enhancers):
        # Filtered-out enhancers are masked; per the original note they could
        # otherwise introduce NaNs through their correlations.
        excluded[:, n_promoters + dropped_enhancers - enhancer_offset] = True

    if config_variables.distant_enh_only and len(proximal_by_chrom[chrom]):
        excluded[:, n_promoters + proximal_by_chrom[chrom] - enhancer_offset] = True

    # A cell is kept only if neither its row element nor its column element
    # was masked in either orientation.
    kept = ~(excluded | excluded.T)

    flat = np.zeros(side * side, int)
    flat[np.ravel(kept)] = np.ravel(total_matrix)
    return flat.reshape(side, side)
def interactions_extractor(chrom):
    """Return the positive and negative promoter-enhancer index pairs of ``chrom``.

    Positive pairs come from ``config_variables.chr_interactions_dict_pro_enh``
    (decoded with ``un_string``); their first column is shifted by ``total_p``
    and their second column by ``total_e`` as reported by
    ``initialise_variables``. Negative pairs are returned exactly as produced
    by ``chrom_specific_negative_interactions``. ``mode`` is not a parameter;
    it is read from the enclosing module scope.

    Returns:
        Tuple ``(positive_pairs, negative_pairs)``.
    """
    positive_pairs = un_string(config_variables.chr_interactions_dict_pro_enh[chrom])
    negative_pairs = chrom_specific_negative_interactions.chrom_specific_negative_interactions(chrom, mode)

    # Only the two trailing offsets of the six returned values are needed.
    _, _, _, _, promoter_offset, enhancer_offset = (
        chrom_specific_negative_interactions.initialise_variables(chrom)
    )

    positive_pairs[:, 0] = positive_pairs[:, 0] - promoter_offset
    positive_pairs[:, 1] = positive_pairs[:, 1] - enhancer_offset
    return positive_pairs, negative_pairs
def inter_enhancer(chrom):
    """Count the distinct second-column indices among the interactions of ``chrom``.

    The interaction table is taken from
    ``config_variables.chr_interactions_dict_pro_enh_TSS`` when
    ``config_variables.disentagled_features_validation`` is truthy, otherwise
    from ``config_variables.chr_interactions_dict_pro_enh``. Its first two
    columns are decoded with ``un_string`` and cast to ``int``; the second
    column is treated as the enhancer index.

    Returns:
        Number of unique values in that second column.
    """
    _, _, _, enhancer_offset = config_variables.negative_interactions.initialise_variables(chrom)[2:]

    interactions_by_chrom = (
        config_variables.chr_interactions_dict_pro_enh_TSS
        if config_variables.disentagled_features_validation
        else config_variables.chr_interactions_dict_pro_enh
    )
    pairs = un_string(interactions_by_chrom[chrom][:, :2]).astype(int)

    # The offset shift does not change how many unique entries there are; it
    # is kept so the intermediate values match the chromosome-local indexing.
    local_enhancers = np.unique(pairs[:, 1]) - enhancer_offset
    return len(local_enhancers)
def interactions_extractor(chrom):
    """Return the positive and negative promoter-enhancer index pairs of ``chrom``.

    The positive table is read from
    ``config_variables.chr_interactions_dict_pro_enh_TSS`` when
    ``config_variables.disentagled_features_validation`` is truthy, otherwise
    from ``config_variables.chr_interactions_dict_pro_enh``. Only its first two
    columns are decoded (``un_string``); column 0 is then shifted by
    ``total_p`` and column 1 by ``total_e`` from ``initialise_variables``.
    Negative pairs are returned unchanged from
    ``chrom_specific_negative_interactions``. ``mode`` is not a parameter; it
    is read from the enclosing module scope.

    Returns:
        Tuple ``(positive_pairs, negative_pairs)``.
    """
    if config_variables.disentagled_features_validation:
        interactions_by_chrom = config_variables.chr_interactions_dict_pro_enh_TSS
    else:
        interactions_by_chrom = config_variables.chr_interactions_dict_pro_enh
    positive_pairs = un_string(interactions_by_chrom[chrom][:, :2])

    negative_pairs = chrom_specific_negative_interactions.chrom_specific_negative_interactions(chrom, mode)

    # Only the two trailing offsets of the six returned values are needed.
    _, _, _, _, promoter_offset, enhancer_offset = (
        chrom_specific_negative_interactions.initialise_variables(chrom)
    )

    positive_pairs[:, 0] = positive_pairs[:, 0] - promoter_offset
    positive_pairs[:, 1] = positive_pairs[:, 1] - enhancer_offset
    return positive_pairs, negative_pairs
def MOG_classifier(
    mode_of_sampler,
    number_of_samples,
    burn_in,
    comb="ER",
    kappa_0=1.0,
    mu_0=1.0,
    alpha_0=1.0,
    Beta_0=1.0,
    total_posterior=False,
    pairwise_number_in_pack=150,
    chain=1,
):
    """Compute per-pair posterior values for positive and negative interactions.

    For every chromosome in the module-level ``chroms_to_infer`` the function

    1. derives the trace file name from the sampler mode and hyper-parameters,
    2. loads a cached pairwise matrix from
       ``./MOG_results_/Pairwise_prob/`` if present, otherwise loads the
       trace, drops the first ``burn_in`` rows, reduces it in parallel with
       ``pararell_methods.pararell_calc_ne`` and caches the result,
    3. expands the matrix to the full promoter+enhancer grid, divides it by
       ``number_of_samples - burn_in`` and reads off the cells addressed by
       the positive and negative interaction pairs.

    Args:
        mode_of_sampler: one of ``"distance_prior"``, ``"distance_MOG"``,
            ``"dirichlet_MOG"``, ``"distance_MOG_empir_mu"``.
        number_of_samples: sample count used in file names, chunking and
            normalisation.
        burn_in: number of leading trace rows discarded.
        comb, kappa_0, mu_0, alpha_0, Beta_0: only used to build file names.
        total_posterior: if truthy, per-chromosome results are concatenated
            into flat lists (in ``chroms_to_infer`` order).
        pairwise_number_in_pack: number of trace rows per parallel work item.
        chain: if truthy, appended to the file name as ``_<chain>``.

    Returns:
        ``(positive, negative)``. Each is a dict keyed by chromosome, or a
        flat list when ``total_posterior`` is truthy.

    Raises:
        ValueError: if ``mode_of_sampler`` is not a known mode.
    """
    import os

    # Spelling of these templates is part of the on-disk file names and must
    # not be "corrected" (e.g. "emprirical").
    mog_trace_templates = {
        "distance_MOG": "MOG_distance_trace_of_c_{0}_{1}_{2}_{3}_{4}_{5}_{6}",
        "dirichlet_MOG": "MOG_dirichlet_trace_of_c_{0}_{1}_{2}_{3}_{4}_{5}_{6}",
        "distance_MOG_empir_mu": "MOG_distance_emprirical_mu_trace_of_c_{0}_{1}_{2}_{3}_{4}_{5}_{6}",
    }
    known_modes = ["distance_prior"] + sorted(mog_trace_templates)
    # Fail fast: previously an unknown mode only surfaced later as an
    # UnboundLocalError on ``name`` inside the file-name builder.
    if mode_of_sampler not in known_modes:
        raise ValueError(
            "unknown mode_of_sampler {0!r}; expected one of {1}".format(mode_of_sampler, known_modes)
        )

    interaction_classes = ("positive_interactions", "negative_interactions")

    def name_creator_and_pair_wise_exist_check(chrom, mode_of_sampler, comb, number_of_samples, burn_in, chain):
        """Return (trace path, cached pairwise path, whether the cache exists)."""
        if mode_of_sampler == "distance_prior":
            name = "prior_distance_trace_of_c_{0}_{1}".format(chrom, number_of_samples)
        else:
            name = mog_trace_templates[mode_of_sampler].format(
                kappa_0, mu_0, alpha_0, Beta_0, chrom, comb, number_of_samples
            )

        if chain:
            name = name + "_{0}".format(chain)
        print(chain)

        output_folder = "./MOG_results_/"
        temp_output = output_folder + "Pairwise_prob/"
        if not os.path.exists(temp_output):
            os.makedirs(temp_output)

        name_of_MOG_chain_file = output_folder + name
        print(name)

        pair_wise_prob = temp_output + name + "_{0}_pairwise_prob.npy".format(burn_in)
        pair_wise_prob_exist = os.path.exists(pair_wise_prob)
        print("{0} {1}".format(name, pair_wise_prob_exist))

        return name_of_MOG_chain_file, pair_wise_prob, pair_wise_prob_exist

    def loads_MoG_results(chrom, name):
        """Load the integer trace stored at ``name``.

        For every mode except ``"dirichlet_MOG"`` the loaded columns are
        prefixed with one column per surviving promoter holding the fixed
        labels ``0 .. num_of_promoters - 1`` in every row.
        """
        import iter_loadtxt

        _c_trace_raw = iter_loadtxt.iter_loadtxt(name, ",", dtype=int)  # saves memory
        if mode_of_sampler == "dirichlet_MOG":
            return _c_trace_raw

        num_of_promoters = len(config_variables.dict_chrom_pro_survived[chrom])
        promoters_fixed_labels = np.zeros((len(_c_trace_raw), num_of_promoters), dtype=int)
        promoters_fixed_labels[:] = np.arange(num_of_promoters, dtype=int)
        return np.c_[promoters_fixed_labels, _c_trace_raw]

    def cluster_estimator_similarity(_c_trace):
        """Sum ``pararell_calc_ne`` over row-chunks of the trace using a process pool."""
        from multiprocessing import Pool

        import pararell_methods

        # Chunk boundaries run over 0..number_of_samples in steps of
        # ``pairwise_number_in_pack``; the final boundary is always appended.
        # NOTE(review): the trace passed in has already lost ``burn_in`` rows,
        # so trailing chunks can be empty slices -- confirm that
        # ``pararell_calc_ne`` is fine with that.
        bins = list(range(0, number_of_samples, pairwise_number_in_pack))
        if bins[-1] != number_of_samples:
            bins.append(number_of_samples)
        chunks = [_c_trace[lo:hi] for lo, hi in zip(bins[:-1], bins[1:])]

        pool = Pool(processes=6)
        start = time.time()
        try:
            total_matrix = sum(pool.imap_unordered(pararell_methods.pararell_calc_ne, chunks))
        except BaseException:
            # Do not leave worker processes running when the reduction fails.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
        end = time.time()
        print(end - start)
        return total_matrix

    def standard_size_converter(total_matrix, chrom):
        """Scatter the reduced matrix onto the full promoter+enhancer grid of ``chrom``.

        Rows/columns of non-surviving promoters and enhancers (and proximal
        enhancers when ``config_variables.distant_enh_only`` is set) are
        masked symmetrically; ``total_matrix`` fills the remaining cells in
        row-major order and masked cells stay 0.
        """
        indexes_p, indexes_e, total_p, total_e = chrom_specific_negative_interactions.initialise_variables(chrom)[2:]
        n_promoters = len(indexes_p)
        length_chr = n_promoters + len(indexes_e)
        excluded = np.zeros((length_chr, length_chr), bool)

        chrom_pro_not_survived = config_variables.dict_chrom_pro_not_survived[chrom]
        chrom_enh_not_survived = config_variables.dict_chrom_enh_not_survived[chrom]
        dict_chrom_proximal = config_variables.dict_chrom_proximal

        if len(chrom_pro_not_survived):
            excluded[chrom_pro_not_survived - total_p, :] = True
        if len(chrom_enh_not_survived):
            # Filtered-out enhancers could otherwise cause NaNs through their
            # correlations (original author's note).
            excluded[:, n_promoters + chrom_enh_not_survived - total_e] = True
        if config_variables.distant_enh_only and len(dict_chrom_proximal[chrom]):
            excluded[:, n_promoters + dict_chrom_proximal[chrom] - total_e] = True

        kept = np.invert(excluded + excluded.T)
        expanded = np.zeros(length_chr * length_chr, int)
        expanded[np.ravel(kept)] = np.ravel(total_matrix)
        return expanded.reshape(length_chr, length_chr)

    posterior_of_option = {label: {} for label in interaction_classes}

    for chrom_ in chroms_to_infer:
        trace_path, pairwise_path, pair_wise_prob_exist = name_creator_and_pair_wise_exist_check(
            chrom_, mode_of_sampler, comb, number_of_samples, burn_in, chain
        )

        if pair_wise_prob_exist:
            total_matrix = np.load(pairwise_path)
        else:
            _c_trace_distance = loads_MoG_results(chrom_, trace_path)[burn_in:]
            total_matrix = cluster_estimator_similarity(_c_trace_distance)
            np.save(pairwise_path, total_matrix)

        total_matrix = standard_size_converter(total_matrix, chrom_)
        # Normalise by the number of post-burn-in samples.
        total_matrix = total_matrix / float(number_of_samples - burn_in)
        print(total_matrix)

        true_pro_enh_indexes, prom_enh_false_interactions = interactions_extractor(chrom_)
        indexes_p = chrom_specific_negative_interactions.initialise_variables(chrom_)[2]
        n_promoters = len(indexes_p)

        # Enhancer columns sit after the promoter columns in the expanded grid.
        posterior_of_option["positive_interactions"][chrom_] = total_matrix[
            true_pro_enh_indexes[:, 0], true_pro_enh_indexes[:, 1] + n_promoters
        ]
        posterior_of_option["negative_interactions"][chrom_] = total_matrix[
            prom_enh_false_interactions[:, 0], prom_enh_false_interactions[:, 1] + n_promoters
        ]

    if total_posterior:
        for label in interaction_classes:
            per_chrom = posterior_of_option[label]
            posterior_of_option[label] = list(
                itertools.chain.from_iterable(per_chrom[chrom__] for chrom__ in chroms_to_infer)
            )

    return posterior_of_option["positive_interactions"], posterior_of_option["negative_interactions"]
def MOG_classifier(option_correl, total_posterior=False):
    """Compute per-pair posterior values for positive and negative interactions.

    For every chromosome in the module-level ``chroms_to_infer`` the trace
    file under ``<cwd>/MOG_results_/`` is loaded, reduced in parallel with
    ``pararell_methods.pararell_calc_ne``, expanded to the full
    promoter+enhancer grid, divided by the number of trace rows, and read at
    the cells addressed by the positive and negative interaction pairs.

    Args:
        option_correl: iterable of keys into ``config_variables.dict_option``;
            the mapped values, joined by ``_``, become part of the file name.
        total_posterior: if truthy, per-chromosome results are concatenated
            into flat lists (in ``chroms_to_infer`` order).

    Returns:
        ``(positive, negative)``. Each is a dict keyed by chromosome, or a
        flat list when ``total_posterior`` is truthy.
    """
    import os

    number_of_samples = config_variables.number_of_samples
    interaction_classes = ("positive_interactions", "negative_interactions")

    def loads_MoG_results(chrom):
        """Load the trace of ``chrom`` and prefix it with fixed promoter labels.

        One column per surviving promoter, holding the labels
        ``0 .. num_of_promoters - 1`` in every row, is placed in front of the
        loaded columns.
        """
        comb = "_".join([config_variables.dict_option[el] for el in option_correl])
        name = 'cluster_trace_of_c_distance_{0}_{1}_{2}_{3}_{4}_{5}_{6}'.format(
            config_variables.kappa_0,
            config_variables.mu_0,
            config_variables.alpha_0,
            config_variables.Beta_0,
            chrom,
            comb,
            number_of_samples,
        )
        name = os.getcwd() + "/MOG_results_/" + name

        import iter_loadtxt

        _c_trace_raw = iter_loadtxt.iter_loadtxt(name, ",", dtype=int)  # saves memory

        num_of_promoters = len(config_variables.dict_chrom_pro_survived[chrom])
        promoters_fixed_labels = np.zeros((len(_c_trace_raw), num_of_promoters), dtype=int)
        promoters_fixed_labels[:] = np.arange(num_of_promoters, dtype=int)
        return np.c_[promoters_fixed_labels, _c_trace_raw]

    def cluster_estimator_similarity(_c_trace):
        """Sum ``pararell_calc_ne`` over 100 row-chunks of the trace using a process pool."""
        from multiprocessing import Pool

        import pararell_methods

        pack = 100
        incr = int(_c_trace.shape[0] // pack)
        # pack - 1 equally sized chunks, then one chunk with all remaining rows.
        # NOTE(review): with fewer than ``pack`` rows ``incr`` is 0, so the
        # leading chunks are empty and the last one holds everything.
        chunks = [_c_trace[i * incr:(i + 1) * incr] for i in range(pack - 1)]
        chunks.append(_c_trace[(pack - 1) * incr:])

        pool = Pool(processes=4)
        start = time.time()
        try:
            total_matrix = sum(pool.imap_unordered(pararell_methods.pararell_calc_ne, chunks))
        except BaseException:
            # Do not leave worker processes running when the reduction fails.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
        end = time.time()
        print(end - start)
        # Bug fix: the reduction used to be stored under a different name and
        # an unrelated outer-scope ``total_matrix`` was returned instead.
        return total_matrix

    def standard_size_converter(total_matrix, chrom):
        """Scatter the reduced matrix onto the full promoter+enhancer grid of ``chrom``.

        Rows/columns of non-surviving promoters and enhancers (and proximal
        enhancers when ``config_variables.distant_enh_only`` is set) are
        masked symmetrically; ``total_matrix`` fills the remaining cells in
        row-major order and masked cells stay 0.
        """
        indexes_p, indexes_e, total_p, total_e = chrom_specific_negative_interactions.initialise_variables(chrom)[2:]
        n_promoters = len(indexes_p)
        length_chr = n_promoters + len(indexes_e)
        excluded = np.zeros((length_chr, length_chr), bool)

        chrom_pro_not_survived = config_variables.dict_chrom_pro_not_survived[chrom]
        chrom_enh_not_survived = config_variables.dict_chrom_enh_not_survived[chrom]
        dict_chrom_proximal = config_variables.dict_chrom_proximal

        if len(chrom_pro_not_survived):
            excluded[chrom_pro_not_survived - total_p, :] = True
        if len(chrom_enh_not_survived):
            # Filtered-out enhancers could otherwise cause NaNs through their
            # correlations (original author's note).
            excluded[:, n_promoters + chrom_enh_not_survived - total_e] = True
        if config_variables.distant_enh_only and len(dict_chrom_proximal[chrom]):
            excluded[:, n_promoters + dict_chrom_proximal[chrom] - total_e] = True

        kept = np.invert(excluded + excluded.T)
        expanded = np.zeros(length_chr * length_chr, int)
        expanded[np.ravel(kept)] = np.ravel(total_matrix)
        return expanded.reshape(length_chr, length_chr)

    posterior_of_option = {label: {} for label in interaction_classes}

    for chrom_ in chroms_to_infer:
        _c_trace_distance = loads_MoG_results(chrom_)
        total_matrix = cluster_estimator_similarity(_c_trace_distance)
        total_matrix = standard_size_converter(total_matrix, chrom_)

        # Normalise by the number of trace rows that were reduced.
        number_of_iterations = float(len(_c_trace_distance))
        total_matrix = total_matrix / number_of_iterations

        true_pro_enh_indexes, prom_enh_false_interactions = interactions_extractor(chrom_)
        indexes_p = chrom_specific_negative_interactions.initialise_variables(chrom_)[2]
        n_promoters = len(indexes_p)

        # Enhancer columns sit after the promoter columns in the expanded grid.
        posterior_of_option["positive_interactions"][chrom_] = total_matrix[
            true_pro_enh_indexes[:, 0], true_pro_enh_indexes[:, 1] + n_promoters
        ]
        posterior_of_option["negative_interactions"][chrom_] = total_matrix[
            prom_enh_false_interactions[:, 0], prom_enh_false_interactions[:, 1] + n_promoters
        ]

    if total_posterior:
        for label in interaction_classes:
            per_chrom = posterior_of_option[label]
            posterior_of_option[label] = list(
                itertools.chain.from_iterable(per_chrom[chrom__] for chrom__ in chroms_to_infer)
            )

    return posterior_of_option["positive_interactions"], posterior_of_option["negative_interactions"]