def prior_producer():
    """Build the ``prior_elements`` store and fill it from the prior chromosomes.

    Every setting is read from the project module ``config_variables``
    (``mode``, ``chroms_in_prior``, ``dataset_names_option``, ``domain``,
    ``domain_like_chromosome_correction``, ``one_sided_or_two_sided``,
    ``log_distances``, ``MoG_classificator`` ...).

    Layout of the store created here::

        prior_elements[interaction_type][classification]["distance"]
            -> {"attribute_values": {chrom: array},
                "prior_bins": array, "prior_frequencies": array}
        prior_elements[interaction_type][classification]["correlation"][data_set]
            -> same three keys

    The store is then handed to
    ``correl_distance_extractor_clean.correl_distance_extractor`` which
    returns the filled version.  When ``domain`` or
    ``domain_like_chromosome_correction`` is set, two more entries are added
    for the configured ``mode``:

    * ``prior_elements[mode]["interacting_enhancers_coordinates"]``
    * ``prior_elements[mode][classification]["distance"]["possible_distances_counts"]``

    Returns:
        dict: the populated ``prior_elements`` store.
    """
    import config_variables
    import correl_distance_extractor_clean

    np = config_variables.np
    log_distances = config_variables.log_distances
    chroms_in_prior = config_variables.chroms_in_prior
    dataset_names_option = config_variables.dataset_names_option
    mode = config_variables.mode
    domain = config_variables.domain
    one_sided_or_two_sided = config_variables.one_sided_or_two_sided
    domain_like_chromosome_correction = config_variables.domain_like_chromosome_correction

    # The extractor is told it runs in "prior mode" unless the MoG
    # classificator is switched on.
    prior_mode = not config_variables.MoG_classificator

    def empty_attribute_store():
        # Fresh arrays on every call so that no two entries share an object.
        return {
            "attribute_values": {chrom_: np.array([]) for chrom_ in chroms_in_prior},
            "prior_bins": np.array([]),
            "prior_frequencies": np.array([]),
        }

    def extract_attributes(elements, logged):
        # "double_sided" keeps signed distances, "single_sided" asks for
        # absolute ones.  Any other setting leaves the store untouched,
        # exactly as the original if/elif ladder did.
        if one_sided_or_two_sided == "double_sided":
            absolute = False
        elif one_sided_or_two_sided == "single_sided":
            absolute = True
        else:
            return elements
        return correl_distance_extractor_clean.correl_distance_extractor(
            mode, elements, chroms_in_prior,
            absolute=absolute, logged=logged, prior_mode=prior_mode)

    prior_elements = {}
    for type_of_interaction in ("promoter_enhancer_interactions", "enhancer_enhancer_interactions"):
        prior_elements[type_of_interaction] = {}
        for classification_of_interactions in ("positive_interactions", "negative_interactions"):
            prior_elements[type_of_interaction][classification_of_interactions] = {
                "distance": empty_attribute_store(),
                "correlation": {
                    data_set_name: empty_attribute_store()
                    for data_set_name in dataset_names_option
                },
            }

    if not domain and not domain_like_chromosome_correction:
        # No domain correction: a single extraction, logged on request.
        return extract_attributes(prior_elements, logged=bool(log_distances))

    # ------------------------------------------------------------------
    # Domain-aware path.
    # NOTE(review): the indentation of the original file was lost; everything
    # below is assumed to belong to the domain branch, and the final logged
    # re-extraction is assumed to run once after the classification loop --
    # confirm against the upstream source.
    # ------------------------------------------------------------------

    # Distances are extracted un-logged first because they are added to
    # enhancer coordinates below.
    prior_elements = extract_attributes(prior_elements, logged=False)

    import itertools
    import interacting_domain_clean as interacting_domain
    from prepare_interactions_clean import un_string

    dataset_time_series_dict = config_variables.dataset_time_series_dict
    link_data_set_name_to_file_name = config_variables.link_data_set_name_to_file_name
    chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
    chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh

    def interacting_enhancers_coordinates_function():
        """Return {chrom: coordinates of the enhancers that take part in an interaction}."""
        interacting_enhancers_coord = {}
        # Enhancer coordinates are always taken from the 'ER' enhancer data set.
        enh_coordinates = dataset_time_series_dict[link_data_set_name_to_file_name["enhancers"]['ER']][1]
        for chrom in chroms_in_prior:
            if mode == "promoter_enhancer_interactions":
                # Column 1 of a promoter-enhancer pair is the enhancer index.
                enhancer_indexes = np.unique(un_string(chr_interactions_dict_pro_enh[chrom])[:, 1])
                interacting_enhancers_coord[chrom] = enh_coordinates[enhancer_indexes]
            elif mode == "enhancer_enhancer_interactions":
                # Both columns hold enhancer indexes; take their union.
                pairs = un_string(chr_interactions_dict_enh_enh[chrom])
                enhancer_indexes = np.unique(np.r_[np.unique(pairs[:, 0]), np.unique(pairs[:, 1])])
                interacting_enhancers_coord[chrom] = enh_coordinates[enhancer_indexes]
        return interacting_enhancers_coord

    prior_elements[mode]["interacting_enhancers_coordinates"] = interacting_enhancers_coordinates_function()

    def adaptive_domain_dummy_inter_matrix_version(chrom, interacting_enhancer_coordinates, distances, possible_distances_counts, length):
        """Add, per distance, the number of enhancers on ``chrom`` whose dummy
        partner placed at that distance is allocated to the enhancer's own domain.
        """
        number_of_enhancers = len(interacting_enhancer_coordinates)

        # One dummy interaction centre per (enhancer, distance) pair, widened
        # to an interval of +/- ``length`` around the centre.
        possible_interaction_centres = interacting_enhancer_coordinates.mean(1)[:, None] + distances
        flat_centres = possible_interaction_centres.reshape(-1)
        possible_interaction_coordinates = np.c_[flat_centres - length, flat_centres + length]

        def state_dependent_allocation(state):
            allocation_of_coordinates, _size_domains = interacting_domain.interacting_domains(
                interacting_enhancer_coordinates, possible_interaction_coordinates, chrom, state,
                matrix_version=False, chromosome_version=domain_like_chromosome_correction)
            # The first entries belong to the enhancers themselves, the rest
            # to their dummy partners (one row per enhancer after reshaping).
            enhancer_allocations = allocation_of_coordinates[:number_of_enhancers]
            dummy_allocations = allocation_of_coordinates[number_of_enhancers:]
            dummy_allocations = dummy_allocations.reshape(number_of_enhancers, len(distances))
            return enhancer_allocations[:, None] == dummy_allocations

        same_domain_left = state_dependent_allocation("left")
        same_domain_right = state_dependent_allocation("right")
        # ``+`` on boolean arrays is an element-wise OR: a dummy partner counts
        # once if it shares the domain under either state.
        possible_distances_counts_chrom = (same_domain_left + same_domain_right).astype(int).sum(0)
        return possible_distances_counts + possible_distances_counts_chrom

    for classification_of_interactions in ("positive_interactions", "negative_interactions"):
        distance_values = prior_elements[mode][classification_of_interactions]["distance"]["attribute_values"]
        # Flatten the per-chromosome distances into one list, in chromosome order.
        total_array = list(itertools.chain.from_iterable(
            distance_values[chrom_] for chrom_ in chroms_in_prior))

        possible_distances_counts = np.zeros_like(total_array)
        for chrom in chroms_in_prior:
            interacting_enhancer_coordinates_chrom = prior_elements[mode]["interacting_enhancers_coordinates"][chrom]
            possible_distances_counts = adaptive_domain_dummy_inter_matrix_version(
                chrom, interacting_enhancer_coordinates_chrom, total_array, possible_distances_counts, 200)

        prior_elements[mode][classification_of_interactions]["distance"]["possible_distances_counts"] = possible_distances_counts

        if np.any(possible_distances_counts == 0):
            if mode == "promoter_enhancer_interactions" and classification_of_interactions == "positive_interactions":
                print("possible_distances_p_e_counts_true should not have 0 count distances which could be corrected by corrector() i.e something wrong")

    # The counts above needed linear distances; switch to logged ones
    # afterwards when the configuration asks for them.
    if log_distances:
        prior_elements = extract_attributes(prior_elements, logged=True)

    return prior_elements
def infered_elements_filler():
    """Build the ``infered_elements`` store and fill it for the chromosomes to infer.

    Settings come from the project module ``config_variables`` (``mode``,
    ``chroms_to_infer``, ``dataset_names_option``, ``one_sided_or_two_sided``,
    ``log_distances``).

    Layout of the store created here::

        infered_elements[interaction_type][classification]["distance"]
            -> {"attribute_values": {chrom: array},
                "probabilities_of_being_positive_interactions": {chrom: array},
                "probabilities_of_being_negative_interactions": {chrom: array}}
        infered_elements[interaction_type][classification]["correlation"][data_set]
            -> same three keys

    The store is then passed to
    ``correl_distance_extractor_clean.correl_distance_extractor`` and the
    value it returns is handed back to the caller.

    Returns:
        dict: the populated ``infered_elements`` store.
    """
    import config_variables
    import correl_distance_extractor_clean

    mode = config_variables.mode
    chroms_to_infer = config_variables.chroms_to_infer
    np = config_variables.np
    dataset_names_option = config_variables.dataset_names_option
    one_sided_or_two_sided = config_variables.one_sided_or_two_sided
    log_distances = config_variables.log_distances

    per_chromosome_keys = (
        "attribute_values",
        "probabilities_of_being_positive_interactions",
        "probabilities_of_being_negative_interactions",
    )

    def empty_attribute_store():
        # Fresh arrays on every call so that no two entries share an object.
        store = {}
        for key in per_chromosome_keys:
            store[key] = {chrom_: np.array([]) for chrom_ in chroms_to_infer}
        return store

    infered_elements = {}
    for type_of_interaction in ("promoter_enhancer_interactions", "enhancer_enhancer_interactions"):
        infered_elements[type_of_interaction] = {}
        for classification_of_interactions in ("positive_interactions", "negative_interactions"):
            infered_elements[type_of_interaction][classification_of_interactions] = {
                "distance": empty_attribute_store(),
                "correlation": {
                    data_set_name: empty_attribute_store()
                    for data_set_name in dataset_names_option
                },
            }

    # "double_sided" keeps signed distances, "single_sided" asks for absolute
    # ones.  Any other setting returns the empty store without calling the
    # extractor, exactly as the original if/elif ladder did.
    if one_sided_or_two_sided == "double_sided":
        absolute = False
    elif one_sided_or_two_sided == "single_sided":
        absolute = True
    else:
        return infered_elements

    return correl_distance_extractor_clean.correl_distance_extractor(
        mode, infered_elements, chroms_to_infer,
        absolute=absolute, logged=bool(log_distances))