def get_0nM_enrichment_dict(self):
    """
    - Returns a dictionary of {kmer: enrichment}, pairing the kmers in the
        order given by RBNS_utils.yield_kmers(self.k) with the entries of
        self.enrichments_to_0nM
    - Asserts that self.enrichments_to_0nM has exactly 4^k entries
    """
    expected_num_kmers = 4**self.k
    assert len(self.enrichments_to_0nM) == expected_num_kmers
    kmers_in_order = RBNS_utils.yield_kmers(self.k)
    return dict(zip(kmers_in_order, self.enrichments_to_0nM))
def save_0nM_enrichments(self, enrich_pkl):
    """
    - Pickles a dictionary of {kmer: enrichment} built from
        self.enrichments_to_0nM to enrich_pkl
    - k is inferred from the number of enrichments (expected to be 4^k);
        kmers are paired with enrichments in RBNS_utils.yield_kmers(k) order
    - INPUTs:
        - enrich_pkl: path of the pickle file to write
    """
    num_kmers = len(self.enrichments_to_0nM)
    #### round() rather than truncate, so that a floating point log that lands
    ####    just below the true integer (e.g., 2.9999...) doesn't give k - 1
    k = int(round(math.log(num_kmers, 4.)))
    enriches_by_kmer_D = {}
    for kmer_num, kmer in enumerate(RBNS_utils.yield_kmers(k)):
        enriches_by_kmer_D[kmer] = self.enrichments_to_0nM[kmer_num]
    #### A context manager makes sure the pickle is flushed & closed
    with open(enrich_pkl, 'wb') as pkl_f:
        cPickle.dump(enriches_by_kmer_D, pkl_f)
def load_0nM_enrichments(self, enrich_pkl):
    """
    - Loads the {kmer: enrichment} dictionary pickled at enrich_pkl and
        stores the enrichments, in RBNS_utils.yield_kmers(k) order, as a
        numpy array in self.enrichments_to_0nM
    - k is inferred from the number of entries in the loaded dictionary
    - INPUTs:
        - enrich_pkl: path of the pickle file to read
    - NOTE: unpickling can execute arbitrary code, so enrich_pkl should only
        ever be a trusted file
    """
    #### A context manager makes sure the file handle is closed
    with open(enrich_pkl, 'rb') as pkl_f:
        enriches_by_kmer_D = cPickle.load(pkl_f)
    #### round() rather than truncate, so that a floating point log that lands
    ####    just below the true integer (e.g., 2.9999...) doesn't give k - 1
    k = int(round(math.log(len(enriches_by_kmer_D), 4.)))
    enriches_L = [
        enriches_by_kmer_D[kmer] for kmer in RBNS_utils.yield_kmers(k)
    ]
    self.enrichments_to_0nM = np.array(enriches_L)
def save_enrichments(self, enrich_pkl):
    """
    - Pickles a dictionary of {kmer: enrichment} built from self.enrichments
        to enrich_pkl, first calling RBNS_utils.make_dir() on the directory
        containing enrich_pkl
    - k is inferred from the number of enrichments (expected to be 4^k);
        kmers are paired with enrichments in RBNS_utils.yield_kmers(k) order
    - INPUTs:
        - enrich_pkl: path of the pickle file to write
    """
    RBNS_utils.make_dir(os.path.dirname(enrich_pkl))
    num_kmers = len(self.enrichments)
    #### round() rather than truncate, so that a floating point log that lands
    ####    just below the true integer (e.g., 2.9999...) doesn't give k - 1
    k = int(round(math.log(num_kmers, 4.)))
    enriches_by_kmer_D = {}
    for kmer_num, kmer in enumerate(RBNS_utils.yield_kmers(k)):
        enriches_by_kmer_D[kmer] = self.enrichments[kmer_num]
    #### A context manager makes sure the pickle is flushed & closed
    with open(enrich_pkl, 'wb') as pkl_f:
        cPickle.dump(enriches_by_kmer_D, pkl_f)
def get_kmer_freqs_from_reads_F(reads_F, k, vals_sum_to="sumto1"):
    """
    - Returns the kmer counts & freqs in reads_F
    - INPUTs:
        - reads_F: a file with one read per line
        - k: the kmer length
        - vals_sum_to
            "sumto1": all 4^k entries sum to 1
            "sumto4^k": all 4^k entries sum to 4^k
            "none": no normalization is done
    - RETURNS:
        - if vals_sum_to is "none": counts_by_kmer_D itself
        - otherwise:
            return_D = {"counts_by_kmer_D": counts_by_kmer_D,
                        "freqs_by_kmer_D": freqs_by_kmer_D}
    - RAISES:
        - ValueError if vals_sum_to is not one of the options above
    """
    #### Validate up front, so that a bad argument fails before the (possibly
    ####    large) reads file is scanned rather than afterwards
    if vals_sum_to not in ("none", "sumto1", "sumto4^k"):
        raise ValueError(
            "{0} IS NOT A VALID vals_sum_to ARGUMENT. REPLACE AND TRY AGAIN\n".format(
                vals_sum_to))

    counts_by_kmer_D = {}
    for kmer in RBNS_utils.yield_kmers(k):
        counts_by_kmer_D[kmer] = 0

    with open(reads_F) as f:
        for line in f:
            read = line.strip()
            for start_pos in range(len(read) - k + 1):
                kmer = read[start_pos:(start_pos + k)]
                #### only include it if it doesn't have an N (i.e., if it's
                ####    one of the kmers initialized above)
                try:
                    counts_by_kmer_D[kmer] += 1
                except KeyError:
                    pass

    if (vals_sum_to == "none"):
        return counts_by_kmer_D

    #### Normalize using the RBNS_utils.normalize_D helper function
    if (vals_sum_to == "sumto1"):
        freqs_by_kmer_D = RBNS_utils.normalize_D(counts_by_kmer_D)
    else:
        freqs_by_kmer_D = RBNS_utils.normalize_D(counts_by_kmer_D,
                                                 vals_sum_to="sumto4^k")

    return_D = {
        "counts_by_kmer_D": counts_by_kmer_D,
        "freqs_by_kmer_D": freqs_by_kmer_D
    }
    return return_D
def weight_dict(self):
    """
    - Returns a dictionary of {kmer: weight}, pairing the kmers in the order
        given by RBNS_utils.yield_kmers(self.k) with the entries of
        self.profile
    """
    kmers_in_order = RBNS_utils.yield_kmers(self.k)
    return dict(zip(kmers_in_order, self.profile))
def return_frequency_and_number_of_reads_kmer_in_reads_F(reads_F, kmer):
    """
    - For a reads_F, makes a new out_reads_F in the same directory in which
        each occurrence of the kmer is replaced with "X"s
    - Called by functions in RBNS_logos.py

    - The kmer counts/freqs returned are those of the reads written out
        (i.e., AFTER the kmer has been replaced with X's); kmers that aren't
        one of those from RBNS_utils.yield_kmers(k) (e.g., any that contain
        an X) are not counted

    - RETURNS:
        return_D = {"out_reads_F": out_reads_F,
                "tot_num_reads": tot_num_reads,
                "num_reads_w_kmer": num_reads_w_kmer,
                "freq_reads_w_kmer": freq_reads_w_kmer,
                "tot_num_kmer_occurs" : tot_num_kmer_occurs,
                "counts_by_kmer_D": counts_by_kmer_D,
                "freqs_by_kmer_D": freqs_by_kmer_D}
    """
    k = len(kmer)
    read_len = get_readlength(reads_F)

    orig_reads_DIR = os.path.dirname(reads_F)
    orig_reads_basename = os.path.basename(reads_F)
    out_basename = orig_reads_basename.rsplit(".", 1)[0] +\
            "_{}.reads".format( kmer )
    #### If the file name is over 100 characters, shorten it
    if (len(out_basename) >= 100):
        out_basename = "{}.reads".format(kmer)
    out_reads_F = os.path.join(orig_reads_DIR, out_basename)

    #### The number of reads and number of times a kmer was found
    tot_num_reads = 0
    num_reads_w_kmer = 0
    tot_num_kmer_occurs = 0

    #### A dictionary of kmer frequencies for the reads written out
    counts_by_kmer_D = {}
    for this_kmer in RBNS_utils.yield_kmers(k):
        counts_by_kmer_D[this_kmer] = 0

    masked_kmer = "X" * k

    #### Context managers so that both files are closed even if an error
    ####    occurs part-way through; the file object does its own buffering,
    ####    so each read is written out directly
    with open(reads_F) as reads_f, open(out_reads_F, "w") as out_reads_f:
        for line in reads_f:
            tot_num_reads += 1
            read = line.strip()

            #### str.count() & str.replace() scan left to right over
            ####    non-overlapping occurrences, which is the same set of
            ####    occurrences found by repeatedly masking the first match
            num_occurs = read.count(kmer)
            if (num_occurs > 0):
                num_reads_w_kmer += 1
                tot_num_kmer_occurs += num_occurs
                read = read.replace(kmer, masked_kmer)

            #### Count the kmers of the (masked) read
            for start_pos in range(read_len - k + 1):
                this_kmer = read[start_pos:(start_pos + k)]
                try:
                    counts_by_kmer_D[this_kmer] += 1
                except KeyError:
                    pass

            out_reads_f.write(read + "\n")

    #### Normalize the counts_by_kmer_D into freqs
    freqs_by_kmer_D = RBNS_utils.normalize_D(counts_by_kmer_D)

    #### Guard against an empty reads file
    if (tot_num_reads > 0):
        freq_reads_w_kmer = float(num_reads_w_kmer) / tot_num_reads
    else:
        freq_reads_w_kmer = 0.

    return_D = {
        "out_reads_F": out_reads_F,
        "tot_num_reads": tot_num_reads,
        "num_reads_w_kmer": num_reads_w_kmer,
        "freq_reads_w_kmer": freq_reads_w_kmer,
        "tot_num_kmer_occurs": tot_num_kmer_occurs,
        "counts_by_kmer_D": counts_by_kmer_D,
        "freqs_by_kmer_D": freqs_by_kmer_D
    }
    return return_D
def calc_Ppaired_over_top_enriched_kmers_and_flanking(reads_w_struct_F,
                                                      k,
                                                      fiveP_adapter,
                                                      threeP_adapter,
                                                      random_read_len,
                                                      num_bins=5):
    """
    - For an input reads_struct_F like:
            RBFOX3_input.w_struc.reads.gz
            RBFOX3_20.w_struc.reads.gz,
        gets all occurrences of each of the kmers and calculates the
        Ppaired over each position of the motif & 10 bases flanking it
        upstream & downstream, as well as the average Ppaired over each
        motif occurrence (i.e., which of the num_bins Ppaired bins it should
        go into for later calculating the R by Ppaired bin)

    - Pickles an output dictionary in out_Ds_DIR for later loading & analysis;
        if that pickle already exists, returns without doing anything

    - INPUTs:
        - reads_w_struct_F: read in records of 4 lines; the 1st line of each
            is the read (with adapters) and the 2nd is the space-separated
            Ppaired at each position
        - k: the kmer length
        - fiveP_adapter / threeP_adapter: adapter sequences; only their
            lengths are used, to locate the random region of each read
        - random_read_len: length of the random region (asserted against the
            first read of the file)
        - num_bins: 5 or 10

    - The pickled D has the form:
        {"num_reads": number of reads,
         "counts_by_kmer_binidx_D": D[kmer][bin_idx] = count,
         "Ppair_and_count_by_kmer_idx_D":
                D[kmer][rel_idx] = {'counts': , 'Ppaired_sum': },
                    for rel_idx from -10 to k + 9 relative to the kmer start}
    """
    assert (num_bins in [5, 10])
    #### Number of flanking positions recorded on each side of the kmer
    flank_len = 10

    starting_basename = os.path.basename(reads_w_struct_F).split('.w_st')[0]

    fiveP_len = len(fiveP_adapter)
    threeP_len = len(threeP_adapter)

    out_Ds_DIR = os.path.join(os.path.dirname(reads_w_struct_F), 'Ppaired_Ds',
                              str(k))
    RBNS_utils.make_dir(out_Ds_DIR)
    out_D_F = os.path.join(out_Ds_DIR, "{0}.D.pkl".format(starting_basename))
    if os.path.exists(out_D_F):
        return

    ##### Make sure that the adapter lengths & random read length match up
    #####   (only the first record is checked)
    for lines_L in RBNS_utils.iterNlines(reads_w_struct_F,
                                         4,
                                         strip_newlines=True):
        read_w_adapter = lines_L[0]
        calculated_random_read_len = len(
            read_w_adapter) - fiveP_len - threeP_len
        assert (calculated_random_read_len == random_read_len)
        break

    upper_index_of_random = random_read_len + fiveP_len
    num_kmers_each_read = random_read_len - k + 1

    #### Pick the binning function once, rather than for every kmer occurrence
    if (num_bins == 5):
        get_bin_idx = get_bin_of_5_from_mean_Ppaired
    else:
        get_bin_idx = get_bin_of_10_from_mean_Ppaired

    D = {
        "num_reads": 0,
        "Ppair_and_count_by_kmer_idx_D": {},
        "counts_by_kmer_binidx_D": {}
    }
    for kmer in RBNS_utils.yield_kmers(k):
        D["counts_by_kmer_binidx_D"][kmer] = {}
        for i in range(num_bins):
            D["counts_by_kmer_binidx_D"][kmer][i] = 0
        D["Ppair_and_count_by_kmer_idx_D"][kmer] = {}
        for idx in range(-flank_len, k + flank_len):
            D["Ppair_and_count_by_kmer_idx_D"][kmer][idx] = {
                'counts': 0,
                'Ppaired_sum': 0.
            }

    for lines_L in RBNS_utils.iterNlines(reads_w_struct_F,
                                         4,
                                         strip_newlines=True):

        read_w_adapter = lines_L[0]
        random_seq = read_w_adapter[fiveP_len:upper_index_of_random]

        #### The Ppaired of only the random region of the read
        Ppaired_L = lines_L[1].split(" ")
        pruned_Ppaired_L = [
            float(x) for x in Ppaired_L[fiveP_len:upper_index_of_random]
        ]

        D["num_reads"] += 1

        for start_idx in range(num_kmers_each_read):

            #### Get the kmers in the read
            kmer = random_seq[start_idx:(start_idx + k)]
            Ppaired_kmer_L = pruned_Ppaired_L[start_idx:(start_idx + k)]
            mean_Ppaired = np.mean(Ppaired_kmer_L)

            bin_idx = get_bin_idx(mean_Ppaired)
            D["counts_by_kmer_binidx_D"][kmer][bin_idx] += 1

            ##### Go through and get all of the Ppaired flanking
            by_rel_idx_D = D["Ppair_and_count_by_kmer_idx_D"][kmer]
            for rel_idx in range(-flank_len, flank_len + k):
                this_idx = start_idx + rel_idx
                #### Only positions within the random region; a direct bounds
                ####    comparison avoids scanning a list of all indices
                if (0 <= this_idx < random_read_len):
                    by_rel_idx_D[rel_idx]['counts'] += 1
                    by_rel_idx_D[rel_idx]['Ppaired_sum'] +=\
                            pruned_Ppaired_L[this_idx]

    ##### Pickle to out_D_F
    RBNS_utils.pkl_with_formatfile(D, out_D_F)
def analyze_freqs_by_position_one_library(protein,
                                          main_DIR,
                                          conc_for_fastq,
                                          ks_L,
                                          make_output_Fs=True,
                                          num_controls=20,
                                          max_log2_val_colormap=None):
    """
    - Calculates the KL divergence of
        (Uniform across read || Observed freqs. across read) for each kmer,
        and outputs a .txt table with kmers in descending order of
        KL Divergence

    - For each k in ks_L, loads the previously pickled dictionary
        main_DIR/frequency_Ds/
            <protein>_<conc_for_fastq>.<k>mer.frequencies.by_position.pkl,
        which is indexed as D[position][kmer]

    - INPUTs:
        - conc_for_fastq: "input", or the concentration (in nM) of the library
        - ks_L: the k's to analyze
        - make_output_Fs:
            - If True, makes a .txt out file and a plot;
            - If False, doesn't make .txt/.pdf (this is used the first time
                around to get the maximum absolute log2 value so that on the
                second time around when plots are made, all the colorbars can
                be coordinated together)
        - num_controls: the number of evenly spaced "control" kmers (those
            with KL divergence below the mean) to include
        - max_log2_val_colormap:
            - If passed in, the heatmap colorbar will go from
                -max_log2_val_colormap to max_log2_val_colormap

    - RETURNS:
        return_D[k] = {"max_abs_log2_plotted": max_abs_log2_plotted}
            and, if make_output_Fs, additionally:
                "out_F", "kmer_to_KLDiv_D", and "fig"
    """
    return_D = {}

    if (conc_for_fastq == "input"):
        conc_label = "Input lib."
    else:
        conc_label = "{} nM lib.".format(conc_for_fastq)

    frequency_Ds_DIR = os.path.join(main_DIR, "frequency_Ds")
    RBNS_utils.make_dir(frequency_Ds_DIR)

    #### go through each of the k's
    for k in ks_L:

        #### Load the previously pickled dictionary of kmer frequencies at each
        ####    position (pickles must be opened in binary mode)
        D_F = os.path.join(
            frequency_Ds_DIR,
            "{0}_{1}.{2}mer.frequencies.by_position.pkl".format(
                protein, conc_for_fastq, k))
        with open(D_F, "rb") as f:
            freqs_by_pos_kmer_D = pickle.load(f)

        num_kmers_per_rd = len(freqs_by_pos_kmer_D)

        #### A uniform distribution over all positions in the read
        uniform_L = [1. / num_kmers_per_rd] * num_kmers_per_rd

        #### Now go through each of the kmers and get the KL divergence of
        ####    KLDiv( uniform || observed freqs. across read )
        kmer_KLDiv_tuples_L = []
        kmer_to_KLDiv_D = {}
        for kmer in RBNS_utils.yield_kmers(k):

            obs_freqs_L = [
                freqs_by_pos_kmer_D[x][kmer] for x in range(num_kmers_per_rd)
            ]
            sum_obs_freqs = sum(obs_freqs_L)
            #### Normalize the obs_freqs_L
            obs_freqs_L = [x / sum_obs_freqs for x in obs_freqs_L]

            #### Get the KL Divergence
            KL = RBNS_utils.KL_divergence(uniform_L, obs_freqs_L)
            kmer_to_KLDiv_D[kmer] = KL

            #### Also get the log2(OBSERVED/UNIFORM) at each position
            observed_over_unif_L = []
            for obs_freq, unif_freq in zip(obs_freqs_L, uniform_L):
                try:
                    observed_over_unif_L.append(
                        math.log(obs_freq / unif_freq, 2))
                except ValueError:
                    #### the log is undefined (e.g., an observed freq. of 0);
                    ####    a placeholder of 1. is used
                    #### NOTE(review): the ordered_kmers_to_consider version
                    ####    of this function uses 0. here - confirm which
                    ####    placeholder is intended
                    observed_over_unif_L.append(1.)

            kmer_KLDiv_tuples_L.append((kmer, KL, observed_over_unif_L))

        #### Sort the kmers by decreasing KL divergence
        kmer_KLDiv_tuples_L.sort(key=lambda x: -1 * x[1])

        #### Get the mean KL divergence
        KL_divs = [x[1] for x in kmer_KLDiv_tuples_L]
        mean_KL = RBNS_utils.mean_std(KL_divs)[0]

        #### The "most unequal" kmers are the 30 with the highest KL
        ####    divergence
        top_tuples_L = kmer_KLDiv_tuples_L[:30]

        #### Go through each of the significant kmers
        sig_kmers_to_KL_Div_D = {}
        sig_kmers_to_log2_Obs_over_Exp_L_D = {}
        #### the maximum absolute log2 value plotted, so that all of the
        ####    libraries can have the same colormap scale
        max_abs_log2_plotted = 0.
        for kmer, KL, observed_over_unif_L in top_tuples_L:
            sig_kmers_to_KL_Div_D[kmer] = KL
            sig_kmers_to_log2_Obs_over_Exp_L_D[kmer] = observed_over_unif_L
            #### update max_abs_log2_plotted
            max_abs_log2_plotted = max(max_abs_log2_plotted,
                                       max(observed_over_unif_L),
                                       -1 * min(observed_over_unif_L))

        #### Get "control" kmer distributions that have KL divergence below
        ####    the mean
        ctrl_STD_tuples_L = [x for x in kmer_KLDiv_tuples_L if x[1] < mean_KL]
        control_delta = int(len(ctrl_STD_tuples_L) / float(num_controls))
        #### go through and get the num_controls evenly spaced controls to plot
        control_tuples_L = [
            ctrl_STD_tuples_L[control_delta * x] for x in range(num_controls)
        ]

        #### Go through each of the control kmers
        low_kmers_to_KL_Div_D = {}
        low_kmers_to_log2_Obs_over_Exp_L_D = {}
        for kmer, KL, observed_over_unif_L in control_tuples_L:
            low_kmers_to_KL_Div_D[kmer] = KL
            low_kmers_to_log2_Obs_over_Exp_L_D[kmer] = observed_over_unif_L
            #### update max_abs_log2_plotted
            max_abs_log2_plotted = max(max_abs_log2_plotted,
                                       max(observed_over_unif_L),
                                       -1 * min(observed_over_unif_L))

        #### add the max_abs_log2_plotted to the return_D
        return_D[k] = {"max_abs_log2_plotted": max_abs_log2_plotted}

        if make_output_Fs:

            #### Make the out_F
            out_DIR = os.path.join(main_DIR, "tables/by_position")
            RBNS_utils.make_dir(out_DIR)
            out_basename = "{0}mers.{1}greatest_KL_div_of_freqs_across_read.txt".format(
                k, conc_label.replace(" ", "_"))
            out_F = os.path.join(out_DIR, out_basename)

            with open(out_F, "w") as f_out:
                #### write a header line
                f_out.write("{0}: {1}\n".format(protein, conc_label))
                f_out.write(
                    "\tKL Div(Uniform||Observed)\t\tlog2(Obs/Unif) at Pos 1\tPos. 2\n"
                )

                #### Go through and write out all of the kmers
                for kmer, KL, observed_over_unif_L in kmer_KLDiv_tuples_L:
                    f_out.write("\n{0}\t{1:.4g}\t\t".format(kmer, KL))
                    for ratio in observed_over_unif_L:
                        f_out.write("{0:.3f}\t".format(ratio))

            #### Add to (rather than replace) return_D[k], so that
            ####    "max_abs_log2_plotted" is still returned
            return_D[k]["out_F"] = out_F
            return_D[k]["kmer_to_KLDiv_D"] = kmer_to_KLDiv_D

            #### Make a plot using the helper function in RBNS_plots
            returned_fig_D = RBNS_plots.make_rectangular_heatmap_plot_RBNS_freqs(
                sig_kmers_to_KL_Div_D,
                sig_kmers_to_log2_Obs_over_Exp_L_D,
                low_kmers_to_KL_Div_D,
                low_kmers_to_log2_Obs_over_Exp_L_D,
                title="{0}: {1} {2}mer frequencies across reads".format(
                    protein.replace("_", " "), conc_label, k),
                colorbar_label=r"$log_2$(Observed / Uniform freq.)",
                max_log2_val_colormap=max_log2_val_colormap)

            return_D[k]["fig"] = returned_fig_D["fig"]

    return return_D
def analyze_freqs_by_position_one_barcodes_ordered_kmers_to_consider(
        ordered_kmers_to_consider_Ls_by_k_D,
        protein,
        main_DIR,
        conc_for_fastq,
        ks_L,
        ordered_kmers_description_fnames="",
        make_output_Fs=True,
        num_controls=20,
        max_log2_val_colormap=None):
    """
    - A helper function called by the
        analyze_freqs_by_position_all_barcodes_one_protein_top_enriched_kmers()
        function

    - For each k in ks_L, loads the previously pickled dictionary
        main_DIR/frequency_Ds/
            <protein>_<conc_for_fastq>.<k>mer.frequencies.by_position.pkl
        (indexed as D[position][kmer]) and calculates the KL divergence of
        (Uniform across read || Observed freqs. across read) for each kmer,
        and outputs a .txt table of the kmers to consider

    - INPUTs:
        - ordered_kmers_to_consider_Ls_by_k_D:
            - for each k, the ordered list of kmers to consider, e.g., the
                sig. enriched kmers; if the list for a k is empty,
                return_D[k] is {}
        - conc_for_fastq: "input", or the concentration (in nM) of the library
        - ks_L: the k's to analyze
        - ordered_kmers_description_fnames:
            - a name that will be added to the output .txt file name to
                distinguish it and is used to annotate the plot (e.g., "3std"
                to denote these kmers as those with Z-score >= 3)
                - "3.0std"
                - "2.0std"
                - "least"
                - "Adapter 1" or "Adapter 2"
        - make_output_Fs:
            - If True, makes a .txt out file and a plot;
            - If False, doesn't make .txt/.pdf (this is used the first time
                around to get the maximum absolute log2 value so that on the
                second time around when plots are made, all the colorbars can
                be coordinated together)
        - num_controls: the number of evenly spaced "control" kmers (those
            with KL divergence below the mean) to include
        - max_log2_val_colormap:
            - If passed in, the heatmap colorbar will go from
                -max_log2_val_colormap to max_log2_val_colormap

    - RETURNS:
        return_D[k] = {"max_abs_log2_plotted": ,
                       "kmers_to_KL_Div_D": ,
                       "kmers_to_log2_Obs_over_Exp_L_D": ,
                       "control_kmers_to_KL_Div_D": ,
                       "control_kmers_to_log2_Obs_over_Exp_L_D": }
            and, if make_output_Fs, additionally "fig"
    """
    return_D = {}

    frequency_Ds_DIR = os.path.join(main_DIR, "frequency_Ds")
    RBNS_utils.make_dir(frequency_Ds_DIR)

    if (conc_for_fastq == "input"):
        conc_label = "Input lib."
    else:
        conc_label = "{} nM lib.".format(conc_for_fastq)

    #### go through each of the k's
    for k in ks_L:

        ordered_kmers_to_consider_L = ordered_kmers_to_consider_Ls_by_k_D[k]
        if (len(ordered_kmers_to_consider_L) == 0):
            return_D[k] = {}
            continue

        #### Load the previously pickled dictionary of kmer frequencies at each
        ####    position (pickles must be opened in binary mode)
        D_F = os.path.join(
            frequency_Ds_DIR,
            "{0}_{1}.{2}mer.frequencies.by_position.pkl".format(
                protein, conc_for_fastq, k))
        with open(D_F, "rb") as f:
            freqs_by_pos_kmer_D = pickle.load(f)

        num_kmers_per_rd = len(freqs_by_pos_kmer_D)

        #### A uniform distribution over all positions in the read
        uniform_L = [1. / num_kmers_per_rd] * num_kmers_per_rd

        #### Now go through each of the kmers and get the KL divergence of
        ####    KLDiv( uniform || observed freqs. across read )
        kmer_KLDiv_tuples_L = []
        kmer_to_KLDiv_D = {}
        for kmer in RBNS_utils.yield_kmers(k):

            obs_freqs_L = [
                freqs_by_pos_kmer_D[x][kmer] for x in range(num_kmers_per_rd)
            ]
            sum_obs_freqs = sum(obs_freqs_L)
            #### Normalize the obs_freqs_L
            obs_freqs_L = [x / sum_obs_freqs for x in obs_freqs_L]

            #### Get the KL Divergence
            KL = RBNS_utils.KL_divergence(uniform_L, obs_freqs_L)
            kmer_to_KLDiv_D[kmer] = KL

            #### Also get the log2(OBSERVED/UNIFORM) at each position
            observed_over_unif_L = []
            for obs_freq, unif_freq in zip(obs_freqs_L, uniform_L):
                try:
                    observed_over_unif_L.append(
                        math.log(obs_freq / unif_freq, 2))
                except ValueError:
                    #### the log is undefined (e.g., an observed freq. of 0);
                    ####    a placeholder of 0. is used
                    observed_over_unif_L.append(0.)

            kmer_KLDiv_tuples_L.append((kmer, KL, observed_over_unif_L))

        #### Sort the kmers by decreasing KL divergence
        kmer_KLDiv_tuples_L.sort(key=lambda x: -1 * x[1])

        #### Get the mean KL divergence
        KL_divs = [x[1] for x in kmer_KLDiv_tuples_L]
        mean_KL = RBNS_utils.mean_std(KL_divs)[0]

        #### Extract the ordered_kmers_to_consider; each kmer has exactly one
        ####    tuple, so a dictionary lookup replaces a scan through all of
        ####    the tuples. Any kmers to consider that aren't among the kmers
        ####    from RBNS_utils.yield_kmers(k) are skipped
        tuple_by_kmer_D = {tupl[0]: tupl for tupl in kmer_KLDiv_tuples_L}
        kmer_KLDiv_tuples_desiredkmers_L = [
            tuple_by_kmer_D[kmer] for kmer in ordered_kmers_to_consider_L
            if kmer in tuple_by_kmer_D
        ]

        #### Go through each of the significant kmers
        kmers_to_KL_Div_D = {}
        kmers_to_log2_Obs_over_Exp_L_D = {}
        #### the maximum absolute log2 value plotted, so that all of the
        ####    libraries can have the same colormap scale
        max_abs_log2_plotted = 0.
        for kmer, KL, observed_over_unif_L in kmer_KLDiv_tuples_desiredkmers_L:
            kmers_to_KL_Div_D[kmer] = KL
            kmers_to_log2_Obs_over_Exp_L_D[kmer] = observed_over_unif_L
            #### update max_abs_log2_plotted
            max_abs_log2_plotted = max(max_abs_log2_plotted,
                                       max(observed_over_unif_L),
                                       -1 * min(observed_over_unif_L))

        #### Get "control" kmer distributions that have KL divergence below
        ####    the mean
        ctrl_STD_tuples_L = [x for x in kmer_KLDiv_tuples_L if x[1] < mean_KL]
        control_delta = int(len(ctrl_STD_tuples_L) / float(num_controls))
        #### go through and get the num_controls evenly spaced controls to plot
        control_tuples_L = [
            ctrl_STD_tuples_L[control_delta * x] for x in range(num_controls)
        ]

        #### Go through each of the control kmers
        control_kmers_to_KL_Div_D = {}
        control_kmers_to_log2_Obs_over_Exp_L_D = {}
        for kmer, KL, observed_over_unif_L in control_tuples_L:
            control_kmers_to_KL_Div_D[kmer] = KL
            control_kmers_to_log2_Obs_over_Exp_L_D[kmer] = observed_over_unif_L
            #### update max_abs_log2_plotted
            max_abs_log2_plotted = max(max_abs_log2_plotted,
                                       max(observed_over_unif_L),
                                       -1 * min(observed_over_unif_L))

        #### add the max_abs_log2_plotted to the return_D
        return_D[k] = {
            "max_abs_log2_plotted":
            max_abs_log2_plotted,
            "kmers_to_KL_Div_D":
            kmers_to_KL_Div_D,
            "kmers_to_log2_Obs_over_Exp_L_D":
            kmers_to_log2_Obs_over_Exp_L_D,
            "control_kmers_to_KL_Div_D":
            control_kmers_to_KL_Div_D,
            "control_kmers_to_log2_Obs_over_Exp_L_D":
            control_kmers_to_log2_Obs_over_Exp_L_D
        }

        if make_output_Fs:

            #### Make the out_F
            out_DIR = os.path.join(main_DIR, "tables/by_position")
            RBNS_utils.make_dir(out_DIR)

            #### < Make a table of KL div by decreasing R of sig. R kmers > ###
            out_basename = "{0}mers.{1}sig_R_{2}.KL_div_of_freqs_across_read.txt".format(
                k, conc_label.replace(" ", "_"),
                ordered_kmers_description_fnames.replace(" ", "_"))
            out_F = os.path.join(out_DIR, out_basename)

            with open(out_F, "w") as f_out:
                #### write a header line
                f_out.write("{0}: {1}\n".format(protein.replace("_", " "),
                                                conc_label))
                f_out.write(
                    "\tKL Div(Uniform||Observed)\t\tlog2(Obs/Unif) at Pos 1\tPos. 2\n"
                )

                #### Go through and write out all of the kmers to consider,
                ####    in their given order
                for kmer in ordered_kmers_to_consider_L:
                    if kmer not in tuple_by_kmer_D:
                        continue
                    _, KL, observed_over_unif_L = tuple_by_kmer_D[kmer]
                    f_out.write("\n{0}\t{1:.4g}\t\t".format(kmer, KL))
                    for ratio in observed_over_unif_L:
                        f_out.write("{0:.3f}\t".format(ratio))
            ### </ Make a table of KL div by decreasing R of sig. R kmers > ###

            #### try to get the Z-score threshold for sig. enrichment
            kmers_type_annot = "Enriched $k$mers"
            try:
                num_std = int(
                    float(ordered_kmers_description_fnames.split("std")[0]))
                #### a raw string, since "\g" is not a valid escape sequence
                kmers_type_annot += (r" (Z-score $\geq${})".format(num_std))
            except ValueError:
                if (ordered_kmers_description_fnames == "least"):
                    kmers_type_annot = "Least Enriched $k$mers"
                else:
                    kmers_type_annot = ordered_kmers_description_fnames

            #### Make the plot
            returned_fig_D = RBNS_plots.make_rectangular_heatmap_plot_RBNS_freqs(
                kmers_to_KL_Div_D,
                kmers_to_log2_Obs_over_Exp_L_D,
                control_kmers_to_KL_Div_D,
                control_kmers_to_log2_Obs_over_Exp_L_D,
                order_of_kmers_L=ordered_kmers_to_consider_L,
                kmers_type_annot=kmers_type_annot,
                title="{0}: {1} {2}mer frequencies across reads".format(
                    protein.replace("_", " "), conc_label, k),
                colorbar_label=r"$log_2$(Observed / Uniform freq.)",
                max_log2_val_colormap=max_log2_val_colormap)

            return_D[k]["fig"] = returned_fig_D["fig"]

    return return_D