def compare_designs(args): all_seqs = read_seqs(args['fasta'], '.pdb') sc_df = Rf.score_file2df(args['sc']) best_by_aa_freq_df = Rf.get_best_num_by_term(sc_df, 10, 'a_tms_aa_comp') names_to_use = list(best_by_aa_freq_df['description']) names_to_use += [args['sc'].split('.')[0].split('all_')[1]] all_seqs = {k: v for k, v in all_seqs.items() if k in names_to_use} seqs_aa_freqs = {n: {} for n in all_seqs.keys() if n in names_to_use} freq_dists = {} for n, s in all_seqs.items(): seqs_aa_freqs[n] = s.all_aas_frequencies(clean_zeros=True) # freq_dists[n] = calc_freq_dist(seqs_aa_freqs[n], wt_aa_freq, # 'AGFILPSTVWY') freq_dists[n] = calc_freq_dist(seqs_aa_freqs[n], wt_aa_freq) freq_dists = OrderedDict(sorted(freq_dists.items(), key=lambda t: t[1])) fig = plt.figure(figsize=(18, 18)) nrows = math.floor(len(all_seqs.keys()) / 4) nrows += 1 if len(all_seqs.keys()) % 4 > 0 else 0 i = 0 # for n, s in all_seqs.items(): first = True for n, dist_freq in freq_dists.items(): if first: print(n) first = False s = all_seqs[n] plt.subplot(nrows, 4, 1 + i) fracs = [a for a in seqs_aa_freqs[n].values()] labels = [a for a in seqs_aa_freqs[n].keys()] colors = [color_map[a] for a in labels] plt.pie(fracs, labels=labels, autopct='%1.1f%%', colors=colors) name = n.split('.pdb')[0] if name.count('poly') == 2: p_name = name.split('_poly')[0] else: p_name = name if name in list(sc_df['description']): title = ( '%s\nscore: %.0f ∆∆G: %.0f\nfreq_dist: %.2f\naa_comp %.2f' % (p_name, sc_df[sc_df['description'] == name]['score'], sc_df[sc_df['description'] == name]['a_ddg'], dist_freq * 100, best_by_aa_freq_df[best_by_aa_freq_df['description'] == name]['a_tms_aa_comp'])) else: title = 'freq_dist %.2f' % (dist_freq * 100) plt.title(title) i += 1 if args['show'] == 'show': plt.show() else: plt.savefig('%s_aa_freq.png' % args['fasta'])
def meta_pie(args: dict): mut_aa_freqs = {aa: 0 for aa in aas} score_files = [a for a in os.listdir(args['dir']) if '.score' in a] for fasta_file in [a for a in os.listdir(args['dir']) if '.fasta' in a]: temp_seqs = read_seqs('%s/%s' % (args['dir'], fasta_file), remove_suffix='.pdb') temp_sc = Rf.score_file2df('%s/%s.score' % (args['dir'], fasta_file.split('.')[0])) names = list( Rf.get_best_num_by_term(temp_sc, 10, 'a_tms_aa_comp')['description']) for n, aaseq in temp_seqs.items(): if n in names: for aa in aas: mut_aa_freqs[aa] += (aaseq.aa_frequency(aa) * len(aaseq)) mut_aa_freqs_srt = OrderedDict( sorted(mut_aa_freqs.items(), key=lambda t: t[1])) ori_aa_freqs = {aa: 0 for aa in aas} for aaseq in read_seqs('/home/labs/fleishman/jonathaw/elazaridis/design/' + 'polyA_13Nov/chosen_from_all_27Feb/pdbs/' + 'all_dzns.fasta').values(): for aa in aas: ori_aa_freqs[aa] += (aaseq.aa_frequency(aa) * len(aaseq)) ori_aa_freqs_srt = OrderedDict( sorted(ori_aa_freqs.items(), key=lambda t: t[1])) plt.figure() plt.subplot(1, 3, 1) plt.title('natural TMs') plt.pie(list(wt_aa_freq.values()), labels=list(wt_aa_freq.keys()), autopct='%1.1f%%', colors=[color_map[a] for a in list(wt_aa_freq.keys())]) plt.axis('equal') plt.subplot(1, 3, 2) plt.title('original designs') plt.pie(list(ori_aa_freqs_srt.values()), labels=list(ori_aa_freqs_srt.keys()), autopct='%1.1f%%', colors=[color_map[a] for a in list(ori_aa_freqs_srt.keys())]) plt.axis('equal') plt.subplot(1, 3, 3) plt.title('mutated designs') plt.pie(list(mut_aa_freqs_srt.values()), labels=list(mut_aa_freqs_srt.keys()), autopct='%1.1f%%', colors=[color_map[a] for a in list(mut_aa_freqs_srt.keys())]) plt.axis('equal') plt.show()