def show_predcition_matrix(prediction: dict) -> None:
    """
    Draw a colour-coded grid comparing observed and predicted binding.

    The observed values come from ``binding_data()``; every cell of the grid
    gets one of four codes:
        0 - observed no,  predicted no
        1 - observed yes, predicted yes
        2 - observed no,  predicted yes
        3 - observed yes, predicted no

    :param prediction: nested mapping ``prediction[cohesin][dockerin]`` holding
        0 or 1 for every pair present in ``binding_data()``
    :return: None, the figure is displayed with ``plt.show()``
    """
    # obs_pre[observed][predicted] -> cell code
    obs_pre = {0: {0: 0, 1: 2}, 1: {0: 3, 1: 1}}
    binding_ = binding_data()
    # Rows are the dockerins of the first cohesin entry, columns are the cohesins.
    # NOTE(review): assumes every cohesin lists the same dockerins - confirm.
    df = pd.DataFrame(data=0,
                      index=list(list(binding_.values())[0].keys()),
                      columns=list(binding_.keys()))
    for coh, doc_ in binding_.items():
        for doc, obs in doc_.items():
            # A single indexing step: chained ``df[coh][doc] = ...`` does not
            # write back into ``df`` when pandas Copy-on-Write is active.
            df.at[doc, coh] = obs_pre[obs][prediction[coh][doc]]

    plt.figure()
    axis = plt.gca()
    cmap = colors.ListedColormap(['white', 'cornflowerblue', 'red', 'darkorange'])
    # One colour bin per code 0..3.
    bounds = [-0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    grid = array(df)  # converted once, reused for drawing and labelling
    plt.pcolor(grid, cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    for y, row in enumerate(grid):
        for x, code in enumerate(row):
            if code >= 0:
                # +0.5 centres the label inside the unit cell drawn by pcolor
                plt.text(x + 0.5, y + 0.5, code,
                         horizontalalignment='center', verticalalignment='center')
    plt.yticks(arange(0.5, len(df.index), 1), df.index)
    plt.xticks(arange(0.5, len(df.columns), 1), df.columns, rotation=70)
    plt.xlabel('Cohesin name', style='oblique')
    plt.ylabel('Dockerin name', style='oblique')
    axis.set_aspect('equal')
    plt.title('Cohesin dockerin cross binding')
    plt.suptitle('0: obs no pred no, 1: obs yes, pred yes\n2: obs no pred yes, 3: obs yes pred no')
    plt.show()
def postdictions_summary(args):
    """
    Summarise a postdiction run as a heat map of cohesin-dockerin outcomes.

    Every file in the hard-coded results directory whose name contains
    ``.score`` is read with ``score2dict`` and filtered with
    ``all_who_pass_run_filters``. A pair counts as a predicted binder when at
    least 10 entries pass. The prediction is compared with the observed value
    from ``binding_data()`` and stored as a code:
        0 - TN, 1 - TP, 2 - FP, 3 - FN (-100 marks pairs with no score file, 'NA').
    The resulting table is printed and plotted.

    :param args: forwarded unchanged to ``all_who_pass_run_filters``
    :return: None
    """
    from matplotlib import colors
    from _binding_data import binding_data

    # obs_pre[observed][predicted] -> outcome code
    obs_pre = {False: {False: 0, True: 2}, True: {False: 3, True: 1}}
    min_passing = 10  # entries that must pass the filters to call a pair a binder
    binding_dict = binding_data()
    results_root = '/home/labs/fleishman/jonathaw/no_backup/postdiction_new/results/'
    # Threshold sets tried earlier, kept for reference:
    #   generate_run_filters(args={'ddg': -16, 'sasa': 1200, 'shape': 0.5, 'packstat': 0.5,
    #                              'buried_2': 30, 'hbonds': 12})
    #   {'ddg': 12, 'sasa': 1400, 'shape': 0.45, 'packstat': 0.45, 'buried_2': 2, 'hbonds': 4}
    #   {'ddg': 14, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 4}
    # harsh
    args_ = {'ddg': 16, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 6}
    run_filters = generate_run_filters(args_)
    sc_files = [a for a in os.listdir(results_root) if '.score' in a]
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    # Compiled once because it is applied to every file name.
    suffix_re = re.compile(r'_[0-9]{1,2}\.[0-9]{1,2}\.score')
    results, docs = {}, set()

    # Text progress bar: draw an empty frame, then step back to its start.
    bar_width = len(sc_files)
    sys.stdout.write("{%s}" % (" " * bar_width))
    sys.stdout.flush()
    sys.stdout.write("\b" * (bar_width+1))
    for sc_file in sc_files:
        sc_dict = score2dict(results_root+sc_file)
        passed, _ = all_who_pass_run_filters(args, sc_dict, run_filters)
        # File names look like '...all_<coh>_on_<doc>_<n>.<n>.score'
        coh_name = sc_file.split('all_')[1].split('_on_')[0]
        doc_name = suffix_re.split(sc_file.split('_on_')[1])[0]
        docs.add(doc_name)
        observed = binding_dict[coh_name][doc_name]
        results.setdefault(coh_name, {})[doc_name] = obs_pre[observed][len(passed) >= min_passing]
        sys.stdout.write("%s" % random.choice(['!', '@', '#', '$', '%', '^', '&', '*', '(', ')']))
        sys.stdout.flush()
    sys.stdout.write("}\n")

    # move data to data frame; -100 stays in cells that never got a result
    df = pd.DataFrame(columns=sorted(docs), index=sorted(results), data=-100)
    for coh, doc_dict in results.items():
        for doc, res in doc_dict.items():
            # A single indexing step: chained ``df[doc][coh] = res`` does not
            # write back into ``df`` when pandas Copy-on-Write is active.
            df.at[coh, doc] = res
    df = df.transpose()
    print(df)

    axis = plt.gca()
    cmap = colors.ListedColormap(['white', 'red', 'blue', 'green', 'yellow'])
    # One colour bin for the -100 placeholder, then one per outcome code 0..3.
    bounds = [-101, -0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    heatmap = plt.pcolor(np.array(df), cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    plt.yticks(np.arange(0.5, len(df.index), 1), [official_names(a) for a in df.index])
    plt.xticks(np.arange(0.5, len(df.columns), 1), [official_names(a) for a in df.columns], rotation=70)
    axis.set_aspect('equal')
    legend = plt.colorbar(heatmap)
    # Numeric ticks are replaced by hand-placed outcome labels.
    legend.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['NA', 'TN', 'TP', 'FP', 'FN']):
        # NOTE(review): these positions assume the colour bar spans 0..1 on its
        # y axis - confirm with the matplotlib version in use.
        legend.ax.text(.5, (2 * j + 1) / 10.0, lab, ha='center', va='center')
    legend.ax.get_yaxis().labelpad = 15
    plt.suptitle(str(args_))
    plt.show()
def parse_binding_data() -> pd.DataFrame:
    """
    Gather all cohesin-dockerin pairs and their sequences into one table.

    Rows come from three places, in this order: the mapping returned by
    ``binding_data()``, the mapping returned by ``parse_vered_binding()``
    (a pair binds when its value equals 1), and a fixed list of eleven names
    that are used as both cohesin and dockerin and always marked as binders.

    :return: data frame with columns 'coh_name', 'doc_name', 'coh_seq',
        'doc_seq', 'binders', indexed from 1
    """
    from _binding_data import binding_data

    rachel_root = '/home/labs/fleishman/jonathaw/decision_tree/'
    fasta_options = {'suffix_to_remove': '/', 'lower': True}
    cohs = read_multi_fastas(rachel_root + 'cohesins_from_rachel_and_vered.fasta_aln', **fasta_options)
    docs = read_multi_fastas(rachel_root + 'dockerins_from_rachel_and_vered.fasta_aln', **fasta_options)
    rachel_bind = binding_data()
    vered_bind = parse_vered_binding()
    # NOTE(review): these look like PDB identifiers - confirm.
    self_binding_names = ['1ohz', '2b59', '2ozn', '2vn5', '2y3n', '3ul4',
                          '4fl4', '4fl5', '4dh2', '4uyp', '5new']

    def _pairs():
        # Yields (cohesin, dockerin, binds) lazily, one source after another.
        for coh, docs_dict in rachel_bind.items():
            for doc, bound in docs_dict.items():
                yield coh, doc, bound
        for coh, docs_dict in vered_bind.items():
            for doc, flag in docs_dict.items():
                yield coh, doc, flag == 1
        for name in self_binding_names:
            yield name, name, True

    result = pd.DataFrame(columns=['coh_name', 'doc_name', 'coh_seq', 'doc_seq', 'binders'])
    n_rows = 0
    for n_rows, (coh, doc, binds) in enumerate(_pairs(), start=1):
        # Row labels are 1-based and double as the running row count.
        result.loc[n_rows] = [coh, doc, cohs[coh], docs[doc], binds]
    print('there are %i rows in the data' % n_rows)
    return result
def parse_binding_data() -> pd.DataFrame:
    """
    Build one feature row per cohesin-dockerin pair from ``binding_data()``.

    Each row holds the two names, the values produced by ``seqs2row`` for the
    pair's sequences, and a final 'binding' column that is 1 for a truthy
    observation and 0 otherwise.

    :return: data frame indexed from 1 with columns 'coh', 'doc', the
        'core coh/doc' and 'rim coh/doc' feature columns, and 'binding'
    """
    # An earlier version read the sequences with read_multi_fastas from
    # root_path + 'cohs_specific_pos.fasta' / 'docs_specific_pos.fasta'.
    cohs, docs = retrive_relevant_poses()
    data = binding_data()

    # (name template, positions, variants) for each group of feature columns
    column_groups = (
        ('core coh %i %s', [1, 2], aa2num.keys()),
        ('core doc %i %s', [1, 2, 3], aa2num.keys()),
        ('rim coh %i %s', range(1, 19), types),
        ('rim doc %i %s', range(1, 8), types),
    )
    header = ['coh', 'doc']
    for template, positions, variants in column_groups:
        header.extend(template % (pos, variant) for pos in positions for variant in variants)
    header.append('binding')

    df = pd.DataFrame(columns=header)
    row_label = 1
    for coh, doc_dict in data.items():
        # NOTE(review): get_seq is read as an attribute, not called - confirm it is a property.
        coh_seq = cohs[coh].get_seq
        for doc, res in doc_dict.items():
            doc_seq = docs[doc].get_seq
            features = seqs2row(coh_seq, doc_seq)
            df.loc[row_label] = [coh, doc] + features + [int(bool(res))]
            row_label += 1
    return df
def show_predcition_matrix(prediction: dict) -> None:
    """
    Draw a colour-coded grid comparing observed and predicted binding.

    The observed values come from ``binding_data()``; every cell of the grid
    gets one of four codes:
        0 - observed no,  predicted no
        1 - observed yes, predicted yes
        2 - observed no,  predicted yes
        3 - observed yes, predicted no

    :param prediction: nested mapping ``prediction[cohesin][dockerin]`` holding
        0 or 1 for every pair present in ``binding_data()``
    :return: None, the figure is displayed with ``plt.show()``
    """
    # obs_pre[observed][predicted] -> cell code
    obs_pre = {0: {0: 0, 1: 2}, 1: {0: 3, 1: 1}}
    binding_ = binding_data()
    # Rows are the dockerins of the first cohesin entry, columns are the cohesins.
    # NOTE(review): assumes every cohesin lists the same dockerins - confirm.
    df = pd.DataFrame(data=0,
                      index=list(list(binding_.values())[0].keys()),
                      columns=list(binding_.keys()))
    for coh, doc_ in binding_.items():
        for doc, obs in doc_.items():
            # A single indexing step: chained ``df[coh][doc] = ...`` does not
            # write back into ``df`` when pandas Copy-on-Write is active.
            df.at[doc, coh] = obs_pre[obs][prediction[coh][doc]]

    plt.figure()
    axis = plt.gca()
    cmap = colors.ListedColormap(
        ['white', 'cornflowerblue', 'red', 'darkorange'])
    # One colour bin per code 0..3.
    bounds = [-0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    grid = array(df)  # converted once, reused for drawing and labelling
    plt.pcolor(grid, cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    for y, row in enumerate(grid):
        for x, code in enumerate(row):
            if code >= 0:
                # +0.5 centres the label inside the unit cell drawn by pcolor
                plt.text(x + 0.5, y + 0.5, code,
                         horizontalalignment='center',
                         verticalalignment='center')
    plt.yticks(arange(0.5, len(df.index), 1), df.index)
    plt.xticks(arange(0.5, len(df.columns), 1), df.columns, rotation=70)
    plt.xlabel('Cohesin name', style='oblique')
    plt.ylabel('Dockerin name', style='oblique')
    axis.set_aspect('equal')
    plt.title('Cohesin dockerin cross binding')
    plt.suptitle(
        '0: obs no pred no, 1: obs yes, pred yes\n2: obs no pred yes, 3: obs yes pred no'
    )
    plt.show()