def postdictions_summary(args):
    """
    classifies every cohesin/dockerin postdiction as TN/TP/FP/FN and draws the result as a heat map.
    every score file in the hard-coded results folder is filtered with a fixed set of thresholds,
    and a pair is predicted to bind if at least 10 structures pass. the prediction is compared
    with the observed binding taken from binding_data()
    :param args: run arguments, forwarded to all_who_pass_run_filters
    :return: None. prints the result table and shows the figure
    """
    from matplotlib import colors
    from _binding_data import binding_data
    # obs_pre[observed][predicted] -> colour code: 0 = TN, 1 = TP, 2 = FP, 3 = FN
    obs_pre = {False: {False: 0, True: 2}, True: {False: 3, True: 1}}
    binding_dict = binding_data()
    results_root = '/home/labs/fleishman/jonathaw/no_backup/postdiction_new/results/'
    # previously tried threshold sets:
    # {'ddg': 12, 'sasa': 1400, 'shape': 0.45, 'packstat': 0.45, 'buried_2': 2, 'hbonds': 4}
    # {'ddg': 14, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 4}  # harsh
    args_ = {'ddg': 16, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 6}
    run_filters = generate_run_filters(args_)
    sc_files = [a for a in os.listdir(results_root) if '.score' in a]
    results, cohs, docs = {}, [], []

    # text progress bar, one random symbol is written for every processed score file
    bar_width = len(sc_files)
    sys.stdout.write("{%s}" % (" " * bar_width))
    sys.stdout.flush()
    sys.stdout.write("\b" * (bar_width+1))
    for sc_file in sc_files:
        sc_dict = score2dict(results_root+sc_file)
        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        coh_name = sc_file.split('all_')[1].split('_on_')[0]
        # raw string, so the regex escapes are not read as (invalid) string escapes
        doc_name = re.split(pattern=r'_[0-9]{1,2}\.[0-9]{1,2}\.score', string=sc_file.split('_on_')[1])[0]
        if coh_name not in results:
            results[coh_name] = {}
            cohs.append(coh_name)
        if doc_name not in docs:
            docs.append(doc_name)
        results[coh_name][doc_name] = obs_pre[binding_dict[coh_name][doc_name]][len(passed) >= 10]
        sys.stdout.write("%s" % random.choice(['!', '@', '#', '$', '%', '^', '&', '*', '(', ')']))
        sys.stdout.flush()
    sys.stdout.write("}\n")

    # move data to data frame, -100 marks pairs with no score file (drawn as NA)
    df = pd.DataFrame(columns=sorted(docs), index=sorted(cohs), data=-100)
    for coh, doc_dict in results.items():
        for doc, res in doc_dict.items():
            # .loc instead of df[doc][coh], chained assignment may write to a temporary copy
            df.loc[coh, doc] = res
    df = df.transpose()
    print(df)

    axis = plt.gca()
    cmap = colors.ListedColormap(['white', 'red', 'blue', 'green', 'yellow'])
    bounds = [-101, -0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    heatmap = plt.pcolor(np.array(df), cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    plt.yticks(np.arange(0.5, len(df.index), 1), [official_names(a) for a in df.index])
    plt.xticks(np.arange(0.5, len(df.columns), 1), [official_names(a) for a in df.columns], rotation=70)
    axis.set_aspect('equal')

    # the colour bar is used as a legend, its ticks are replaced by one label per colour
    legend = plt.colorbar(heatmap)
    legend.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['NA', 'TN', 'TP', 'FP', 'FN']):
        legend.ax.text(.5, (2 * j + 1) / 10.0, lab, ha='center', va='center')
    legend.ax.get_yaxis().labelpad = 15
    plt.suptitle(str(args_))
    plt.show()
def score_diagonal(args: dict):
    """
    counts how many structures pass the run filters in every score file of a folder
    :param args: run arguments. args['score_dir'] is the folder holding the .score files
    :return: {score file name: number of structures that passed}
    """
    results = {}
    run_filters = generate_run_filters(args)
    sc_files = [a for a in os.listdir(args['score_dir']) if a.endswith('.score')]
    for sc_file in sc_files:
        # listdir returns bare names, so the folder has to be put back to open the file
        score_dict = score2dict(os.path.join(args['score_dir'], sc_file))
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        results[sc_file] = len(passed)
        print(sc_file, results[sc_file])
    return results
def prism(args):
    """
    prints a tab separated table of RMSD against ddG, with the ddG of structures that passed
    the run filters in the second column and that of the failed ones in the third
    :param args: run arguments. args['score_file'] is the score file to read
    :return: None
    """
    thresholds = {'ddg': 16.0, 'sasa': 1200, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3, 'hbonds': 10}
    run_filters = generate_run_filters(args=thresholds)
    score_dict = score2dict(args['score_file'])
    passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
    print('RMSD\tPassed\tFailed')
    # the extra tab in the second format pushes the ddG of failed structures one column right
    for row_format, group in (('%3.3f\t%3.3f', passed), ('%3.3f\t\t%3.3f', failed)):
        for entry in group.values():
            print(row_format % (entry['rmsd'], entry['ddg']))
def folder_scores(folder: str) -> dict:
    """
    concatenates all the score files on the folder to one score dict
    :param folder: a folder address
    :return: {name: {filter: grade}} a score dict for the entire folder
    """
    results = {}
    for file_name in os.listdir(folder):
        if file_name.endswith('.sc'):
            # listdir returns bare names, join with the folder so it works outside the cwd
            results.update(score2dict(os.path.join(folder, file_name)))
    return results
def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """
    collects a Result for every design in args['score_dir'] that has more than
    args['purples_threshold'] structures passing the run filters
    :param args: run arguments
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {design_name: Result}
    """
    results = {}
    sc_files = [a for a in os.listdir(args['score_dir']) if a.endswith('.score')]
    for sc_file in sc_files:
        # listdir returns bare names, so the folder has to be put back to open the file
        score_dict = score2dict(os.path.join(args['score_dir'], sc_file))
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        if len(passed) <= args['purples_threshold']:
            continue
        design_name = sc_file[4:-6]
        # the sequence names are the score names without the run date, so find which date to kick out
        date_tag = next((tag for tag in ('_11.10', '_12.10') if tag in sc_file), None)
        if date_tag is None:
            # without a known date the sequences can't be looked up. skip instead of
            # failing on an unbound name, or reusing the sequences of the previous file
            print('skipping %s, no known date tag in its name' % sc_file)
            continue
        coh_seq = coh_seq_dict[(design_name + '_0001.pdb.A').replace(date_tag, '')]
        doc_seq = doc_seq_dict[(design_name + '_0001.pdb.B').replace(date_tag, '')]
        results[design_name] = Result(design_name, coh_seq, doc_seq, len(passed))
    return results
def prediction_results(args):
    """
    prints, for every .score file in the working directory, the cohesin and dockerin names,
    the number and percentage of structures that pass the run filters, and the size of the score dict
    :param args: run arguments, forwarded to all_who_pass_run_filters
    :return: None
    """
    sc_files = [a for a in os.listdir() if a.endswith('.score')]
    run_filters = generate_run_filters(args={'ddg': 18.0, 'sasa': 1300, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': 0})
    for sc_file in sc_files:
        sc_dict = score2dict(sc_file)
        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        names = sc_file.split('_on_')
        coh = names[0].split('all_')[1]
        doc = names[1].split('_0')[0]
        total = len(passed) + len(failed)
        # an empty score file has no structures at all, report 0% instead of dividing by zero
        percent = 100*len(passed)/total if total else 0.0
        print("%-10s %-10s %-4i %-3f %i" % (coh, doc, len(passed), percent, len(sc_dict.keys())))
def get_no_docking_results(sc_name: str, no_dock_file: str) -> (float, float):
    """
    finds the entry with the lowest rmsd in a score file
    :param sc_name: score name (not used)
    :param no_dock_file: score file
    :return: rmsd, ddg of the lowest rmsd entry. (1000., 1000.) if no entry has rmsd under 1000
    """
    best = (1000., 1000.)
    for entry in score2dict(no_dock_file).values():
        # strict comparison, so the first of several equal rmsd entries is the one kept
        if entry['rmsd'] < best[0]:
            best = (entry['rmsd'], entry['ddg'])
    return best
def analyse_minidiagonal(args):
    """
    goes over all .sc files in the working directory, and writes to ../minidiagonal.txt the
    name and number of passed structures of every file that has more than 5 structures
    passing the run filters. prints how many files made it
    :param args: run arguments, forwarded to all_who_pass_run_filters
    :return: None
    """
    thresholds = {'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3, 'hbonds': 10}
    n_files_passed = 0
    with open('../minidiagonal.txt', 'w') as fout:
        run_filters = generate_run_filters(args=thresholds)
        for file_name in os.listdir('./'):
            if file_name[-3:] != '.sc':
                continue
            passed, failed = all_who_pass_run_filters(args, score2dict(file_name), run_filters)
            n_passed = len(passed)
            if n_passed > 5:
                fout.write('%s\t%i\n' % (file_name, n_passed))
                n_files_passed += 1
    print('%i passed minidiagonal' % n_files_passed)
def post_pred_cliques(args):
    """
    finds the largest cliques of designs whose cohesin and dockerin switches are different enough.
    designs with at least args['purples_threshold'] passed structures are gathered from the .score
    files in the working directory (cached in ./all_data.obj). two designs are connected if their
    cohesin switches differ by at least args['diff_by'] and their dockerin switches (also against
    the symmetric form of the second dockerin) differ by at least args['doc_diff_by'], where
    dockerins from different clusters count as one extra difference
    :param args: run arguments, uses 'purples_threshold', 'diff_by' and 'doc_diff_by'
    :return: None. prints every examined pair and the maximal cliques
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3})
    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name+'.pdb.gz.A'
            doc_name = seq_name+'.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                results.append(Result(seq_name, cohs_seqs[coh_name], docs_seqs[doc_name], len(passed)))
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        # NOTE: unpickling runs arbitrary code, only load a cache that this function wrote
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)

    # NOTE(review): graph.obj is only tested for, never written or read here. the clique
    # search is assumed to belong under this test, as it needs the graph built below - confirm
    if os.path.isfile('./graph.obj'):
        return

    result_dict = {i+1: r for i, r in enumerate(results)}
    G = nx.Graph()
    G.add_nodes_from(result_dict)
    # G.nodes() instead of nodes_iter(), which was removed in networkx 2.0.
    # ordered pairs are examined on purpose: only the second dockerin gets the symmetric form,
    # so (n1, n2) and (n2, n1) are not the same test
    for n1 in G.nodes():
        for n2 in G.nodes():
            if n1 == n2:
                continue
            coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
            doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
            doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
            doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
            symm_switch = switch_symm_changer(doc_sw_2)
            if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                    switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                    switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                G.add_edge(n1, n2)
                print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
            else:
                print('NOT\n', result_dict[n1], '\n', result_dict[n2])

    cliques = list(nx.find_cliques(G))
    if not cliques:
        # no design passed the threshold, so there is nothing to take the maximum of
        return
    max_len = max(len(a) for a in cliques)
    max_cliques = [a for a in cliques if len(a) == max_len]
    for clq in max_cliques:
        print(clq, '\n', '\n'.join([str(result_dict[a]) for a in clq]))
def find_thresholds_by_rmsd(args):
    """
    :param args: run args. uses 'score_file' and 'rmsd_threshold'. 'dimer_data', 'x' and 'y' are set here
    :return: None. finds the minimal/maximal (depending on threshold type) of the different filters
    that will pass all structures with RMSD under args['rmsd_threshold'], prints them, and plots
    the structures that pass/fail filters built from them
    """
    score_dict = score2dict(args['score_file'])
    filter_thresholds = dict(a_score=-100000., a_sasa=100000., a_shape=1.0, total_score=1000000., a_ddg=-100.,
                             a_packstat=1.0, a_buried_2=0)
    # (threshold name, how it is widened, score term it follows)
    threshold_updates = (('a_score', max, 'score'), ('a_sasa', min, 'sasa'), ('a_shape', min, 'shape'),
                         ('total_score', min, 'total_score'), ('a_ddg', max, 'ddg'),
                         ('a_packstat', min, 'packstat'), ('a_buried_2', max, 'buried_2'))
    passed_rmsd = []
    for name, sc in score_dict.items():
        if sc['rmsd'] <= args['rmsd_threshold']:
            for thr_name, widen, term in threshold_updates:
                filter_thresholds[thr_name] = widen(filter_thresholds[thr_name], sc[term])
            passed_rmsd.append(sc)
    print('found %i scores with rmsd <= %f' % (len(passed_rmsd), args['rmsd_threshold']))
    print('the old thresholds were:', generate_run_filters().report())
    print('defined these new filters:\n%s' % '\n'.join(['%s %f' % (k, v) for k, v in filter_thresholds.items()]))
    args['dimer_data'] = filter_thresholds

    # (name, typ, threshold, limits, under_over, g_name)
    # the ddG label is a raw string, as \D is not a valid string escape
    filter_specs = (
        ('a_ddg', 'ddg', filter_thresholds['a_ddg'], [-10000, 10000], 'under', r'$\Delta$$\Delta$G'),
        ('a_score', 'score', filter_thresholds['a_score'], [-10000, 10000], 'under', 'Score'),
        ('a_sasa', 'sasa', filter_thresholds['a_sasa'], [0, 100000], 'over', 'SASA'),
        ('a_shape', 'shape', filter_thresholds['a_shape'], [0.0, 1.0], 'over', 'Shape Complementarity'),
        ('a_packstat', 'packstat', filter_thresholds['a_packstat'], [0.0, 1.0], 'over', 'PackStat'),
        ('a_buried_2', 'buried_2', filter_thresholds['a_buried_2'], [0, 100], 'under', 'UnsatisfiedHBonds'),
        ('a_rms', 'rmsd', 1000, [0, 1000], 'under', 'RMSD'),
    )
    run_filters_updated = RunFilters()
    for name, typ, threshold, limits, under_over, g_name in filter_specs:
        run_filters_updated.append_filter(Filter(name=name, typ=typ, threshold=threshold, limits=limits,
                                                 under_over=under_over, g_name=g_name))

    passed, failed = all_who_pass_run_filters(args, score_dict, run_filters_updated)
    multiple_plots(args, run_filters_updated, passed, failed, score_dict)
    args['x'], args['y'] = 'rmsd', 'ddg'
    this_vs_that(args, run_filters_updated, passed, failed, score_dict)
    plt.show()
def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """
    collects a Result for every design in args['score_dir'] that has more than
    args['purples_threshold'] structures passing the run filters
    :param args: run arguments
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {design_name: Result}
    """
    results = {}
    sc_files = [a for a in os.listdir(args['score_dir']) if a.endswith('.score')]
    for sc_file in sc_files:
        # listdir returns bare names, so the folder has to be put back to open the file
        score_dict = score2dict(os.path.join(args['score_dir'], sc_file))
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        if len(passed) <= args['purples_threshold']:
            continue
        design_name = sc_file[4:-6]
        # the sequence names are the score names without the run date, so find which date to kick out
        date_tag = next((tag for tag in ('_11.10', '_12.10') if tag in sc_file), None)
        if date_tag is None:
            # without a known date the sequences can't be looked up. skip instead of
            # failing on an unbound name, or reusing the sequences of the previous file
            print('skipping %s, no known date tag in its name' % sc_file)
            continue
        coh_seq = coh_seq_dict[(design_name + '_0001.pdb.A').replace(date_tag, '')]
        doc_seq = doc_seq_dict[(design_name + '_0001.pdb.B').replace(date_tag, '')]
        results[design_name] = Result(design_name, coh_seq, doc_seq, len(passed))
    return results
def filter_result_for_passed(args):
    """
    draws a box plot of one filter's values over the structures that passed the run filters,
    one box for every score file
    :param args: run arguments. 'score_files' are the files to analyse, 'filter' is the score term to plot
    :return: None
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6})
    results = {}
    for score_file in sorted(args['score_files']):
        results[score_file] = {}
        passed, failed = all_who_pass_run_filters(args, score2dict(score_file), run_filters)
        results[score_file]['filter_passed'] = [psd[args['filter']] for psd in passed.values()]
    box_values = [entry['filter_passed'] for entry in results.values()]
    box_labels = [file_name.split('.score')[0] for file_name in results]
    plt.boxplot(box_values, labels=box_labels)
    plt.show()
def creat_coh_doc_purples():
    """
    counts the structures that pass the run filters for every cohesin/dockerin score file in the
    hard-coded results folder. the counts are cached in analysed.obj in that folder, and are read
    from there if it already exists
    :return: {coh_name: {doc_name: number of passed structures}}
    """
    results_path = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/reclique_18Nov/cliques_prediction/results/'
    cache_file = results_path+'analysed.obj'
    sc_files = [a for a in os.listdir(results_path) if '.score' in a]
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})
    if not os.path.isfile(cache_file):
        coh_doc_purples = {}
        for sc_file in sc_files:
            sc_dict = score2dict(results_path+sc_file)
            passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
            coh_name = what_coh(sc_file, args={'naming': 'coh_on_doc'})
            doc_name = what_doc(sc_file, args={'naming': 'coh_on_doc'})
            coh_doc_purples.setdefault(coh_name, {})[doc_name] = len(passed)
        # with-block so the cache is flushed and closed even if pickling fails
        with open(cache_file, 'wb') as fout:
            pickle.dump(coh_doc_purples, fout)
    else:
        print("reading coh_doc_purples")
        # NOTE: unpickling runs arbitrary code, only load a cache that this function wrote
        with open(cache_file, 'rb') as fin:
            coh_doc_purples = pickle.load(fin)
    return coh_doc_purples
def analyse_matrix(args: dict):
    """
    builds a dockerin (rows) by cohesin (columns) table of the number of structures that pass the
    run filters, from all .score files in the working directory, and shows it as a heat map
    :param args: run arguments, forwarded to what_coh, what_doc and all_who_pass_run_filters
    :return: None
    """
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})
    score_files = [a for a in os.listdir('./') if a.endswith('.score')]
    coh_name_list = sorted({what_coh(a, args) for a in score_files})
    doc_name_list = sorted({what_doc(a, args) for a in score_files})
    # -1 marks cohesin/dockerin pairs that have no score file
    df = DataFrame([{coh: -1 for coh in coh_name_list}] * len(doc_name_list), index=doc_name_list)
    for sc in score_files:
        score_dict = score2dict(sc)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        coh = what_coh(sc, args)
        doc = what_doc(sc, args)
        # .loc instead of df[coh][doc], chained assignment may write to a temporary copy
        df.loc[doc, coh] = len(passed)
    show_prediction_heat_map(df)
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('-score_file') parser.add_argument('-coh_seqs_file') parser.add_argument('-doc_seqs_file') parser.add_argument('-mode') parser.add_argument('-n', type=int, default=1) parser.add_argument('-diff_by', type=int, default=2) parser.add_argument('-doc_diff_by', type=int, default=1) parser.add_argument('-score_dir', type=str, default='./') parser.add_argument('-purples_threshold', type=int, default=50) args = vars(parser.parse_args()) if args['mode'] != 'bins_diagonal' and args['mode'] != 'post_pred_cliques' and args['mode'] != 'minidiagonal_cliques': scores = score2dict(args['score_file']) run_filters = ''#generate_run_filters() coh_seq_dict = read_multi_fastas(args['coh_seqs_file'], suffix_to_remove='.pdb.gz') doc_seq_dict = read_multi_fastas(args['doc_seqs_file'], suffix_to_remove='.pdb.gz') if args['mode'] == 'switches_n_cliques': switches, num_bins = make_switches(args, scores, run_filters, coh_seq_dict) max_cliques = best_cliques(args, list(switches.keys())) with open('switches.obj', 'wb') as sw_file: pickle.dump(switches, sw_file) with open('max_cliques.obj', 'wb') as clq_file: pickle.dump(max_cliques, clq_file) elif args['mode'] == 'choose_by_identity': with open('switches.obj', 'rb') as sw_in:
def post_pred_cliques(args):
    """
    finds the largest cliques of designs whose cohesin and dockerin switches are different enough.
    designs with at least args['purples_threshold'] passed structures are gathered from the .score
    files in the working directory (cached in ./all_data.obj). two designs are connected if their
    cohesin switches differ by at least args['diff_by'] and their dockerin switches (also against
    the symmetric form of the second dockerin) differ by at least args['doc_diff_by'], where
    dockerins from different clusters count as one extra difference
    :param args: run arguments, uses 'purples_threshold', 'diff_by' and 'doc_diff_by'
    :return: None. prints every examined pair and the maximal cliques
    """
    run_filters = generate_run_filters(args={
        'ddg': 25.0,
        'sasa': 1400,
        'shape': 0.6,
        'packstat': 0.6,
        'buried_2': 3
    })
    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas(
            '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_cohs.fasta'
        )
        docs_seqs = read_multi_fastas(
            '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_docs.fasta'
        )
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name + '.pdb.gz.A'
            doc_name = seq_name + '.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                results.append(
                    Result(seq_name, cohs_seqs[coh_name], docs_seqs[doc_name], len(passed)))
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        # NOTE: unpickling runs arbitrary code, only load a cache that this function wrote
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)

    # NOTE(review): graph.obj is only tested for, never written or read here. the clique
    # search is assumed to belong under this test, as it needs the graph built below - confirm
    if os.path.isfile('./graph.obj'):
        return

    result_dict = {i + 1: r for i, r in enumerate(results)}
    G = nx.Graph()
    G.add_nodes_from(result_dict)
    # G.nodes() instead of nodes_iter(), which was removed in networkx 2.0.
    # ordered pairs are examined on purpose: only the second dockerin gets the symmetric form,
    # so (n1, n2) and (n2, n1) are not the same test
    for n1 in G.nodes():
        for n2 in G.nodes():
            if n1 == n2:
                continue
            coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
            doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
            doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
            doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
            symm_switch = switch_symm_changer(doc_sw_2)
            if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                    switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                    switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                G.add_edge(n1, n2)
                print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
            else:
                print('NOT\n', result_dict[n1], '\n', result_dict[n2])

    cliques = list(nx.find_cliques(G))
    if not cliques:
        # no design passed the threshold, so there is nothing to take the maximum of
        return
    max_len = max(len(a) for a in cliques)
    max_cliques = [a for a in cliques if len(a) == max_len]
    for clq in max_cliques:
        print(clq, '\n', '\n'.join([str(result_dict[a]) for a in clq]))
def jack_matrix(args):
    """
    displays a matrix of ddG Vs. RMSD, one subplot (on a 3x3 grid) for every .score file in the
    working directory. simple docking ('SD') and 'no_docking' files of the same structure are
    drawn on the same subplot in grey. the figure is saved as jack.png
    :param args: run arguments. args['x'] and args['y'] are the score terms to plot
    :return: None
    """
    args['show_fig'] = False
    fig = plt.figure()
    run_filters = generate_run_filters(args={'ddg': -16, 'sasa': 1200, 'shape': 0.5, 'packstat': 0.5, 'buried_2': 30,
                                             'hbonds': 10})
    all_files = os.listdir('./')
    jk_scores = [a for a in all_files if a.endswith('.score') and 'SD' not in a and 'no_docking' not in a]
    # {structure name: {crystal name: file name}}, names are cut from fixed positions in the file name
    sd_scores = {}
    for sc_f in all_files:
        if sc_f.endswith('.score') and 'SD' in sc_f:
            sd_scores.setdefault(sc_f[4:8], {})[sc_f[12:16]] = sc_f
    no_dock_files = {}
    for sc_f in all_files:
        if 'no_docking' in sc_f:
            no_dock_files.setdefault(sc_f[:4], {})[sc_f[11:15]] = sc_f

    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.15, hspace=0.25)
    matplotlib.rcParams['axes.linewidth'] = 0.8
    font0 = FontProperties()
    font = font0.copy()
    font.set_weight('semibold')

    # hand-picked order: the second and the one before last files switch places,
    # and everything after the first two is sorted again
    sorted_scores = sorted(jk_scores)
    if len(sorted_scores) >= 2:
        sorted_scores[1], sorted_scores[-2] = sorted_scores[-2], sorted_scores[1]
        sorted_scores = [sorted_scores[0], sorted_scores[1]] + sorted(sorted_scores[2:])

    for num, sc in enumerate(sorted_scores):
        ax = plt.subplot(3, 3, 1+num)
        pdb_name = sc[4:8]
        sc_dict = score2dict(sc)
        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        x_passed = [v[args['x']] for v in passed.values()]
        y_passed = [v[args['y']] for v in passed.values()]
        x_failed = [v[args['x']] for v in failed.values()]
        y_failed = [v[args['y']] for v in failed.values()]

        # draw simple docking results, for up to two crystals
        sd_files = list(sd_scores.get(pdb_name, {}).values())
        for sd_file, colour in zip(sd_files[:2], ('lightgrey', 'grey')):
            sd_sc_dict = score2dict(sd_file)
            plt.scatter([a['rmsd'] for a in sd_sc_dict.values()], [a['ddg'] for a in sd_sc_dict.values()],
                        marker='.', c=colour, s=30, linewidths=0)

        # draw DoCohModeller results
        plt.scatter(x_failed, y_failed, marker='.', c='blue', alpha=0.6, s=50, linewidth=0.2)
        plt.scatter(x_passed, y_passed, marker='.', c='red', alpha=0.6, s=100, linewidths=0.3)

        # draw no docking results, for up to two files. a structure with no such
        # file is simply left unmarked, the same as for the simple docking results
        no_dock = list(no_dock_files.get(pdb_name, {}).values())
        for no_dock_file, colour in zip(no_dock[:2], ('lightgrey', 'grey')):
            no_dock_rmsd, no_dock_ddg = get_no_docking_results(sc, no_dock_file)
            print('marking %s with %f, %f' % (pdb_name, no_dock_rmsd, no_dock_ddg))
            plt.scatter(no_dock_rmsd, no_dock_ddg, c=colour, marker='^', s=60)

        plt.xlim([0, 20])
        plt.ylim([-32.5, 0])
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
        plt.yticks([0, -10, -20, -30], fontsize=16)
        plt.xticks(fontsize=16)
        # tick labels only on the left column and the bottom row of the 3x3 grid
        if num not in [0, 3, 6]:
            plt.setp(ax.get_yticklabels(), visible=False)
        if num not in [6, 7, 8]:
            plt.setp(ax.get_xticklabels(), visible=False)
        plt.title(sc.split('_')[1].upper(), fontsize=16, fontproperties=font)

    fig.text(0.5, 0.04, r'RMSD ($\AA$)', ha='center', va='center', fontsize=24)
    fig.text(0.06, 0.5, '∆∆G (R.E.U.)', ha='center', va='center', rotation='vertical', fontsize=24)
    plt.savefig('jack.png', dpi=100)
    plt.show()
# command line options of the switches / cliques analysis
parser = argparse.ArgumentParser()
parser.add_argument('-score_file')
parser.add_argument('-coh_seqs_file')
parser.add_argument('-doc_seqs_file')
parser.add_argument('-mode')
parser.add_argument('-n', default=1, type=int)
parser.add_argument('-diff_by', default=2, type=int)
parser.add_argument('-doc_diff_by', default=1, type=int)
parser.add_argument('-score_dir', default='./', type=str)
parser.add_argument('-purples_threshold', default=50, type=int)
args = vars(parser.parse_args())

# all modes other than these three read a single score file and both sequence files
if args['mode'] not in ('bins_diagonal', 'post_pred_cliques', 'minidiagonal_cliques'):
    scores = score2dict(args['score_file'])
    run_filters = ''  # generate_run_filters()
    coh_seq_dict = read_multi_fastas(args['coh_seqs_file'], suffix_to_remove='.pdb.gz')
    doc_seq_dict = read_multi_fastas(args['doc_seqs_file'], suffix_to_remove='.pdb.gz')

if args['mode'] == 'switches_n_cliques':
    # make the switches and their best cliques, and pickle both for later modes
    switches, num_bins = make_switches(args, scores, run_filters, coh_seq_dict)
    max_cliques = best_cliques(args, list(switches.keys()))
    for obj_name, obj in (('switches.obj', switches), ('max_cliques.obj', max_cliques)):
        with open(obj_name, 'wb') as obj_file:
            pickle.dump(obj, obj_file)
def multi_filters_plot(args):
    """
    :param args: run arguments, not used
    :return: None. draws multiple plots for the different run filter configurations (for my thesis).
    every subplot is a dockerin (rows) by cohesin (columns) heat map of the number of structures
    that pass, where each configuration switches on one more filter than the one before.
    the figure is saved as mini_postdiction.png
    """
    from matplotlib import colors
    from numpy import array, arange
    cmap = colors.ListedColormap(['white', 'cornflowerblue', 'darkturquoise', 'darkorange'])
    bounds = [-100, 0, 10, 20, 100]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    fig = plt.figure(figsize=(6.02, 6.38))
    plt.subplots_adjust(left=0.25, bottom=0.15, right=None, top=None, wspace=0.1, hspace=0.2)

    # start with everything but ddG wide open, then set one more threshold for every configuration
    thresholds = {'ddg': 24.0, 'sasa': 0, 'shape': 0., 'packstat': 0.0, 'buried_2': 30, 'hbonds': 0.}
    tightening_steps = (('ddG', 'ddg', 24.0),
                        ('ddG_SASA', 'sasa', 1400),
                        ('ddG_SASA_pack', 'packstat', 0.6),
                        ('ddG_SASA_pack_shape', 'shape', 0.6),
                        ('ddG_SASA_pack_shape_buried', 'buried_2', 3),
                        ('ddG_SASA_pack_shape_buried_hbonds', 'hbonds', -10.))
    various_filters = OrderedDict()
    for config_name, term, value in tightening_steps:
        thresholds[term] = value
        various_filters[config_name] = generate_run_filters(args=dict(thresholds))

    num_letter = {1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'F'}
    score_files = [a for a in os.listdir('./') if a.endswith('.score')]
    coh_name_list = sorted({what_coh(a) for a in score_files})
    doc_name_list = sorted({what_doc(a) for a in score_files})
    scores_dict = {sc: score2dict(sc) for sc in score_files}

    for i, (name, filters) in enumerate(various_filters.items(), start=1):
        # -1 marks cohesin/dockerin pairs that have no score file
        df = DataFrame([{coh: -1 for coh in coh_name_list}] * len(doc_name_list), index=doc_name_list)
        for sc, sc_dict in scores_dict.items():
            passed, failed = all_who_pass_run_filters({}, sc_dict, filters)
            # .loc instead of df[coh][doc], chained assignment may write to a temporary copy
            df.loc[what_doc(sc), what_coh(sc)] = len(passed)
        plt.subplot(3, 2, i)
        grid = array(df)
        plt.pcolor(grid, cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
        # write the count inside every cell that has one
        for y in range(grid.shape[0]):
            for x in range(grid.shape[1]):
                if grid[y, x] >= 0:
                    plt.text(x+0.5, y+0.5, grid[y, x], horizontalalignment='center',
                             verticalalignment='center', fontsize=6)
        # make sure labels are only on outer subplots
        if i in [1, 3, 5]:
            plt.yticks(arange(0.5, len(df.index), 1), [official_names(n) for n in df.index], fontsize=10)
        else:
            plt.yticks([])
        if i in [5, 6]:
            plt.xticks(arange(0.5, len(df.columns), 1), [official_names(n) for n in df.columns], rotation=70,
                       fontsize=10)
        else:
            plt.xticks([])
        plt.title(num_letter[i])

    fig.text(0.5, 0.04, 'Cohesin name', ha='center', va='center', fontsize=24)
    fig.text(0.06, 0.5, 'Dockerin name', ha='center', va='center', rotation='vertical', fontsize=24)
    plt.savefig('mini_postdiction.png', dpi=600)
    plt.show()
args = vars(parser.parse_args())
if args['mode'] == 'how_many_pass':
    # report how many structures of the score file pass
    score_dict = score2dict_new(args['score_file'])
    all_who = all_who_pass(args, score_dict)
    # an empty score file has nothing to pass, report 0% instead of dividing by zero
    percentage = 100.0*float(len(all_who))/float(len(score_dict)) if score_dict else 0.0
    print('there were %i purples out of %i, which is %f' % (len(all_who), len(score_dict.values()), percentage))
    if args['show_all']:
        for k in all_who:
            print(k['description']+'.pdb')

elif args['mode'] == 'analyse':
    # report the best args['n'] structures, out of the passed ones if there are any
    score_dict = score2dict(args['score_file'])
    passed = all_who_pass(args, score_dict)
    print('found %i purples' % len(passed))
    if not passed:
        print('non passed, so the lowest ddg are:')
        best_n_structs = best_n_structures(args, score_dict)
    else:
        print('had passed, so best ddg out of those:')
        passed_score_dict = {a['description']: a for a in passed}
        best_n_structs = best_n_structures(args, passed_score_dict)
    print('the best %i strucutres by filter %s are \n%s.pdb' %
          (args['n'], args['filter'], '.pdb '.join([a['description'] for a in best_n_structs])))
    if args['show_all']:
        print('showing ALL structures that passed the thresholds:')
        print('.pdb '.join([a['description'] for a in passed]) + '.pdb')
'buried_2': 3, 'hbonds': 10 }) all_docs = [a for a in os.listdir(topath) if 'doc_' in a] fout = open(topath + 'minidiagonal_full_names.txt', 'w') for doc in all_docs: all_dirs = os.listdir(topath + doc) for dir in all_dirs: try: sc_file = [ a for a in os.listdir(topath + doc + '/' + dir) if a[-3:] == '.sc' ] if sc_file: sc_dict = score2dict(topath + doc + '/' + dir + '/' + sc_file[0]) passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters) if len(passed) > 5: fout.write('%s\t%i\n' % (dir, len(passed))) shutil.copy( topath + doc + '/' + dir + '/' + list(sc_dict.keys())[0] + '.pdb.gz', topath + 'minidiagonal_pdbs') except: print('no folder', dir) fout.close() # def analyse_minidiagonal(args): # with open('../minidiagonal.txt', 'w') as fout: # run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
from RosettaFilter import score2dict
# goes over every run folder of every doc_ folder under topath. a run with more than 5 structures
# passing the run filters is written to minidiagonal_full_names.txt, and the pdb named after the
# first entry of its score file is copied to minidiagonal_pdbs
topath = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/'
run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                         'hbonds': 10})
all_docs = [a for a in os.listdir(topath) if 'doc_' in a]
# with-block so the names file is closed even if something below fails
with open(topath+'minidiagonal_full_names.txt', 'w') as fout:
    for doc in all_docs:
        all_dirs = os.listdir(topath+doc)
        for dir_name in all_dirs:
            run_dir = topath+doc+'/'+dir_name
            try:
                sc_file = [a for a in os.listdir(run_dir) if a[-3:] == '.sc']
                if sc_file:
                    sc_dict = score2dict(run_dir+'/'+sc_file[0])
                    passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
                    if len(passed) > 5:
                        fout.write('%s\t%i\n' % (dir_name, len(passed)))
                        shutil.copy(run_dir+'/'+list(sc_dict.keys())[0]+'.pdb.gz', topath+'minidiagonal_pdbs')
            except Exception as err:
                # still best effort, one bad run folder should not stop the rest. the error
                # is printed as well, since a missing folder is not the only way to get here
                print('no folder', dir_name, err)


# def analyse_minidiagonal(args):
#     with open('../minidiagonal.txt', 'w') as fout:
#         run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
#                                                  'hbonds': 10})
#         counter = 0
#         score_files = [a for a in os.listdir('./') if a[-3:] == '.sc']
#         for sc in score_files:
#!/usr/bin/env python3.5 from RosettaFilter import score2dict import sys __author__ = 'jonathan' score_dict = score2dict(sys.argv[1]) print(score_dict[sys.argv[2]])