def postdictions_summary(args):
    from matplotlib import colors
    from _binding_data import binding_data
    obs_pre = {False: {False: 0, True: 2}, True: {False: 3, True: 1}}
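    # obs_pre[observed_binding][predicted_binding] encodes each cohesin-dockerin pair for
    # the heat map: 0 = true negative, 1 = true positive, 2 = false positive,
    # 3 = false negative (a pair is predicted to bind when >= 10 models pass all filters).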
    binding_dict = binding_data()
    results_root = '/home/labs/fleishman/jonathaw/no_backup/postdiction_new/results/'
    # run_filters = generate_run_filters(args={'ddg': -16, 'sasa': 1200, 'shape': 0.5, 'packstat': 0.5, 'buried_2': 30,
    #                                          'hbonds': 12})
    # args_ = {'ddg': 12, 'sasa': 1400, 'shape': 0.45, 'packstat': 0.45, 'buried_2': 2, 'hbonds': 4}
    # args_ = {'ddg': 14, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 4} # harsh
    args_ = {'ddg': 16, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 6}
    run_filters = generate_run_filters(args_)
    sc_files = [a for a in os.listdir(results_root) if '.score' in a]

    results, cohs, docs = {}, [], []
    bar_width = len(sc_files)
    sys.stdout.write("{%s}" % (" " * bar_width))
    sys.stdout.flush()
    sys.stdout.write("\b" * (bar_width+1))
    for sc_file in sc_files:
        sc_dict = score2dict(results_root+sc_file)
        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        coh_name = sc_file.split('all_')[1].split('_on_')[0]
        doc_name = re.split(pattern=r'_[0-9]{1,2}\.[0-9]{1,2}\.score', string=sc_file.split('_on_')[1])[0]
        # print(len(passed), coh_name, doc_name, binding_dict[coh_name][doc_name])
        if coh_name not in results.keys():
            results[coh_name] = {}
            cohs.append(coh_name)
        if doc_name not in docs:
            docs.append(doc_name)
        results[coh_name][doc_name] = obs_pre[binding_dict[coh_name][doc_name]][len(passed) >= 10]
        sys.stdout.write("%s" % random.choice(['!', '@', '#', '$', '%', '^', '&', '*', '(', ')']))
        sys.stdout.flush()
    sys.stdout.write("}\n")

    # move data to data frame
    df = pd.DataFrame(columns=sorted(docs), index=sorted(cohs), data=-100)
    for coh, doc_dict in results.items():
        for doc, res in doc_dict.items():
            df.loc[coh, doc] = res

    df = df.transpose()
    print(df)
    axis = plt.gca()
    cmap = colors.ListedColormap(['white', 'red', 'blue', 'green', 'yellow'])
    bounds = [-101, -0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    heatmap = plt.pcolor(np.array(df), cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    plt.yticks(np.arange(0.5, len(df.index), 1), [official_names(a) for a in df.index])
    plt.xticks(np.arange(0.5, len(df.columns), 1), [official_names(a) for a in df.columns], rotation=70)
    axis.set_aspect('equal')

    legend = plt.colorbar(heatmap)
    # legend.ax.set_yticklabels(['NA', 'TN', 'TP', 'FP', 'FN'])
    legend.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['NA', 'TN', 'TP', 'FP', 'FN']):
        legend.ax.text(.5, (2 * j + 1) / 10.0, lab, ha='center', va='center')
    legend.ax.get_yaxis().labelpad = 15
    plt.suptitle(str(args_))
    plt.show()
def score_diagonal(args: dict):
    results = {}
    run_filters = generate_run_filters(args)
    sc_files = [a for a in os.listdir(args['score_dir']) if a[-6:] == '.score']
    for sc_file in sc_files:
        score_dict = score2dict(os.path.join(args['score_dir'], sc_file))
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        results[sc_file] = len(list(passed.keys()))
        print(sc_file, len(list(passed.keys())))
def prism(args):
    run_filters = generate_run_filters(args={'ddg': 16.0, 'sasa': 1200, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': 10})
    score_dict = score2dict(args['score_file'])
    passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
    print('RMSD\tPassed\tFailed')
    for v in passed.values():
        print('%3.3f\t%3.3f' % (v['rmsd'], v['ddg']))
    for v in failed.values():
        print('%3.3f\t\t%3.3f' % (v['rmsd'], v['ddg']))
def folder_scores(folder: str) -> dict:
    """
    concatenates all the score files in the folder into one score dict
    :param folder: path to a folder
    :return: {name: {filter: grade}} a score dict for the entire folder
    """
    results = {}
    score_files = [a for a in os.listdir(folder) if a[-3:] == '.sc']
    for score in score_files:
        results.update(score2dict(score))
    return results
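
# A minimal usage sketch for folder_scores (not part of the original script): it assumes,
# as the docstring above states, that score2dict yields {design_name: {filter: value}}
# entries, here with a 'ddg' key. The helper name and folder layout are illustrative only.
def _example_best_design_by_ddg(folder: str) -> str:
    """return the design name with the lowest ddg among all score files in the folder"""
    all_scores = folder_scores(folder)
    return min(all_scores, key=lambda name: all_scores[name]['ddg'])
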
def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """
    :param args: run arguments
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {switch_name: {design_name: #purples}}
    """
    # positions_dict = {'1anu': [32, 36, 62, 65, 69, 82, 115, 126],
    #                   '1aoh': [33, 37, 63, 66, 70, 83, 119, 130],
    #                   '1ohz': [33, 35, 37, 39, 63, 66, 68, 70, 73, 75, 77, 79, 81, 83, 121, 125, 127],
    #                   '2ccl': [33, 37, 63, 66, 70, 83, 116, 127]}
    # type_dict = {'D': 'n', 'E': 'n', 'K': 'p', 'R': 'p'}
    results = {}
    sc_files = [a for a in os.listdir(args['score_dir']) if a[-6:] == '.score']
    bins = {}
    for sc_file in sc_files:
        score_dict = score2dict(os.path.join(args['score_dir'], sc_file))
        passed, failed = all_who_pass_run_filters(args, score_dict,
                                                  run_filters)
        # results[sc_file] = len(list(passed.keys()))
        if len(list(passed.keys())) <= args['purples_threshold']:
            continue
        # strip the date stamp from the score file name and convert it to the proper sequence names:
        if '_11.10' in sc_file:
            coh_seq = coh_seq_dict[''.join(
                str(sc_file[4:-6] + '_0001.pdb.A').split('_11.10'))]
            doc_seq = doc_seq_dict[''.join(
                str(sc_file[4:-6] + '_0001.pdb.B').split('_11.10'))]
        elif '_12.10' in sc_file:
            coh_seq = coh_seq_dict[''.join(
                str(sc_file[4:-6] + '_0001.pdb.A').split('_12.10'))]
            doc_seq = doc_seq_dict[''.join(
                str(sc_file[4:-6] + '_0001.pdb.B').split('_12.10'))]
        else:
            # no recognised date stamp, so the sequences cannot be looked up; skip the file
            continue

        result = Result(sc_file[4:-6], coh_seq, doc_seq,
                        len(list(passed.keys())))

        # pos_str = coh_seq.get_positions(positions_dict['1ohz'])
        # switch_str = ''.join([type_dict[a] if a in type_dict.keys() else 'c' for a in pos_str])
        #
        # doc_str = doc_seq.get_positions(list(doc_symm_poses.keys()))
        # doc_switch = ''.join([type_dict[a] if a in type_dict.keys() else 'c' for a in doc_str])

        # if switch_str not in bins.keys():
        #     bins[switch_str] = {}
        # if doc_switch not in bins[switch_str].keys():
        #     bins[switch_str][doc_switch] = {}

        # bins[switch_str][doc_switch][''.join(str(sc_file[4:-6]+'_0001.pdb.A').split('_11.10'))] \
        #     = {'doc_seq': doc_seq, 'purples': len(list(passed.keys()))}
        results[sc_file[4:-6]] = result
    # return bins
    return results
def prediction_results(args):
    sc_files = [a for a in os.listdir() if a[-6:] == '.score']
    run_filters = generate_run_filters(args={'ddg': 18.0, 'sasa': 1300, 'shape': 0.6, 'packstat': 0.6,
                                                 'buried_2': 3, 'hbonds': 0})
    for sc_file in sc_files:
        sc_dict = score2dict(sc_file)
        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        names = sc_file.split('_on_')
        coh = names[0].split('all_')[1]
        doc = names[1].split('_0')[0]
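        # columns: cohesin, dockerin, models passing all filters, percent passing, total models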
        print("%-10s %-10s %-4i %-3f %i" % (coh, doc, len(passed), 100*len(passed)/(len(failed)+len(passed)), len(sc_dict.keys())))
def get_no_docking_results(sc_name: str, no_dock_file: str) -> (float, float):
    """
    :param sc_name: score name (currently unused)
    :param no_dock_file: path to the no-docking control score file
    :return: (rmsd, ddg) of the lowest-RMSD model in the file
    """
    sc_dict = score2dict(no_dock_file)
    min_rmsd, min_ddg = 1000., 1000.
    for sc in sc_dict.values():
        if sc['rmsd'] < min_rmsd:
            min_rmsd, min_ddg = sc['rmsd'], sc['ddg']
    return min_rmsd, min_ddg
def analyse_minidiagonal(args):
    with open('../minidiagonal.txt', 'w') as fout:
        run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                                 'hbonds': 10})
        counter = 0
        score_files = [a for a in os.listdir('./') if a[-3:] == '.sc']
        for sc in score_files:
            score_dict = score2dict(sc)
            passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
            if len(passed) > 5:
                fout.write('%s\t%i\n' % (sc, len(passed)))
                counter += 1
    print('%i passed minidiagonal' % counter)
def post_pred_cliques(args):

    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3})

    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name+'.pdb.gz.A'
            doc_name = seq_name+'.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            ynum = re.search('y[0-9]{3}', sc_file).group(0)
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                r = Result(seq_name, cohs_seqs[coh_name], docs_seqs[doc_name], len(passed))
                results.append(r)
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)

    if not os.path.isfile('./graph.obj'):
        result_dict = {i+1: r for i, r in enumerate(results)}
        G = nx.Graph()
        [G.add_node(a) for a in result_dict.keys()]
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 != n2:
                    coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
                    doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
                    doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
                    doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
                    symm_switch = switch_symm_changer(doc_sw_2)
                    if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                        G.add_edge(n1, n2)
                        print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
                    else:
                        print('NOT\n', result_dict[n1], '\n', result_dict[n2])
        cliques = [a for a in nx.find_cliques(G)]
        max_len = max([len(a) for a in cliques])
        max_cliques = [a for a in cliques if len(a) == max_len]
        for clq in max_cliques:
            print(clq, '\n', '\n'.join([str(result_dict[a]) for a in clq]))
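
# Hedged toy sketch of the clique step above, runnable on its own. The stand-in
# _toy_switch_diff assumes switches_differ simply counts positions at which two
# "switch" strings disagree; the real function (not shown in this file) may differ.
# All names and data below are illustrative only.
import networkx as nx

def _toy_switch_diff(a: str, b: str) -> int:
    # number of positions at which the two switch strings disagree
    return sum(1 for x, y in zip(a, b) if x != y)

def _toy_max_clique(switches: list, diff_by: int = 2) -> list:
    # connect two switches when they differ in at least diff_by positions,
    # then return one largest set of mutually different switches
    g = nx.Graph()
    g.add_nodes_from(range(len(switches)))
    for i in range(len(switches)):
        for j in range(i + 1, len(switches)):
            if _toy_switch_diff(switches[i], switches[j]) >= diff_by:
                g.add_edge(i, j)
    best = max(nx.find_cliques(g), key=len)
    return [switches[i] for i in best]

# e.g. _toy_max_clique(['ppnn', 'nnpp', 'pnpn', 'nccp']) returns one largest subset of
# switch strings that all differ from each other in at least two positions.
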
def find_thresholds_by_rmsd(args):
    """
    :param args: run args
    :return: nothing is returned; for each filter, finds the least strict threshold (minimum or maximum, depending
    on the filter's direction) that still passes every structure with RMSD under args['rmsd_threshold'], then
    re-filters and plots the score file with these thresholds.
    """
    score_dict = score2dict(args['score_file'])
    filter_thresholds = dict(a_score=-100000., a_sasa=100000., a_shape=1.0, total_score=1000000., a_ddg=-100.,
                             a_packstat=1.0, a_buried_2=0)
    passed_rmsd = []
    for name, sc in score_dict.items():
        if sc['rmsd'] <= args['rmsd_threshold']:
            filter_thresholds['a_score'] = max([filter_thresholds['a_score'], sc['score']])
            filter_thresholds['a_sasa'] = min([filter_thresholds['a_sasa'], sc['sasa']])
            filter_thresholds['a_shape'] = min([filter_thresholds['a_shape'], sc['shape']])
            filter_thresholds['total_score'] = min([filter_thresholds['total_score'], sc['total_score']])
            filter_thresholds['a_ddg'] = max([filter_thresholds['a_ddg'], sc['ddg']])
            filter_thresholds['a_packstat'] = min([filter_thresholds['a_packstat'], sc['packstat']])
            filter_thresholds['a_buried_2'] = max([filter_thresholds['a_buried_2'], sc['buried_2']])
            passed_rmsd.append(sc)
    print('found %i scores with rmsd <= %f' % (len(passed_rmsd), args['rmsd_threshold']))
    # print('the old thresholds were:\n%s' % '\n'.join(['%s %f' % (k, v) for k, v in dimer_data().items()]))
    print('the old thresholds were:', generate_run_filters().report())
    print('defined these new filters:\n%s' % '\n'.join(['%s %f' % (k, v) for k, v in filter_thresholds.items()]))
    args['dimer_data'] = filter_thresholds
    run_filters_updated = RunFilters()
    run_filters_updated.append_filter(Filter(name='a_ddg', typ='ddg', threshold=filter_thresholds['a_ddg'],
                                             limits=[-10000, 10000], under_over='under', g_name=r'$\Delta$$\Delta$G'))
    run_filters_updated.append_filter(Filter(name='a_score', typ='score', threshold=filter_thresholds['a_score'],
                                             limits=[-10000, 10000], under_over='under', g_name='Score'))
    run_filters_updated.append_filter(Filter(name='a_sasa', typ='sasa', threshold=filter_thresholds['a_sasa'],
                                             limits=[0, 100000], under_over='over', g_name='SASA'))
    run_filters_updated.append_filter(Filter(name='a_shape', typ='shape', threshold=filter_thresholds['a_shape'],
                                             limits=[0.0, 1.0], under_over='over', g_name='Shape Complementarity'))
    run_filters_updated.append_filter(Filter(name='a_packstat', typ='packstat',
                                             threshold=filter_thresholds['a_packstat'], limits=[0.0, 1.0],
                                             under_over='over', g_name='PackStat'))
    run_filters_updated.append_filter(Filter(name='a_buried_2', typ='buried_2',
                                             threshold=filter_thresholds['a_buried_2'], limits=[0, 100],
                                             under_over='under', g_name='UnsatisfiedHBonds'))
    run_filters_updated.append_filter(Filter(name='a_rms', typ='rmsd', threshold=1000, limits=[0, 1000],
                                             under_over='under', g_name='RMSD'))
    passed, failed = all_who_pass_run_filters(args, score_dict, run_filters_updated)
    # this_vs_that(args, run_filters_updated, passed, failed, score_dict)
    multiple_plots(args, run_filters_updated, passed, failed, score_dict)
    args['x'], args['y'] = 'rmsd', 'ddg'
    this_vs_that(args, run_filters_updated, passed, failed, score_dict)
    plt.show()
def filter_result_for_passed(args):
    """
    analyses the behaviour of a specific filter across all structures that pass the run filters
    :param args: run arguments
    :return:
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6})
    score_files = sorted(args['score_files'])
    results = {}
    for i in score_files:
        results[i] = {}
        score_dict = score2dict(i)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        results[i]['filter_passed'] = []
        for name, psd in passed.items():
            results[i]['filter_passed'].append(psd[args['filter']])
    plt.boxplot([a['filter_passed'] for a in results.values()], labels=[a.split('.score')[0] for a in results.keys()])
    plt.show()
def creat_coh_doc_purples():
    results_path = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/reclique_18Nov/cliques_prediction/results/'
    sc_files = [a for a in os.listdir(results_path) if '.score' in a]
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})

    if not os.path.isfile(results_path+'analysed.obj'):
        coh_doc_purples = {}
        for sc_file in sc_files:
            sc_dict = score2dict(results_path+sc_file)
            passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
            coh_name = what_coh(sc_file, args={'naming': 'coh_on_doc'})
            doc_name = what_doc(sc_file, args={'naming': 'coh_on_doc'})
            if coh_name not in coh_doc_purples.keys():
                coh_doc_purples[coh_name] = {}
            coh_doc_purples[coh_name][doc_name] = len(passed)
        pickle.dump(coh_doc_purples, open(results_path+'analysed.obj', 'wb'))

    else:
        print("reading coh_doc_purples")
        coh_doc_purples = pickle.load(open(results_path+'analysed.obj', 'rb'))
    return coh_doc_purples
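
# Hedged usage note for creat_coh_doc_purples (illustrative names): analysed.obj is a cache
# keyed only by its presence, so the pickle must be deleted by hand to re-run the analysis
# after changing run_filters, e.g.
#   purples = creat_coh_doc_purples()
#   print(purples['coh_A']['doc_B'])   # hypothetical cohesin/dockerin names
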
def analyse_matrix(args: dict):
    # run_filters = generate_run_filters(args={'ddg': 22.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6})
    # run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6})
    # run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6})
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})
    # run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.65, 'buried_2': 3})
    # print(run_filters.report())
    score_files = [a for a in os.listdir('./') if a[-6:] == '.score']
    coh_name_list = sorted(list(set([what_coh(a, args) for a in score_files])))
    doc_name_list = sorted(list(set([what_doc(a, args) for a in score_files])))
    # print(coh_name_list)
    # print(doc_name_list)
    # df = DataFrame([{coh: Series([-1] * len(doc_name_list))} for coh in coh_name_list], index=doc_name_list)
    df = DataFrame([{coh: -1 for coh in coh_name_list}] * len(doc_name_list), index=doc_name_list)
    for sc in score_files:
        score_dict = score2dict(sc)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        coh = what_coh(sc, args)
        doc = what_doc(sc, args)
        df.loc[doc, coh] = len(passed)
    # print(df)
    show_prediction_heat_map(df)
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-score_file')
    parser.add_argument('-coh_seqs_file')
    parser.add_argument('-doc_seqs_file')
    parser.add_argument('-mode')
    parser.add_argument('-n', type=int, default=1)
    parser.add_argument('-diff_by', type=int, default=2)
    parser.add_argument('-doc_diff_by', type=int, default=1)
    parser.add_argument('-score_dir', type=str, default='./')
    parser.add_argument('-purples_threshold', type=int, default=50)
    args = vars(parser.parse_args())

    if args['mode'] not in ('bins_diagonal', 'post_pred_cliques', 'minidiagonal_cliques'):
        scores = score2dict(args['score_file'])
        run_filters = ''  # generate_run_filters()
        coh_seq_dict = read_multi_fastas(args['coh_seqs_file'], suffix_to_remove='.pdb.gz')
        doc_seq_dict = read_multi_fastas(args['doc_seqs_file'], suffix_to_remove='.pdb.gz')

    if args['mode'] == 'switches_n_cliques':
        switches, num_bins = make_switches(args, scores, run_filters, coh_seq_dict)
        max_cliques = best_cliques(args, list(switches.keys()))

        with open('switches.obj', 'wb') as sw_file:
            pickle.dump(switches, sw_file)
        with open('max_cliques.obj', 'wb') as clq_file:
            pickle.dump(max_cliques, clq_file)

    elif args['mode'] == 'choose_by_identity':
        with open('switches.obj', 'rb') as sw_in:
def jack_matrix(args):
    """
    displays a matrix of ddG vs. RMSD plots
    :param args: run arguments
    :return:
    """
    args['show_fig'] = False
    fig = plt.figure()
    # fig = plt.figure(figsize=(8.27, 11.69))
    # run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6})
    run_filters = generate_run_filters(args={'ddg': -16, 'sasa': 1200, 'shape': 0.5, 'packstat': 0.5, 'buried_2': 30,
                                             'hbonds': 10})
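    # score files are collected in three groups by filename convention: the main model scores
    # (jk_scores), simple-docking controls containing 'SD' (sd_scores) and 'no_docking'
    # controls (no_dock_files); slices such as sc_f[4:8] appear to pick a short complex
    # identifier out of the file name.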
    jk_scores = [a for a in os.listdir('./') if a[-6:] == '.score' and 'SD' not in a and 'no_docking' not in a]
    sd_scores = {}
    for sc_f in [a for a in os.listdir('./') if a[-6:] == '.score' and 'SD' in a]:
        if sc_f[4:8] not in sd_scores.keys():
            sd_scores[sc_f[4:8]] = {}
        sd_scores[sc_f[4:8]][sc_f[12:16]] = sc_f
    no_dock_files = {}
    for sc_f in [a for a in os.listdir('./') if 'no_docking' in a]:
        if sc_f[:4] not in no_dock_files.keys():
            no_dock_files[sc_f[:4]] = {}
        no_dock_files[sc_f[:4]][sc_f[11:15]] = sc_f
    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.15, hspace=0.25)
    matplotlib.rcParams['axes.linewidth'] = 0.8
    font0 = FontProperties()
    font = font0.copy()
    font.set_weight('semibold')
    sorted_scores = sorted(jk_scores)
    z = sorted_scores[1]
    sorted_scores[1] = sorted_scores[-2]
    sorted_scores[-2] = z
    sorted_scores = [sorted_scores[0], sorted_scores[1]] + sorted(sorted_scores[2:])
    for num, sc in enumerate(sorted_scores):
        ax = plt.subplot(3, 3, 1+num)
        sc_dict = score2dict(sc)

        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        x_passed, y_passed, x_failed, y_failed = [], [], [], []
        for k, v in passed.items():
            x_passed.append(v[args['x']])
            y_passed.append(v[args['y']])
        for k, v in failed.items():
            x_failed.append(v[args['x']])
            y_failed.append(v[args['y']])

        # draw simple docking results
        if sc[4:8] in sd_scores.keys():
            crystals = list(sd_scores[sc[4:8]].keys())
            sd_sc_dict = score2dict(sd_scores[sc[4:8]][crystals[0]])
            plt.scatter([a['rmsd'] for a in sd_sc_dict.values()], [a['ddg'] for a in sd_sc_dict.values()], marker='.',
                        c='lightgrey', s=30, linewidths=0)
            if len(crystals) > 1:
                sd_sc_dict = score2dict(sd_scores[sc[4:8]][crystals[1]])
                plt.scatter([a['rmsd'] for a in sd_sc_dict.values()], [a['ddg'] for a in sd_sc_dict.values()],
                            marker='.', c='grey', s=30, linewidths=0)
        # draw DoCohModeller results
        plt.scatter(x_failed, y_failed, marker='.', c='blue', alpha=0.6, s=50, linewidth=0.2)
        plt.scatter(x_passed, y_passed, marker='.', c='red', alpha=0.6, s=100, linewidths=0.3)

        # draw no docking results
        no_dock_rmsd_1, no_dock_ddg_1 = get_no_docking_results(sc, list(no_dock_files[sc[4:8]].values())[0])
        print('marking %s with %f, %f' % (sc[4:8], no_dock_rmsd_1, no_dock_ddg_1))
        plt.scatter(no_dock_rmsd_1, no_dock_ddg_1, c='lightgrey', marker='^', s=60)
        if len(no_dock_files[sc[4:8]].keys()) > 1:
            no_dock_rmsd_2, no_dock_ddg_2 = get_no_docking_results(sc, list(no_dock_files[sc[4:8]].values())[1])
            print('marking %s with %f, %f' % (sc[4:8], no_dock_rmsd_2, no_dock_ddg_2))
            plt.scatter(no_dock_rmsd_2, no_dock_ddg_2, c='grey', marker='^', s=60)

        plt.xlim([0, 20])
        plt.ylim([-32.5, 0])
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
        plt.yticks([0, -10, -20, -30], fontsize=16)
        plt.xticks(fontsize=16)
        if num not in [0, 3, 6]:
            plt.setp(ax.get_yticklabels(), visible=False)
        if num not in [6, 7, 8]:
            plt.setp(ax.get_xticklabels(), visible=False)
        plt.title(sc.split('_')[1].upper(), fontsize=16, fontproperties=font)
    fig.text(0.5, 0.04, r'RMSD ($\AA$)', ha='center', va='center', fontsize=24)
    fig.text(0.06, 0.5, '∆∆G (R.E.U.)', ha='center', va='center', rotation='vertical', fontsize=24)
    plt.savefig('jack.png', dpi=100)
    plt.show()
def multi_filters_plot(args):
    """
    :param args: run arguments, not used
    :return: draws multiple plots for the different run filter configurations (for my thesis)
    """
    from matplotlib import colors
    from numpy import array, arange

    cmap = colors.ListedColormap(['white', 'cornflowerblue', 'darkturquoise', 'darkorange'])
    bounds = [-100, 0, 10, 20, 100]
    norm = colors.BoundaryNorm(bounds, cmap.N)
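    # the colour map above marks missing cohesin-dockerin pairs (-1) in white and bins the
    # number of passing models ("purples") into 0-9, 10-19 and 20+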
    fig = plt.figure(figsize=(6.02, 6.38))
    plt.subplots_adjust(left=0.25, bottom=0.15, right=None, top=None, wspace=0.1, hspace=0.2)

    various_filters = OrderedDict()
    various_filters['ddG'] = generate_run_filters(
        args={'ddg': 24.0, 'sasa': 0000, 'shape': 0., 'packstat': 0.0, 'buried_2': 30, 'hbonds': 0.})
    various_filters['ddG_SASA'] = generate_run_filters(
        args={'ddg': 24.0, 'sasa': 1400, 'shape': 0., 'packstat': 0.0, 'buried_2': 30, 'hbonds': 0.})
    various_filters['ddG_SASA_pack'] = generate_run_filters(
        args={'ddg': 24.0, 'sasa': 1400, 'shape': 0., 'packstat': 0.6, 'buried_2': 30, 'hbonds': 0.})
    various_filters['ddG_SASA_pack_shape'] = generate_run_filters(
        args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 30, 'hbonds': 0.})
    various_filters['ddG_SASA_pack_shape_buried'] = generate_run_filters(args={
        'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3, 'hbonds': 0.})
    various_filters['ddG_SASA_pack_shape_buried_hbonds'] = generate_run_filters(
        args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3, 'hbonds': -10.})
    num_letter = {1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'F'}

    score_files = [a for a in os.listdir('./') if a[-6:] == '.score']
    coh_name_list = sorted(list(set([what_coh(a) for a in score_files])))
    doc_name_list = sorted(list(set([what_doc(a) for a in score_files])))
    scores_dict = {}
    for sc in score_files:
        scores_dict[sc] = score2dict(sc)
    i = 1
    for name, filters in various_filters.items():
        df = DataFrame([{coh: -1 for coh in coh_name_list}] * len(doc_name_list), index=doc_name_list)
        for sc, sc_dict in scores_dict.items():
            passed, failed = all_who_pass_run_filters({}, sc_dict, filters)
            coh = what_coh(sc)
            doc = what_doc(sc)
            df.loc[doc, coh] = len(passed)
        plt.subplot(3, 2, i)
        axis = plt.gca()
        heatmap = plt.pcolor(array(df), cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
        for y in range(array(df.shape)[0]):
            for x in range(array(df.shape)[1]):
                if array(df)[y, x] >= 0:
                    plt.text(x+0.5, y+0.5, array(df)[y, x], horizontalalignment='center', verticalalignment='center', fontsize=6)
        # make sure labels are only on outer subplots
        if i in [1, 3, 5]:
            plt.yticks(arange(0.5, len(df.index), 1), [official_names(n) for n in df.index], fontsize=10)
        else:
            plt.yticks([])
        if i in [5, 6]:
            plt.xticks(arange(0.5, len(df.columns), 1), [official_names(n) for n in df.columns], rotation=70, fontsize=10)
        else:
            plt.xticks([])
        plt.title(num_letter[i])
        # plt.title(official_title(name))
        i += 1
        # axis.set_aspect('equal')
    fig.text(0.5, 0.04, 'Cohesin name', ha='center', va='center', fontsize=24)
    fig.text(0.06, 0.5, 'Dockerin name', ha='center', va='center', rotation='vertical', fontsize=24)
    plt.savefig('mini_postdiction.png', dpi=600)
    plt.show()
    args = vars(parser.parse_args())

    if args['mode'] == 'how_many_pass':
        score_dict = score2dict_new(args['score_file'])
        all_who = all_who_pass(args, score_dict)
        percentage = 100.0*float(len(all_who))/float(len(score_dict.values()))
        print('there were %i purples out of %i, which is %f' % (len(all_who),
                                                                len(score_dict.values()), percentage))
        if args['show_all']:
            for k in all_who:
                print(k['description']+'.pdb')

    elif args['mode'] == 'analyse':
        # args['run_filters'] = generate_run_filters()
        score_dict = score2dict(args['score_file'])
        passed = all_who_pass(args, score_dict)
        print('found %i purples' % len(passed))
        if len(passed) == 0:
            print('non passed, so the lowest ddg are:')
            best_n_structs = best_n_structures(args, score_dict)
        else:
            print('had passed, so best ddg out of those:')
            passed_score_dict = {a['description']: a for a in passed}
            best_n_structs = best_n_structures(args, passed_score_dict)
        print('the best %i structures by filter %s are \n%s.pdb' % (args['n'], args['filter'],
                                                                    '.pdb '.join([a['description'] for a in best_n_structs])))

        if args['show_all']:
            print('showing ALL structures that passed the thresholds:')
            print('.pdb '.join([a['description'] for a in passed]) + '.pdb')
from RosettaFilter import score2dict


topath = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/'
run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                         'hbonds': 10})
all_docs = [a for a in os.listdir(topath) if 'doc_' in a]
fout = open(topath+'minidiagonal_full_names.txt', 'w')

for doc in all_docs:
    all_dirs = os.listdir(topath+doc)
    for dir in all_dirs:
        try:
            sc_file = [a for a in os.listdir(topath+doc+'/'+dir) if a[-3:] == '.sc']
            if sc_file:
                sc_dict = score2dict(topath+doc+'/'+dir+'/'+sc_file[0])
                passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
                if len(passed) > 5:
                    fout.write('%s\t%i\n' % (dir, len(passed)))
                    shutil.copy(topath+doc+'/'+dir+'/'+list(sc_dict.keys())[0]+'.pdb.gz', topath+'minidiagonal_pdbs')
        except OSError:
            print('no folder', dir)
fout.close()

# def analyse_minidiagonal(args):
#     with open('../minidiagonal.txt', 'w') as fout:
#         run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
#                                                  'hbonds': 10})
#         counter = 0
#         score_files = [a for a in os.listdir('./') if a[-3:] == '.sc']
#         for sc in score_files:
#!/usr/bin/env python3.5
from RosettaFilter import score2dict
import sys
__author__ = 'jonathan'

score_dict = score2dict(sys.argv[1])
print(score_dict[sys.argv[2]])
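
# usage sketch (illustrative arguments): python this_script.py my_run.score design_0001
# prints the {filter: value} dict recorded for that design in the given score file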