Exemplo n.º 1
0
def getNOEData(noe_files, ss_seq):
    """
    Build a symmetric residue-by-residue NOE matrix from an NOE file.

    Every data line must split into exactly five whitespace-separated
    fields: res1, atom1, res2, atom2, noe.  Only the two residue numbers
    are used; they index the matrix directly.  The matrix has one more
    row/column than len(ss_seq) (presumably so 1-based residue numbers fit
    without shifting -- confirm against callers).

    Lines of length <= 1 and lines containing only whitespace are skipped.

    :param noe_files: passed unchanged to io.readFile
    :param ss_seq: sequence whose length fixes the matrix size
    :return: (len(ss_seq) + 1) x (len(ss_seq) + 1) array holding 1.0 at
             [res1, res2] and [res2, res1] for every parsed line
    :raises ValueError: if a data line does not have exactly five fields
    """
    # Single-argument call form prints the same under Python 2 and 3.
    print(noe_files)
    noe_lines = io.readFile(noe_files)
    size = len(ss_seq) + 1
    noe_matrix = zeros((size, size))
    for noel in noe_lines:
        # The length test keeps the original skip rule; the strip() test
        # additionally covers blank lines such as "\r\n" or "   \n", which
        # previously reached the unpack below and raised ValueError.
        if len(noel) <= 1 or not noel.strip():
            continue
        res1, _atom1, res2, _atom2, _noe = noel.split()
        first, second = int(res1), int(res2)
        noe_matrix[first, second] = 1.
        noe_matrix[second, first] = 1.

    return noe_matrix
Exemplo n.º 2
0
def FormatRdc(seqlen, rdcfile):
    """
    Parse rdc entries from a .npc file, grouped by first residue number.

    Every non-blank line must split into exactly six whitespace-separated
    fields, e.g. ['179', 'H', '179', 'N', '16.042', '0.0']:
    res1, atom1, res2, atom2, rdc, tolerance.

    :param seqlen: not used by this function; kept so existing callers
                   keep working
    :param rdcfile: passed unchanged to io_util.readFile
    :return: dict mapping int(res1) to a list of
             [int(res1), atom1, int(res2), atom2, float(rdc)] entries in
             file order.  The tolerance column is read but not returned.
    :raises ValueError: if a non-blank line does not have exactly six fields
    """

    import io_util as io

    rdcs = {}
    for line in io.readFile(rdcfile):
        fields = line.split()
        if not fields:
            # Blank / whitespace-only line (e.g. a trailing newline at the
            # end of the file) -- nothing to parse.
            continue
        r1, v1, r2, v2, rdc, _tol = fields
        first = int(r1)
        rdcs.setdefault(first, []).append(
            [first, v1, int(r2), v2, float(rdc)])
    return rdcs
Exemplo n.º 3
0
def parseSS(filename):
    """
    Read a secondary-structure assignment file and return its two sequences.

    The first line returned by io.readFile is skipped.  Every following
    line is split on tabs; its second column is appended to the amino-acid
    sequence and its third column to the secondary-structure sequence.

    :param filename: passed unchanged to io.readFile
    :return: aa_seq, ss_seq
    """

    # Columns of one split line, e.g.
    # ['232', 'I', 'H', '3', '232', '2', '~', '*\n']:
    # up_index, up_residue, ss_pred, ss_conf, msa_index, msa_cons%,
    # msa_cons, in_construct
    records = [row.split('\t') for row in io.readFile(filename)[1:]]

    aa_seq = ''.join(cols[1] for cols in records)
    ss_seq = ''.join(cols[2] for cols in records)

    return aa_seq, ss_seq
Exemplo n.º 4
0
def parseContacts(filename, ss_combi, ss_def, nor, cutoff_score):
    """
    Parse the ev_couplings generated using the plm method into contact arrays.

    The input is a list of all-by-all residue pairings with the score
    computed by the chosen method.  MI_DI column headers:
    - 1stResidueNum
    - 1stResidueCode
    - 2ndResidueNum
    - 2ndResidueCode
    - mutual information score
    - DI score
    PLM columns are the same, replacing DI score with PLM score, and
    omitting MI scores (always 0).

    Blank lines are skipped; every other line must split into exactly six
    whitespace-separated fields.

    :param filename: passed unchanged to io.readFile
    :param ss_combi: mapping whose values are sequences of records; items
                     [4] and [5] of each record are used as an inclusive
                     residue range.  Every pair of distinct keys is compared.
    :param ss_def: sequence of records; items [3] and [4] of each record are
                   used as an inclusive residue range.  Each record is
                   compared with the one that follows it.
                   NOTE(review): the index offsets differ from ss_combi
                   ([3]/[4] vs [4]/[5]) -- confirm the record layouts.
    :param nor: all three matrices are (nor + 1) x (nor + 1), so residue
                numbers index them directly
    :param cutoff_score: a pairing is kept only when its sixth column,
                         rounded to two decimals, is strictly greater
    :return: contact_matrix, plm_score_matrix, contact_ss_matrix
    :raises ValueError: if a non-blank line does not have exactly six fields
    """
    from collections import defaultdict
    from operator import itemgetter
    from itertools import combinations

    plm_contacts = defaultdict(list)

    for line in io.readFile(filename):
        fields = line.split()
        if not fields:
            # Blank / whitespace-only line; previously crashed the unpack.
            continue
        r1, _a1, r2, _a2, pl, score = fields

        if round(float(score), 2) > cutoff_score:
            # The cutoff is applied to the sixth column, but the stored
            # value is the fifth column.  Per the original author this is a
            # modification for the Cell paper dataset only; before it,
            # float(score) was stored instead.
            plm_contacts[int(r1)].append([int(r2), float(pl)])

    # Highest stored value first.  When a partner residue appears more than
    # once for the same residue, the later (lower) value is written last
    # into plm_score_matrix below.
    for partners in plm_contacts.values():
        partners.sort(key=itemgetter(1), reverse=True)

    shape = (nor + 1, nor + 1)
    contact_matrix = zeros(shape)  # correct for indices numbering
    plm_score_matrix = zeros(shape)  # keep residue numbering as it is
    contact_ss_matrix = zeros(shape)

    # Contacts between residues of records stored under two different keys.
    for key1, key2 in combinations(ss_combi, 2):
        for sse1 in ss_combi[key1]:
            for sse2 in ss_combi[key2]:
                for k in range(sse1[4], sse1[5] + 1):
                    # .get avoids inserting empty lists into the defaultdict.
                    entries = plm_contacts.get(k, ())
                    for m in range(sse2[4], sse2[5] + 1):
                        for partner, value in entries:
                            if partner == m:
                                contact_matrix[k, m] = 1.0
                                contact_matrix[m, k] = 1.0
                                plm_score_matrix[k, m] = value
                                plm_score_matrix[m, k] = value

    # Contacts between each ss_def record and the record that follows it.
    for current, following in zip(ss_def, ss_def[1:]):
        # "%s %s" reproduces the Python 2 `print a, b` output on 2 and 3.
        print("%s %s" % (current, following))
        for j in range(current[3], current[4] + 1):
            entries = plm_contacts.get(j, ())
            for k in range(following[3], following[4] + 1):
                for entry in entries:
                    if entry[0] == k:
                        print("%s %s" % (j, k))
                        contact_ss_matrix[j, k] = 1.0
                        contact_ss_matrix[k, j] = 1.0

    return contact_matrix, plm_score_matrix, contact_ss_matrix