Exemplo n.º 1
0
        def _create_mapping(r):
            """
            Build the seqres -> query index mapping for one hit.

            Two mappings are chained: query sequence -> hit sequence (taken
            from the alignment ``ali`` of the enclosing scope) and hit
            sequence -> seqres numbering (taken from the coordinate ranges
            stored in ``r``).

            :param r: record supporting ``r[key]`` lookup for the keys
                "alignment_id", "alignment_start", "alignment_end",
                "uniprot_start", "uniprot_end", "resseq_start" and
                "resseq_end" (presumably one row of a hit table -- confirm
                against the caller)
            :return: tuple ``(map_, mn)``; ``map_`` is a dict from seqres
                index (k) to query index (i), ``mn`` is the joined table
                it was built from
            :raises ValueError: raised by pandas if the Uniprot and seqres
                ranges in ``r`` do not have the same length
            """
            # first sequence in the alignment is the query; its header
            # carries the query start/end coordinates
            _, query_start, query_end = parse_header(ali.ids[0])

            # create mapping from query into PDB Uniprot sequence
            # i will be query sequence indices, j Uniprot sequence indices
            m = map_indices(
                ali[0], query_start, query_end,
                ali[r["alignment_id"]], r["alignment_start"],
                r["alignment_end"]
            )

            # create mapping from PDB Uniprot into seqres numbering
            # j will be Uniprot sequence index, k seqres index.
            # Convert to strings in one step since the other mapping has
            # its indices as strings; assigning strings into the integer
            # columns via .loc[:, col] is a deprecated silent upcast in
            # recent pandas versions.
            n = pd.DataFrame({
                "j": list(range(r["uniprot_start"], r["uniprot_end"] + 1)),
                "k": list(range(r["resseq_start"], r["resseq_end"] + 1)),
            }).astype(str)

            # join over Uniprot indices (i.e. j);
            # get rid of any position that is not aligned
            mn = m.merge(n, on="j", how="inner").dropna()

            # extract final mapping from seqres (k) to query (i)
            map_ = dict(zip(mn.k, mn.i))

            return map_, mn
Exemplo n.º 2
0
def align_dca2pdb(msa_name, pdbseq, msaseq):
    """
    Align two PDB sequences to two MSA (DCA) sequences and return a lookup
    from DCA index to PDB index.

    Each PDB sequence is globally aligned to the MSA sequence at the same
    position (0 with 0, 1 with 1). The index mappings of both alignments are
    stacked, with the second one offset by the length of the first
    sequences, written to a tab-separated reference file, and reduced to a
    dictionary.

    Some code taken from
    https://github.com/bsir/dca-frustratometer/blob/master/dca_frustratometer.py

    :param msa_name: name of the MSA file; a trailing ".fas" extension is
        removed and the remainder is used in the output file name
    :param pdbseq: indexable holding two PDB sequence strings
    :param msaseq: indexable holding two MSA sequence strings, in the same
        order as ``pdbseq``
    :return: dict mapping DCA index to PDB index for every position that
        remains after rows with missing values are dropped
    """
    import numpy as np
    import pandas as pd
    from Bio import pairwise2
    from evcouplings.compare import mapping as evmp
    print("(map_dca2pdb)\taligning dca sequence to pdb coord sequence...")

    # need to penalize for opening and adding gaps otherwise mapping is off (s param {-.5,-.1})
    alignments_1 = pairwise2.align.globalxs(pdbseq[0], msaseq[0], -.5, -.1)
    alignments_2 = pairwise2.align.globalxs(pdbseq[1], msaseq[1], -.5, -.1)
    print(pairwise2.format_alignment(*alignments_1[0], full_sequences=True))
    print(pairwise2.format_alignment(*alignments_2[0], full_sequences=True))

    # Only the first reported alignment of each pair is used. Numbering of
    # the second pair continues after the first pair's sequence lengths.
    map_1 = evmp.map_indices(alignments_1[0][0], 1, 0, alignments_1[0][1], 1,
                             0)
    map_2 = evmp.map_indices(alignments_2[0][0], 1 + len(pdbseq[0]), 0,
                             alignments_2[0][1], 1 + len(msaseq[0]), 0)

    # DataFrame.append was removed in pandas 2.0; concat stacks the two
    # tables the same way (original row labels are kept).
    map_pdb_dca = pd.concat([map_1, map_2])
    map_pdb_dca = map_pdb_dca.rename(columns={
        "i": "pdb_i",
        "A_i": "pdb_res",
        "j": "dca_i",
        "A_j": "dca_res"
    })

    # Remove only a trailing ".fas"; str.strip(".fas") would instead eat any
    # of the characters '.', 'f', 'a', 's' from both ends of the name.
    suffix = ".fas"
    if msa_name.endswith(suffix):
        base_name = msa_name[:-len(suffix)]
    else:
        base_name = msa_name

    # NOTE(review): Windows-style separators are hard-coded here; on POSIX
    # systems this is a single file name containing backslashes.
    outfile = "results\\reference_maps\\ref_map_{}.txt".format(base_name)

    # NOTE(review): the header assumes the table columns are ordered
    # i, A_i, j, A_j as returned by map_indices -- confirm.
    np.savetxt(outfile,
               map_pdb_dca,
               header="pdb_i\tpdb_res\tdca_i\tdca_res",
               fmt="%s\t%s\t%s\t%s",
               comments='')
    print("(map_dca2pdb)\tWrote {}".format(outfile))

    # rows with a missing value on either side cannot be mapped
    map_pdb_dca = map_pdb_dca.dropna()
    map_dca2pdb_dict = dict(zip(map_pdb_dca["dca_i"], map_pdb_dca["pdb_i"]))
    return map_dca2pdb_dict