Пример #1
0
    def selectProteinAtomsCrds(self, zrange=[0, 1.0]):
        """Return coordinates of protein atoms lying inside a z window.

        Atoms are taken from ``self.pdb``. An atom is considered a protein
        atom when the second field of its information record equals
        ``"Protein"``.

        Parameters
        ----------
        zrange : iterable, length = 2
            Lower and upper z boundaries; both comparisons are strict.

        Returns
        -------
        selected_atoms : np.ndarray
            The coordinates of the atoms that passed the z filter,
            one row per selected atom.
        """

        atom_table = parsePDB().atomInformation(self.pdb)

        # collect the atom indices (strings) flagged as protein atoms
        protein_ids = []
        for atom_id in atom_table.keys():
            if atom_table[atom_id][1] == "Protein":
                protein_ids.append(atom_id)

        all_crds = np.asarray(
            coordinatesPDB().getAtomCrdByNdx(self.pdb, protein_ids))

        # keep only the rows whose third component (z) is inside the window
        inside = [row for row in all_crds if zrange[0] < row[2] < zrange[1]]

        return np.asarray(inside)
Пример #2
0
    def parsePDB(self):
        """Read atom indices and element fields from ``self.inpdb``.

        Returns
        -------
        tuple
            ``(atom_keys, elements)``: ``atom_keys`` is the ``keys()`` of
            the atom-information mapping, ``elements`` holds field 7 of
            every atom record in the same order.
        """
        atom_table = pdbIO.parsePDB(inPDB=self.inpdb).atomInformation(
            self.inpdb)
        atom_keys = atom_table.keys()

        elements = []
        for key in atom_keys:
            elements.append(atom_table[key][7])

        return (atom_keys, elements)
Пример #3
0
    def domainCOM(self,
                  domainf,
                  ref,
                  pdbchain='A',
                  output="com_domains.day",
                  atomNames=["CA"]):
        """
        Calculate the geometric center of every domain and save them to a file.

        Note that the plain mean of the selected atoms' coordinates is used,
        i.e. a geometry center rather than a mass-weighted center of mass.

        :param domainf: str, domain information data file
        :param ref: str, reference pdb file
        :param pdbchain: chain id
        :param output: str, output file name
        :param atomNames: list of str, atom names used to represent a residue
        :return: list of list, center of each domain, dimension N*3
        """

        domains = parsePDB().readDomainRes(domainf)

        # first field of each record is the domain name
        dnames = [x[0] for x in domains]

        pdbc = coordinatesPDB()

        coms = []
        for domain in domains:
            # get atom index of residues in this domain
            atomindex = index.gen_atom_index(ref, [
                pdbchain,
            ], domain[1:], atomNames, "original")
            atomindex = [str(x) for x in atomindex]

            # get crds of the selected atoms
            crds = pdbc.getAtomCrdByNdx(ref, atomindex)

            # geometry center, not center of mass
            com = np.mean(np.asarray(crds), axis=0)

            coms.append(list(com))

        # np.savetxt requires the header to be a single string, so the
        # domain names are joined instead of being passed as a list
        np.savetxt(output,
                   np.asarray(coms),
                   fmt="%8.3f",
                   delimiter=" ",
                   header=" ".join(dnames),
                   comments="#")

        return coms
Пример #4
0
    def elementCount(self, ligand):
        """Count how many atoms of each known element the ligand contains.

        The known elements are the keys of ``self.getVdWParams()``. Atoms
        whose element field (field 7 of the atom record) is not among them
        are added to the ``"DU"`` entry.

        :param ligand: ligand file passed to ``pdbIO.parsePDB``
        :return: dict mapping element name to its count
        """
        from dockml import pdbIO

        known = self.getVdWParams().keys()
        records = pdbIO.parsePDB(ligand).atomInformation(ligand)

        # start every known element at a numpy zero
        tally = dict(zip(known, np.zeros(len(known))))

        for atom_id in records.keys():
            symbol = records[atom_id][7]
            bucket = symbol if symbol in known else "DU"
            tally[bucket] += 1

        return tally
Пример #5
0
    def resInDomains(self, domainf, residues):
        '''
        Report how a list of residues is distributed over the domains.

        Each domain record is read as ``[name, start1, end1, start2, end2, ...]``;
        every (start, end) pair contributes the residues start..end inclusive.

        :param domainf: str, domain information data file
        :param residues: list, a list of residues from community analysis
        :return: tuple ``(ratio_indomain, ratio_outof)``; each is a list of
            ``(domain_name, ratio)`` pairs sorted by ratio, largest first.
            ``ratio_indomain`` is the fraction of a domain's residues that
            appear in ``residues``; ``ratio_outof`` is the fraction of
            ``residues`` that fall in the domain. A ratio is 0.0 when its
            denominator would be zero.
        '''

        dinfor = pdbIO.parsePDB().readDomainRes(domainf)

        # eg. { domain_name: {1, 2, 3} }
        domains = {}
        for d in dinfor:
            bounds = d[1:]
            members = set()
            # walk over all (start, end) pairs, not only the first one
            for start, end in zip(bounds[0::2], bounds[1::2]):
                members.update(range(start, end + 1))
            domains[d[0]] = members

        residue_set = set(residues)
        n_residues = len(residues)

        # ratio_outof: share of the input residues found in each domain,
        # eg. 100 residues given, 25 of them in domain HNH -> 25%.
        ratio_outof = {}

        # ratio_indomain: share of a domain covered by the input residues,
        # eg. HNH has 250 residues, 25 of them are in the list -> 10%.
        ratio_indomain = {}

        for name, members in domains.items():
            # how many of the given residues are in this domain
            hits = len(members & residue_set)

            # guard the divisions against an empty residue list / empty domain
            ratio_outof[name] = hits / float(n_residues) if n_residues else 0.0
            ratio_indomain[name] = hits / float(len(members)) if members else 0.0

        ratio_outof = sorted(ratio_outof.items(),
                             key=lambda x: x[1],
                             reverse=True)
        ratio_indomain = sorted(ratio_indomain.items(),
                                key=lambda x: x[1],
                                reverse=True)

        return ratio_indomain, ratio_outof
Пример #6
0
    def domainWiseEigVec(self,
                         domainf,
                         vectors,
                         scalefactor=1.0,
                         output='aver-vectors.dat'):
        '''
        Average the per-residue vectors inside each domain and save them.

        :param domainf: str, domain information data file
        :param vectors: list of lists, dimension N*3
        :param scalefactor: float, factor applied to every averaged vector
        :param output: str, output file name
        :return: tuple, (domain_average_vectors, domain names)
        '''

        domains = parsePDB().readDomainRes(domainf)

        # domain names
        d_name = [x[0] for x in domains]

        # smallest residue number over all domains; used to shift residue
        # numbers because the vectors are indexed starting from 0
        minResIndx = min(sum([x[1:] for x in domains], []))

        aver_vec = []
        for domain in domains:
            bounds = domain[1:]

            resindexlist = []
            # integer division: range() rejects the float produced by "/"
            for k in range(len(bounds) // 2):
                start, end = bounds[2 * k], bounds[2 * k + 1]
                # NOTE(review): the segment's end residue is excluded here;
                # confirm that this is intended.
                resindexlist += list(np.arange(start, end) - minResIndx)

            v = self.averageVectors(vectors, resindexlist) * scalefactor
            aver_vec.append(v)

        np.savetxt(output,
                   np.asarray(aver_vec),
                   delimiter=' ',
                   fmt='%12.5f',
                   header=" ".join(d_name),
                   comments="#")

        return (aver_vec, d_name)
Пример #7
0
    def __init__(self, ligandPDB, receptorPDB, gridsize=1.0):
        '''
        Store the input files, load the parameter tables and index the ligand.

        :param ligandPDB: ligand pdb file name
        :param receptorPDB: receptor pdb file name
        :param gridsize: float, grid size, unit angstrom
        '''

        self.gridsize = gridsize
        self.ligpdb = ligandPDB
        self.recpdb = receptorPDB

        bf = BindingFeature()
        self.vdwparm = bf.getVdWParams()
        self.eleparm = bf.getElementParams()

        self.atominfor = pio.parsePDB().atomInformation(self.ligpdb)

        # Best effort: second whitespace-separated field of every line that
        # contains "ATOM". An unreadable file or a malformed line yields an
        # empty index list; unrelated errors (e.g. KeyboardInterrupt) are no
        # longer swallowed.
        try:
            with open(self.ligpdb) as lines:
                self.ligndx = [x.split()[1] for x in lines if "ATOM" in x]
        except (IOError, OSError, IndexError, UnicodeError):
            self.ligndx = []
Пример #8
0
# accepted spellings of the data-shape option (args.ds)
_XYZ_SHAPES = ('xyz', 'XYZ', '3d')
_MTX_SHAPES = ('matrix', 'mtx')


def _load_dataset(mtxh, args, which=0):
    """Load ``args.dat[which]`` according to ``args.ds``; None if ds is unknown."""
    if args.ds in _XYZ_SHAPES:
        return mtxh.loadxyz(args.dat[which],
                            args.dtype,
                            args.xyzcol,
                            xyshift=args.xyshift)
    if args.ds in _MTX_SHAPES:
        return mtxh.reshapeMtx(args.dat[which],
                               args.dtype,
                               xyshift=args.xyshift)
    return None


def _require_dataset(mtxh, args, which=0):
    """Like ``_load_dataset`` but exit the program when ds is unknown."""
    data = _load_dataset(mtxh, args, which)
    if data is None:
        sys.exit(0)
    return data


def main():
    """Command-line entry point; dispatches on ``args.opt``.

    Supported operations: "merge", "pair-t-test", "ind-t-test" (two input
    files, followed by a linear-fit scatter plot of the third columns),
    "transform", "extract", "average", "domain-aver" and "neib0".
    Results are written to ``args.out`` where the operation produces a file.
    """

    mtxh = MatrixHandle()

    args, _unknown = arguments()

    if args.opt in ["merge", "pair-t-test", "ind-t-test"]:
        if args.ds in _XYZ_SHAPES or args.ds in _MTX_SHAPES:
            data1 = _load_dataset(mtxh, args, 0)
            data2 = _load_dataset(mtxh, args, 1)
        else:
            print("Error: Data-shape is not specified. ")
            data1, data2 = np.array([]), np.array([])

        if args.opt == "merge":

            merged = mtxh.merge_matrix(data1, data2)
            np.savetxt(args.out, merged, fmt="%.3f", delimiter=" ")
            print("Merge matrix file completed!")

        elif args.opt == "pair-t-test":
            t, p = stats.ttest_rel(data1[:, 2], data2[:, 2])
            print("T statistics: %6.3f " % t)
            print("P value     : %8.6f" % p)
        elif args.opt == "ind-t-test":
            t, p = stats.ttest_ind(data1[:, 2], data2[:, 2])
            print("T statistics: %6.3f " % t)
            print("P value     : %8.6f" % p)

        # linear fit of the third columns, shown for all three operations
        x, y = data1[:, 2], data2[:, 2]
        fit = np.polyfit(x, y, deg=1)
        plt.plot(x, fit[0] * x + fit[1], color='red', lw=2.5)
        plt.scatter(x, y, c='b')
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.show()

    elif args.opt == "transform":
        if args.ds in _XYZ_SHAPES:
            data = _load_dataset(mtxh, args, 0)

            odata = mtxh.xyz2matrix(data)

            np.savetxt(args.out, odata, fmt="%.5f", delimiter=" ")
        else:
            # any other data shape is treated as a matrix file here
            data = mtxh.reshapeMtx(args.dat[0],
                                   args.dtype,
                                   xyshift=args.xyshift)

            np.savetxt(args.out, data, fmt="%.5f", delimiter=" ")
        print("Transform matrix file type completed!")

    elif args.opt == "extract":
        # np.float was removed from NumPy; the builtin float is equivalent
        data = _require_dataset(mtxh, args, 0).astype(float)
        d = mtxh.extractDomainData(data, args.xyrange[:2], args.xyrange[2:])
        np.savetxt(args.out, d, fmt="%.5f")
        print("Extract matrix file completed!")

    elif args.opt == "average":
        aver_data = np.array([])
        for i in range(len(args.dat)):
            # load the i-th file (not always the first one) and accumulate
            # in floating point, like the other operations do
            data = _require_dataset(mtxh, args, i).astype(float)

            print(data.shape)

            if i == 0:
                aver_data = data
            else:
                aver_data[:, 2] += data[:, 2]

        aver_data[:, 2] = aver_data[:, 2] / float(len(args.dat))

        np.savetxt(args.out, aver_data, fmt="%.5f")
        print("Average matrix files completed!")

    elif args.opt == "domain-aver":
        data = _require_dataset(mtxh, args, 0).astype(float)

        data[:, 0] = data[:, 0] + args.start_res
        data[:, 1] = data[:, 1] + args.start_res

        if os.path.exists(args.domain):
            pdb = pdbIO.parsePDB()
            domains = pdb.readDomainRes(args.domain)
            drange = [x[1:] for x in domains]

        else:
            # NOTE(review): this yields a flat list of floats, while the code
            # below iterates over every drange[i] - confirm the expected
            # format of args.drange.
            drange = [float(x) for x in args.drange]

        with open(args.out, 'w') as tofile:
            for i, bounds in enumerate(drange):
                tofile.write("# %d %s \n" %
                             (i, " ".join([str(x) for x in bounds])))

            print(drange)

            for i in range(len(drange)):
                for j in range(len(drange)):
                    if args.dzero and i == j:
                        tofile.write("%3d %3d  0.0 \n" % (i, j))
                    else:
                        d = []
                        # every domain is a flat list of (start, end) pairs;
                        # combine each segment of i with each segment of j
                        for r1 in range(len(drange[i]) // 2):
                            for r2 in range(len(drange[j]) // 2):
                                ccc = mtxh.extractDomainData(
                                    data,
                                    xrange=drange[i][2 * r1:2 * r1 + 2],
                                    yrange=drange[j][2 * r2:2 * r2 + 2])[:, 2]
                                d += list(ccc)
                        tofile.write("%3d %3d %12.3f \n" % (i, j, np.mean(d)))

        print("Domain-wise matrix averaging completed!")

    elif args.opt == 'neib0':
        data = _require_dataset(mtxh, args, 0).astype(float)

        print(data.shape)

        newd = mtxh.neiborhood2zero(data,
                                    neiborsize=args.neibsize,
                                    outtype='mtx',
                                    zscale=args.zscale)
        np.savetxt(args.out, newd, fmt='%.3f')
Пример #9
0
def _select_residue_indices(all_resids, selection):
    """Return positions in all_resids matching [chains, first_res, last_res]."""
    chains = selection[0]
    first, last = int(selection[1]), int(selection[2])
    return [
        i for i, item in enumerate(all_resids)
        if item[2] in chains and first <= int(item[1]) <= last
    ]


def cmap_nbyn(trajs,
              ref,
              rc,
              lc,
              v=True,
              cutoff=0.35,
              allchains=" ABCDEFGH",
              atomtype=["sidechain", "sidechain"]):
    """Generate contact maps between two residue selections.

    Parameters
    ----------
    trajs : list of mt.trajectory objects, shape = N
        The input chunks of trajectories, N is number of chunks
    ref : str
        The reference pdb file name
    rc : list, shape = 3
        Chain identifier(s), first and last residue number (inclusive)
        for x-axis
    lc : list, shape = 3
        Chain identifier(s), first and last residue number (inclusive)
        for y-axis
    v : bool, default = True
        Whether print detail information during the calculation
    cutoff : float, default = 0.35
        The distance cutoff, in unit nanometer
    allchains : str, default = ' ABCDEFGH'
        All available chain identifiers in the reference pdb files
    atomtype : list, shape = 2, default = ["sidechain", "sidechain"]
        Atom types forwarded to ``CmapNbyN.cmap_nbyn``

    Returns
    -------
    contact_map : np.ndarray
        The per-chunk contact maps joined along the first axis;
        an empty array when ``trajs`` is empty.
    """

    pdb = pdbIO.parsePDB(inPDB=ref)
    all_resids = pdb.getNdxForRes(ref, chains=allchains)

    print(all_resids)

    # positions (in all_resids) of the x-axis residues
    resids_a = _select_residue_indices(all_resids, rc)
    print(resids_a)

    # positions (in all_resids) of the y-axis residues
    resids_b = _select_residue_indices(all_resids, lc)
    print(resids_b)

    chunks = []
    for i, traj in enumerate(trajs):
        # calculate cmap information
        verbose(v, "Generate cmap for chunk %5d ......" % i)
        contmap = CmapNbyN(traj,
                           resids_a=resids_a,
                           resids_b=resids_b,
                           cutoff=cutoff)

        contmap.cmap_nbyn(atomtype=atomtype)
        chunks.append(contmap.cmap_)

    if not chunks:
        return np.array([])
    if len(chunks) == 1:
        # single chunk: hand back its map unchanged
        return chunks[0]

    # join once instead of re-copying the growing array for every chunk
    return np.concatenate(chunks, axis=0)
Пример #10
0
    def drawTimeSeries2D(
        self,
        cmapmatrix,
        refpdb=[],
        fsize=14,
        xlabel="",
        ylabel="",
        cmaptype="Grey",
        xlim=[],
        ylim=[],
        yticks_loc=[],
        yticks_labels=[],
        yticks_showchainid=False,
        xticks_loc=[],
        xticks_labels=[],
        colorbar_label="",
        colorbar_show=False,
        savefig="",
    ):
        """
        Plot ligand-protein contacts as a time series (time on the x axis).

        The comma-separated file is sorted by its first column; only the
        remaining columns whose sum is positive ("key residues") are kept.

        :param cmapmatrix: str, the data file, containing time series matrix file
        :param refpdb: list, [ pdbfilename, chain id, residue sequence shift-by ]
        :param fsize: font size of the axis labels
        :param xlabel: x axis label
        :param ylabel: y axis label
        :param cmaptype: colormap name passed to plt.get_cmap
        :param xlim: x axis limits, ignored when empty
        :param ylim: y axis limits, ignored when empty
        :param yticks_loc: y tick locations
        :param yticks_labels: y tick labels
        :param yticks_showchainid: whether show chainid of protein residues
        :param xticks_loc: x tick locations
        :param xticks_labels: x tick labels
        :param colorbar_label: label of the colorbar
        :param colorbar_show: whether to draw a colorbar
        :param savefig: output figure file name, nothing is saved when empty
        :return: 1
        """
        # NOTE(review): the default cmaptype "Grey" may not be a registered
        # matplotlib colormap name - confirm.

        # load cmap file and order the rows by their first column
        cmapdata = np.loadtxt(cmapmatrix, delimiter=",")
        ordered = np.asarray(
            sorted(list(cmapdata), key=lambda row: row[0], reverse=False))

        # everything after the first column is per-residue contact data
        contact_cols = ordered[:, 1:]

        # key residues: columns with a positive column sum
        true_res = np.sum(contact_cols, axis=0) > 0
        key_res = [col for col in range(true_res.shape[0]) if true_res[col]]
        print("KEY RES ", key_res)

        # build residue labels from the reference pdb, when one is given
        res_labels = []
        if len(refpdb) == 3:

            ppdb = pdbIO.parsePDB("")
            fullreslist = ppdb.getNdxForRes(refpdb[0], [refpdb[1]])

            shortresmap = ppdb.longRes2ShortRes()
            fullreslist = [x for x in fullreslist if x[2] in refpdb[1]]

            for resk in key_res:
                entry = fullreslist[resk]
                if entry[0] not in shortresmap.keys():
                    continue

                label = shortresmap[entry[0]] + str(resk + refpdb[2])
                if yticks_showchainid:
                    label = label + entry[2]

                res_labels.append(label)

        # only keep the important residue cmap
        keyres_cmap = contact_cols[:, list(key_res)]

        # get the length of x and y axis
        shapex = len(key_res)
        shapey = cmapdata.shape[0] + 1

        print("Protein residue numbers: %d" % shapex)
        print("Time point numbers: %d" % shapey)

        # NOTE(review): the first key-residue column is dropped before
        # plotting although the y ticks below cover all of them - confirm.
        plt.pcolormesh(keyres_cmap[:, 1:].T, cmap=plt.get_cmap(cmaptype))

        if colorbar_show:
            plt.colorbar(label=colorbar_label)

        plt.xlabel(xlabel, fontsize=fsize)
        plt.ylabel(ylabel, fontsize=fsize)

        # explicit y ticks win; otherwise fall back to the residue labels
        if len(yticks_loc) and len(yticks_labels):
            plt.yticks(yticks_loc, yticks_labels)
        elif len(refpdb) == 3:
            plt.yticks(np.array(range(shapex)) + 0.5, res_labels)

        if len(xlim):
            plt.xlim(xlim)

        if len(ylim):
            plt.ylim(ylim)
        else:
            plt.ylim([0, shapex + 0.5])

        if len(xticks_labels) and len(xticks_loc):
            plt.xticks(xticks_loc, xticks_labels)

        if len(savefig):
            plt.savefig(savefig, dpi=2000)

        plt.show()

        return 1