Exemplo n.º 1
0
def filter_molecules(molecules, filter_strings):
    """
    Apply the given filter strings to every molecule and return the filtered copies.

    Args:
        molecules (list of PDB molecules): molecular data to filter
        filter_strings (list of strings): filters to apply, using PDB ATOM-style fields

    Returns:
        list with one filtered molecule per input molecule, in input order.

    Notes:

        - filter strings are in the form `key=value,key=value,...`, where numerical ranges are INCLUSIVE and
          indicated using a dash to separate the start and end indices, e.g. `name=CA,resSeq=2-14`
        - if no atoms in a molecule pass the filter, an empty molecule results
    """
    # All filter strings are accumulated into one shared filter table.
    combined_filters = {}
    for spec in filter_strings:
        PDB.UpdateFilters(spec, combined_filters, '=', ',', '-')

    return [PDB.FilterAtoms(molecule, combined_filters) for molecule in molecules]
    def add_title_section(self):
        """Append the title-section records: HEADER, TITLE, EXPDTA, AUTHOR.

        A HEADER record is always appended. TITLE, EXPDTA and AUTHOR records
        are appended only when the corresponding ``self.struct.cifdb`` lookup
        succeeds; a KeyError from the lookup silently skips that record type.
        """
        ## HEADER record
        header_rec = PDB.HEADER()
        self.pdb_file.append(header_rec)

        header_rec["idCode"] = self.struct.structure_id
        self.set_from_cifdb(
            header_rec, "depDate", "database_pdb_rev", "date_original")
        self.set_from_cifdb(
            header_rec, "classification", "struct_keywords", "pdbx_keywords")

        ## TITLE records: one record per 60-character slice of the title
        try:
            full_title = self.struct.cifdb["struct"]["title"]
        except KeyError:
            pass
        else:
            slice_starts = range(0, len(full_title), 60)
            for record_no, start in enumerate(slice_starts, 1):
                title_rec = PDB.TITLE()
                self.pdb_file.append(title_rec)

                # only records after the first carry a continuation number
                if record_no > 1:
                    title_rec["continuation"] = record_no

                title_rec["title"] = full_title[start:start + 60]

        ## EXPDTA record
        try:
            technique = self.struct.cifdb["exptl"]["method"]
        except KeyError:
            pass
        else:
            expdta_rec = PDB.EXPDTA()
            self.pdb_file.append(expdta_rec)
            expdta_rec["technique"] = technique

        ## AUTHOR record
        ## XXX: need to write a function to fix author names to PDB format
        try:
            author_rows = self.struct.cifdb["audit_author"]
        except KeyError:
            pass
        else:
            names = []
            for row in author_rows:
                # rows without a "name" column are skipped
                try:
                    names.append(row["name"])
                except KeyError:
                    pass

            author_rec = PDB.AUTHOR()
            self.pdb_file.append(author_rec)
            author_rec["authorList"] = ",".join(names)
Exemplo n.º 3
0
def sumAccesbilityBS(ppocketatom, proteinASA, proteinRSA):
    """Sum accessibility values over the atoms and residues of a binding site.

    Args:
        ppocketatom: input handed to ``PDB.PDB`` for the binding-site atoms.
        proteinASA: input handed to ``PDB.PDB``; the per-atom value is read
            from each atom's ``Bfact`` attribute.
        proteinRSA: input handed to ``parseRSAfile.RSA``.

    Returns:
        dict with three floats:
        ``sumASA`` - sum of ``Bfact`` of the first protein atom matching each
        pocket atom on chainID, name, resName and serial;
        ``sumRSAabs`` / ``sumRSArel`` - sum of ``ABSall`` / ``RELall`` over
        RSA residues that own at least one pocket atom ("N/A" values skipped).
    """
    asa_structure = PDB.PDB(proteinASA)
    asa_structure.get_lAtoms()

    rsa_table = parseRSAfile.RSA(proteinRSA)

    pocket_atoms = PDB.PDB(ppocketatom).get_lAtoms()

    # per-atom contribution: first matching protein atom only
    total_asa = 0.0
    for pocket_atom in pocket_atoms:
        for protein_atom in asa_structure.latom:
            same_atom = (pocket_atom.chainID == protein_atom.chainID
                         and pocket_atom.name == protein_atom.name
                         and pocket_atom.resName == protein_atom.resName
                         and pocket_atom.serial == protein_atom.serial)
            if same_atom:
                total_asa += float(protein_atom.Bfact)
                break

    # per-residue contribution: each RSA residue is counted at most once
    total_abs = 0.0
    total_rel = 0.0
    for residue in rsa_table.lres:
        for pocket_atom in pocket_atoms:
            in_residue = (pocket_atom.chainID == residue.chainID
                          and pocket_atom.resName == residue.resName
                          and pocket_atom.resSeq == residue.resSeq)
            if not in_residue:
                continue
            if residue.ABSall != "N/A":
                total_abs += float(residue.ABSall)
            if residue.RELall != "N/A":
                total_rel += float(residue.RELall)
            break

    return {"sumASA": total_asa, "sumRSAabs": total_abs, "sumRSArel": total_rel}
Exemplo n.º 4
0
 def reset_na2_pdb_resid(self, offset):
     """Add ``offset`` to every atom's resid in ``bdna2.1.pdb`` and rewrite the file.

     The file is looked up in ``self.mkcrd_folder``; a copy named
     ``bdna2.1.backup.pdb`` is made in the same folder before the file is
     read and overwritten.
     """
     target = path.join(self.mkcrd_folder, 'bdna2.1.pdb')
     backup = path.join(self.mkcrd_folder, 'bdna2.1.backup.pdb')
     copyfile(target, backup)
     print(f'{target} {backup}')

     pdb_reader = PDB.PDBReader(target, skip_header=2, skip_footer=1)
     for atom in pdb_reader.atomgroup:
         atom.set_resid(atom.resid + offset)

     PDB.PDBWriter(target, pdb_reader.atomgroup).write_pdb()
     print(f'Reset {target} resid by offset {offset}!')
     print(f'Check by...\nvim {target}')
Exemplo n.º 5
0
def prepare_log(model_list, out_file_name):
    """Create output log with models created.

    Method takes information from set_dihedral_angles_update to prepare a log
    with the modifications made to the initial structure. It also combines the
    output pdb files into a single pdb file with several states.

    Parameters
    ----------
    model_list : List
        Output from set_dihedral_angles_update. Each entry is read by index:
        [0] model file name, [1] distance (formatted as a float),
        [2] aa_number, [3] angles.
    out_file_name : String
        file name (without extension) for output log and pdb states file;
        ``.log`` and ``.pdb`` are appended.

    Returns
    -------
    None.

    """
    models_name = []

    # The context manager closes the log even if an entry is malformed.
    with open(out_file_name + '.log', 'w') as log_file:
        log_file.write('{:<3} {:<20} Distance\n'.format('#', 'File name'))

        # Rows are numbered from 1 in the log.
        for number, entry in enumerate(model_list, start=1):
            model_name = entry[0]
            distance = entry[1]
            aa_number = entry[2]
            angles = entry[3]

            log_file.write('{:<3}  {:<20} {:<8.2f} {} {}\n'.format(
                number, model_name, distance, aa_number, angles))

            models_name.append(model_name)

    # prepare pdb file with each model as a state
    PDB.print_models(models_name, out_file_name + '.pdb')
    def add_miscellaneous_fatures_section(self):
        """Append SITE records for every site returned by ``self.struct.iter_sites()``.

        Each fragment of a site fills one slot of a SITE record. A new SITE
        record (with the next serial number) is started for the first
        fragment of a site and again whenever four slots have been filled.
        A fragment without a "frag_id" key gets no seq/icode values.
        """
        serial_num = 0
        for site in self.struct.iter_sites():
            num_fragments = len(site.fragment_dict_list)

            site_pdb = None
            key_index = 0
            for frag_dict in site.fragment_dict_list:

                if site_pdb is None or key_index == 4:
                    serial_num += 1

                    key_index = 0

                    site_pdb = PDB.SITE()
                    self.pdb_file.append(site_pdb)

                    site_pdb["serNum"] = serial_num
                    site_pdb["siteID"] = site.site_id
                    site_pdb["numRes"] = num_fragments

                # NOTE(review): slot suffixes are 0-based here
                # (chainID0..chainID3); confirm against the field names
                # that PDB.SITE actually defines.
                chain_id = "chainID%d" % (key_index)
                res_name = "resName%d" % (key_index)
                res_seq = "seq%d" % (key_index)
                icode = "icode%d" % (key_index)

                # These writes must happen once per fragment, inside the
                # loop; previously they ran once per site, after the loop,
                # so only the last fragment was recorded and a site with no
                # fragments crashed.
                site_pdb[chain_id] = frag_dict["chain_id"]
                site_pdb[res_name] = frag_dict["res_name"]
                try:
                    site_pdb[res_seq], site_pdb[
                        icode] = Structure.fragment_id_split(frag_dict["frag_id"])
                except KeyError:
                    pass

                # advance to the next slot so the 4-per-record rollover fires
                key_index += 1
def save_structures(fpath, structures, dat):
    """
    Slightly different to the save routines in PDBD module; each structure
    has the same "internal" chainID values, and structures are separated into
    PDB 'MODEL' sections. We assume structures are lists of monomer ids.

    Args:
      fpath (string): path to PDB file for output
      structures (list of integer lists): sublists are individual structures, sublist elements are molecule indices for structure members
      dat: object whose `monomers[monomer_i]` yields subunit indices and whose
        `subunits[subunit_i]` yields atom records convertible with `dict(...)`

    Returns:
      None; the structures are written to `fpath`.
    """
    chains = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz123456789'

    # `with` guarantees the file is closed even if an atom record is malformed;
    # write() replaces the Python-2-only `print >> f` form.
    with open(fpath, 'w') as f:
        for structure in structures:
            chain_i = 0
            serial = 1  # for if we wish to reset count each time (e.g. lots of structures as in the capsid!)
            f.write('MODEL\n')
            for monomer_i in structure:
                for subunit_i in dat.monomers[monomer_i]:
                    for a in dat.subunits[subunit_i]:
                        a2 = dict(a)  # modify a copy of the data, not the original
                        # chain letters wrap around when a structure has more monomers than letters
                        a2['chainID'] = chains[chain_i % len(chains)]

                        a2['serial'] = serial  # change this to retain original serial?
                        serial += 1

                        f.write('%s\n' % PDB.MakePDBAtomLine(a2))
                    f.write('TER\n')
                chain_i += 1
            f.write('ENDMDL\n')
Exemplo n.º 8
0
    def computeFPI(self, clean=0):
        """Write the FPI table for this ligand/pocket pair and return its path.

        The frame id is taken from the ligand file name ``self.plig``
        (second "_"-separated token, extension removed) and the output is
        ``self.prout + "FPI_<frameID>.csv"``.

        Args:
            clean: when 0 (default) an already existing output file is
                reused and nothing is recomputed.

        Returns:
            Path of the FPI csv file.
        """
        frameID = self.plig.split("/")[-1].split("_")[1].split(".")[0]

        pfileFPI = self.prout + "FPI_" + frameID + ".csv"

        # if the file exists => reuse it
        if path.exists(pfileFPI) and clean == 0:
            return pfileFPI

        # `with` closes the output even if the FPI computation raises
        with open(pfileFPI, "w") as fileFPI:
            # header
            fileFPI.write("Ligand and pocket res\tList residues in pocket\tFPI\n")

            # define residue pocket
            cPocket = PDB.PDB(self.pBS, hydrogen=1)
            cPocket.get_byres(onlyres=1)
            lres = cPocket.getListResForFPI()

            pyplif.get_FPI(pligPDB=self.plig,
                           ppocketPDB=self.pBS,
                           lres=lres,
                           filout=fileFPI)

        return pfileFPI
Exemplo n.º 9
0
    def computeRMSFresBS(self, pr_MDout):
        """Rewrite the per-residue RMSD table with a binding-site flag column.

        Residue identifiers are collected from every file in
        ``pr_MDout + "BSs/"`` (second "_"-separated token of each
        ``get_byres()`` key). The table ``RMSDs/residues/resRMSD`` is then
        copied to ``RMSDs/residues/resRMSD_BS`` with an extra ``BS`` column
        set to 1 when the row's ``NameRes`` is one of those identifiers.

        Returns:
            Path of the written ``resRMSD_BS`` file.
        """
        # collect residues seen in any binding-site file
        bs_dir = pr_MDout + "BSs/"
        bs_residues = set()  # only used for membership tests
        for pBS in listdir(bs_dir):
            cBS = PDB.PDB(bs_dir + pBS)
            for res in cBS.get_byres():
                bs_residues.add(res.split("_")[1])

        # load the residue RMSD table
        presRMSF = pr_MDout + "RMSDs/residues/resRMSD"
        ldresRMSF = toolbox.matrixToList(presRMSF)

        # rewrite it with the binding-site flag
        pfilout = pr_MDout + "RMSDs/residues/resRMSD_BS"
        with open(pfilout, "w") as filout:
            filout.write("NameRes\tall\tCa\tDmax\tBS\n")
            for dresRMSF in ldresRMSF:
                BS = 1 if dresRMSF["NameRes"] in bs_residues else 0
                filout.write("%s\t%s\t%s\t%s\t%s\n" %
                             (dresRMSF["NameRes"], dresRMSF["all"],
                              dresRMSF["Ca"], dresRMSF["Dmax"], BS))

        return pfilout
    def add_atom_records(self):
        """Emit ATOM, TER and HETATM records for the current default model.

        For each chain the atoms of its standard residues are written as
        ATOM records, followed by a TER record describing the last standard
        residue visited. Afterwards the atoms of all non-standard residues
        are written as HETATM records.
        """
        ## standard groups: ATOM records, then one TER per chain
        for chain in self.struct.iter_chains():
            last_res = None
            for last_res in chain.iter_standard_residues():
                for atom in last_res.iter_all_atoms():
                    self.add_ATOM("ATOM", atom)

            # no (truthy) standard residue in this chain: nothing to terminate
            if not last_res:
                continue

            ## chain termination record
            ter = PDB.TER()
            self.pdb_file.append(ter)
            seq_number, insertion_code = Structure.fragment_id_split(
                last_res.fragment_id)
            ter["serial"] = self.next_serial_number()
            ter["resName"] = last_res.res_name
            ter["chainID"] = last_res.chain_id
            ter["resSeq"] = seq_number
            ter["iCode"] = insertion_code

        ## non-standard groups: HETATM records
        for chain in self.struct.iter_chains():
            for fragment in chain.iter_non_standard_residues():
                for atom in fragment.iter_all_atoms():
                    self.add_ATOM("HETATM", atom)
    def add_primary_structure_section(self):
        """Append SEQRES records for every chain that has a sequence.

        Each SEQRES record holds up to 13 residue names (fields
        ``resName1`` .. ``resName13``); records of a chain are numbered from
        1 via ``serNum``. Chains with an empty sequence are skipped.
        """
        slot_names = ["resName%d" % slot for slot in range(1, 14)]

        for chain in self.struct.iter_chains():
            total = len(chain.sequence)
            if total == 0:
                continue

            record_no = 0
            position = 0
            while position < total:
                seqres = PDB.SEQRES()
                self.pdb_file.append(seqres)

                record_no += 1
                seqres["serNum"] = record_no
                seqres["chainID"] = chain.chain_id
                seqres["numRes"] = total

                for slot in slot_names:
                    # running off the end of the sequence ends the last record
                    try:
                        seqres[slot] = chain.sequence[position]
                    except IndexError:
                        break
                    position += 1
Exemplo n.º 12
0
def blast_pdb_local(fasta_string, num_hits=1000):
    """Run a local ``blastp`` search against ``pdbaa`` and build one MSMSeed per usable hit.

    The BLAST database is looked up under ``$DATA_HOME/pdbaa`` and template
    structures are read from the repository at ``$PDB_HOME``. Hits whose
    extracted template structure is empty are skipped.

    NOTE: this function is Python 2 code (it relies on the ``StringIO``
    module and the ``len`` attribute of its buffers).

    Args:
        fasta_string: query sequence, sent to ``blastp`` on stdin.
        num_hits: value passed to ``-max_target_seqs``.

    Returns:
        list of MSMSeed objects, in BLAST output order.
    """
    import subprocess
    import os
    import shlex
    import StringIO
    import simtk.openmm.app as app
    blast_data = os.getenv("DATA_HOME")
    blast_query = 'blastp -db %s/pdbaa -max_target_seqs %d -outfmt' % (blast_data, num_hits)
    # the format spec contains spaces, so it is appended as one argument
    # after the rest of the command line has been split
    out_fmt = '7 qseqid sseqid evalue bitscore'
    blast_cmd = shlex.split(blast_query)
    blast_cmd.append(out_fmt)
    p = subprocess.Popen(blast_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    blast_aln, error = p.communicate(input=fasta_string)
    msmseeds = []
    local_pdb_repo = os.getenv("PDB_HOME")
    for result in blast_aln.splitlines():
        # Skip blank lines as well as "#" comment lines; indexing result[0]
        # on an empty line would raise IndexError.
        if not result or result[0] == "#":
            continue
        res_data = result.split("\t")
        e_value = float(res_data[2])
        # subject id is "|"-separated; fields from index 3 on are joined with "_"
        template_chain_code = "_".join(res_data[1].split("|")[3:])
        code_parts = template_chain_code.split("_")
        raw_template_pdb = _read_local_repository(local_pdb_repo, code_parts[0])
        template_fasta, pdb_resnums = _retrieve_fasta(template_chain_code)
        template_pdb = StringIO.StringIO()
        raw_template_pdbio = StringIO.StringIO(raw_template_pdb)
        raw_template_pdbio.seek(0)
        # called for its effect of filling template_pdb; the return value is not needed
        PDB.extract_residues_by_resnum(template_pdb, raw_template_pdbio, pdb_resnums, code_parts[1])
        template_pdb.seek(0)
        if template_pdb.len == 0:
            continue
        template_pdbfile = app.PDBFile(template_pdb)
        msmseeds.append(MSMSeed(fasta_string, template_fasta, template_pdbfile, e_value))
    return msmseeds
Exemplo n.º 13
0
def query():
    """Run the submitted query against PubMed and the PDB and render the results page.

    Reads ``inputQuery`` from the request form, searches both sources, and
    renders ``results.html`` with the two result sets plus a summary
    (result counts, the structure with the smallest ``Resolution`` value,
    and the most frequent ``Senior Author``).
    """
    _query = request.form['inputQuery']

    print('pubmed query started')
    _papers = pms.searchCoordinator(_query)
    _paperRows = _papers.T.to_dict().values()
    _paperCount = str(len(_paperRows))
    print('pubmed query completed with ' + _paperCount + ' results.')

    print('PDB query started')

    _structures = pdb.search(_query)
    _structureRows = _structures.T.to_dict().values()
    _structureCount = str(len(_structureRows))

    print('PDB query completed with ' + _structureCount + ' results.')

    # NOTE(review): the result of dropna() is discarded, so this call does
    # not change _structures; kept as-is to preserve behavior.
    _structures.dropna()

    # empty resolutions are treated as 1000 so they never win the minimum
    _resolutions = _structures['Resolution'].replace("", "1000").astype(float)
    _best = _structures.loc[_resolutions.idxmin()]

    _summary = {
        "numStructures": _structureCount,
        "numPapers": _paperCount,
        "highResID": _best["PDB ID"],
        "highResolution": _best["Resolution"],
        "bestAuthor": _papers["Senior Author"].mode()[0],
    }

    return render_template('results.html',
                           query=_query,
                           result=_paperRows,
                           pdb=_structureRows,
                           summary=_summary)
Exemplo n.º 14
0
def print_PDB(f, atoms, bonds, unit_length=None):
    """
    Print geometrical information to PDB file format for visualization.

    Atom lines are written in order, with a 'TER' line inserted before every
    atom whose index is a non-zero multiple of `unit_length`, and one final
    'TER' line after the last atom. CONECT lines for the bonds follow.

    Args:
        f (file) : output destination
        atoms (list of PDB-style atom) : vertices
        bonds (list of integer pairs) : vertex connections
        unit_length (integer) : number of consecutive vertices in a notional
            geometrical unit; defaults to `len(atoms)` (a single unit)

    Returns:
        Nothing
    """
    conect_format = 'CONECT%5d%5d'

    if unit_length is None:
        unit_length = len(atoms)

    for index, atom in enumerate(atoms):
        # close the previous unit before starting a new one
        if (index > 0) and (index % unit_length == 0):
            print('TER   ', file=f)
        print(PDB.MakePDBAtomLine(atom), file=f)

    print('TER   ', file=f)

    for bond in bonds:
        print(conect_format % (bond[0], bond[1]), file=f)
Exemplo n.º 15
0
    def __init__(self, ppocket, pPDB):
        """Store the pocket and protein paths and preload the pocket content.

        ``self.latoms`` holds the result of ``get_lAtoms()`` and
        ``self.byresall`` the result of ``get_byres(onlyres=1)`` for the
        structure loaded from ``ppocket``.
        """
        self.ppocket = ppocket
        self.pprotein = pPDB

        # loaded with PDB.PDB defaults (original note: hydrogens not included)
        pocket_structure = PDB.PDB(ppocket)
        self.latoms = pocket_structure.get_lAtoms()
        self.byresall = pocket_structure.get_byres(onlyres=1)
Exemplo n.º 16
0
    def computeFPIres(self):
        """Compute an FPI for every residue, treating each residue as a ligand.

        For each residue returned by ``self.CPDB.get_byres()`` the residue is
        written as a HETATM "ligand" file and its surrounding pocket as a
        second file, both under ``self.prFPI + "Pockets/"``; ``pyplif.get_FPI``
        is then run on the pair and appends to ``FPIres.csv``.

        Side effects:
            sets ``self.pfileFPI`` (path of the csv) and ``self.FPI``
            (dict: residue key -> value returned by ``get_FPI``).
        """
        pfileFPI = self.prFPI + "FPIres.csv"
        dout = {}

        # need to define one pocket by residues -> folder of pockets
        prpocket = self.prFPI + "Pockets/"

        # `with` closes the csv even if one residue fails
        with open(pfileFPI, "w") as fileFPI:
            # header
            fileFPI.write("Residue Name\tList residues in pocket\tFPI\n")

            try:
                makedirs(prpocket)
            except OSError:
                # best effort: typically the folder already exists
                pass

            dres = self.CPDB.get_byres()
            for resatoms in dres.keys():

                # residue as a ligand (work on a copy: records are rewritten)
                latomres = deepcopy(dres[resatoms])
                PDB.changeRecoder(latomres, "HETATM")
                pligand = prpocket + resatoms + ".pdb"
                self.CPDB.writePDB(latoms=latomres, pfilout=pligand)

                # define pocket around that residue
                latomspocket = self.CPDB.get_BSfromLatom(latomin=latomres)
                ppocket = prpocket + "pocket_" + resatoms + ".pdb"
                self.CPDB.writePDB(latoms=latomspocket, pfilout=ppocket)

                # format list of residues considered
                lresformated = PDB.convert_ListAtomtoList(latomspocket)

                # run FPI
                dout[resatoms] = pyplif.get_FPI(pligPDB=pligand,
                                                ppocketPDB=ppocket,
                                                lres=lresformated,
                                                filout=fileFPI)

        self.pfileFPI = pfileFPI
        self.FPI = dout
Exemplo n.º 17
0
 def getProtein(self, pdbid, filename=None):
     """Return the result of ``PDB.readFile`` for the given PDB id.

     When ``filename`` is not given it is looked up with ``findpdbfile``.
     If that lookup also yields None and ``DO_REMOTE`` is true, the file is
     fetched into a temporary ``.pdb`` path with
     ``fetchproteinfile.fetchPDBFile``. If no file could be determined,
     ``PDB.readFile`` is still called with None.
     """
     if filename is None:
         filename = findpdbfile(pdbid)
     if DO_REMOTE and filename is None:
         import fetchproteinfile
         import tempfile
         # NOTE(review): tempfile.mktemp is deprecated and race-prone;
         # switching to mkstemp needs confirmation that fetchPDBFile
         # accepts an already existing (empty) file.
         filename = tempfile.mktemp(".pdb")
         fetchproteinfile.fetchPDBFile(pdbid, filename)
     return PDB.readFile(filename)
Exemplo n.º 18
0
def writePDB(universe, configuration, pdb_file_name):
    """Write a configuration to a PDB file and return the atom sequence used.

    Args:
        universe: when not None, ``configuration`` is first replaced by
            ``universe.contiguousObjectConfiguration(None, configuration)``.
        configuration: configuration to write.
        pdb_file_name: path of the output file (opened with the 'xplor'
            option of ``PDB.PDBOutputFile``).

    Returns:
        The ``atom_sequence`` attribute of the output file object, read
        after writing.
    """
    if universe is not None:
        configuration = universe.contiguousObjectConfiguration(None,
                                                               configuration)
    pdb = PDB.PDBOutputFile(pdb_file_name, 'xplor')
    try:
        pdb.write(universe, configuration)
        sequence = pdb.atom_sequence
    finally:
        # close the output even if writing fails
        pdb.close()
    return sequence
Exemplo n.º 19
0
    def RMSDProt(self):
        """Write the protein RMSD table against frame 0 and plot it.

        Output goes to ``self.dMD["prRMSD"] + "protein/protRMSD"``. If that
        file already exists nothing is recomputed and the method returns
        immediately. Otherwise every ``self.stepFrame``-th frame below
        ``MDtime / timeframe`` is loaded, transformed with the stored
        TMalign matrix for that frame and compared with frame 0.
        """
        prRMSDprot = self.dMD["prRMSD"] + "protein/"
        pathFolder.createFolder(prRMSDprot)
        if "prSuperMatrix" not in dir(self):
            self.Superimpose(0)

        pfilout = prRMSDprot + "protRMSD"
        if path.exists(pfilout):
            return

        # `with` closes the table even if a frame or matrix is missing
        with open(pfilout, "w") as filout:
            filout.write("Time\tRMSDall\tRMSDC\tDmax\n0\t0\t0\t0\n")

            # open reference frame
            nframeref = "%05d" % 0
            pframeref = self.dMD["prframe"] + "frame_" + nframeref + ".pdb"
            cprotref = PDB.PDB(PDB_input=pframeref)
            cprotref.get_atomProt()

            i = self.stepFrame
            imax = float(self.MDtime) / float(self.timeframe)
            while i < imax:
                nframe2 = "%05d" % i
                pframe2 = self.dMD["prframe"] + "frame_" + nframe2 + ".pdb"

                cprot2 = PDB.PDB(PDB_input=pframe2)
                cprot2.get_atomProt()

                # apply the superimposition matrix on frame 2
                pmatrix = self.prSuperMatrix + nframeref + "_" + nframe2
                matrixload = toolbox.loadMatrixTMalign(pmatrix)
                for atomprot2 in cprot2.latomProt:
                    atomprot2.applyMatrixRotTransloc(matrixload)

                lRMSD = calculate.RMSDTwoList(cprotref.latomProt,
                                              cprot2.latomProt)
                filout.write("%s\t%s\t%s\t%s\n" %
                             (i / 100.0, lRMSD[0], lRMSD[1], lRMSD[2]))
                i += self.stepFrame

        runExternalSoft.runscatterplotRMSD(pfilout)
Exemplo n.º 20
0
    def computeRMSDProt(self, pr_MDout):
        """Write the RMSD of every superimposed frame against frame 0.

        One TMalign matrix file is expected per frame in
        ``pr_MDout + "RMSDs/superimpose/"``; the frame id is the last
        "_"-separated token of the matrix file name. The table is written to
        ``pr_MDout + "RMSDs/protein/protRMSD_all"``.

        Returns:
            Path of the written table.

        Raises:
            KeyError: if the frame ids are not exactly "00001" .. "<N>" for
                N matrix files.
        """
        # load frame 0 as the reference
        cfram0 = PDB.PDB(pr_MDout + "framesMD/frame_00000.pdb")
        cfram0.get_lAtoms()

        pr_TMalign = pr_MDout + "RMSDs/superimpose/"

        dRMSD = {}
        for pTMalign in listdir(pr_TMalign):
            frame = pTMalign.split("_")[-1]
            dmatrixTMalign = toolbox.loadMatrixTMalign(pr_TMalign + pTMalign)
            cFrame = PDB.PDB("%sframesMD/frame_%s.pdb" % (pr_MDout, frame))
            cFrame.get_lAtoms()

            # superimpose the frame before measuring
            for atom in cFrame.latom:
                atom.applyMatrixRotTransloc(dmatrixTMalign)

            dRMSD[frame] = calculate.RMSDTwoList(cfram0.latom, cFrame.latom)

        # write the protein RMSD file
        pfilout = pr_MDout + "RMSDs/protein/protRMSD_all"
        with open(pfilout, "w") as filout:
            filout.write("Time\tRMSDall\tRMSDC\tDmax\n")
            filout.write("0.0\t0.0\t0.0\t0.0\n")

            for i in range(1, len(dRMSD) + 1):
                rmsd = dRMSD["%05d" % i]
                filout.write("%.2f\t%s\t%s\t%s\n" %
                             (i / 100.0, rmsd[0], rmsd[1], rmsd[2]))
        return pfilout
Exemplo n.º 21
0
def ASAHydrophobicityPolarity(ppdbasa, pBS):
    """Compute polarity and hydrophobicity ratios for a binding site.

    Each binding-site atom is matched to the first atom of ``ppdbasa`` with
    the same chainID, name, resName and serial; the matched atom's ``Bfact``
    value (presumably the ASA stored in the B-factor column - confirm) is
    accumulated per class: C, O, N, S of CYS, S of MET.

    Returns:
        [polarity, hydrophobicity] where polarity = (O + N + Scys) / total
        and hydrophobicity = (C + Smet) / total.

    Raises:
        ValueError: a matched sulfur atom belongs to a residue other than
            CYS or MET.
        KeyError: a matched atom has an element other than C, O, N or S.
        ZeroDivisionError: the total is zero (e.g. no atom matched).
    """
    cPDBasa = PDB.PDB(ppdbasa)
    latomASA = cPDBasa.get_lAtoms()
    cBS = PDB.PDB(pBS)
    latomBS = cBS.get_lAtoms()

    dcompute = {"C": [], "O": [], "N": [], "Scys": [], "Smet": []}

    for atomBS in latomBS:
        for atomProt in latomASA:
            if atomBS.chainID == atomProt.chainID and atomBS.name == atomProt.name and atomBS.resName == atomProt.resName and atomBS.serial == atomProt.serial:
                if atomBS.element != "S":
                    dcompute[atomBS.element].append(float(atomProt.Bfact))
                elif atomBS.resName == "CYS":
                    dcompute["Scys"].append(float(atomProt.Bfact))
                elif atomBS.resName == "MET":
                    dcompute["Smet"].append(float(atomProt.Bfact))
                else:
                    # previously a debug print followed by an undefined
                    # name used to force a crash
                    raise ValueError(
                        "Unexpected sulfur atom outside CYS/MET: %s" %
                        (atomBS,))
                break

    # same summation order as before, computed once
    polar = sum(dcompute["O"]) + sum(dcompute["N"]) + sum(dcompute["Scys"])
    total = polar + sum(dcompute["Smet"]) + sum(dcompute["C"])
    hydrophobic = sum(dcompute["C"]) + sum(dcompute["Smet"])

    polarityASA = polar / total
    hydrophobicityASA = hydrophobic / total

    return [polarityASA, hydrophobicityASA]
Exemplo n.º 22
0
    def computeRMSDLig(self, pr_MDout):
        """Write the ligand RMSD of every superimposed frame against frame 0.

        One TMalign matrix file is expected per frame in
        ``pr_MDout + "RMSDs/superimpose/"``; the frame id is the last
        "_"-separated token of the matrix file name and selects
        ``lig/LGD_<frame>.pdb``. Only the first value returned by
        ``calculate.RMSDTwoList`` is kept. The table is written to
        ``pr_MDout + "RMSDs/ligand/ligRMSD"``.

        Returns:
            Path of the written table.

        Raises:
            KeyError: if the frame ids are not exactly "00001" .. "<N>" for
                N matrix files.
        """
        # load ligand in frame 0 as the reference
        clig0 = PDB.PDB(pr_MDout + "lig/LGD_00000.pdb")
        clig0.get_lAtoms()

        pr_TMalign = pr_MDout + "RMSDs/superimpose/"

        dRMSD = {}
        for pTMalign in listdir(pr_TMalign):
            frame = pTMalign.split("_")[-1]
            dmatrixTMalign = toolbox.loadMatrixTMalign(pr_TMalign + pTMalign)
            cligFrame = PDB.PDB("%slig/LGD_%s.pdb" % (pr_MDout, frame))
            cligFrame.get_lAtoms()

            # superimpose the ligand before measuring
            for atomLig in cligFrame.latom:
                atomLig.applyMatrixRotTransloc(dmatrixTMalign)

            dRMSD[frame] = calculate.RMSDTwoList(clig0.latom,
                                                 cligFrame.latom)[0]

        # write the RMSD lig file
        pfilout = pr_MDout + "RMSDs/ligand/ligRMSD"
        with open(pfilout, "w") as filout:
            filout.write("Time\tRMSD\n")
            filout.write("0.0\t0.0\n")

            for i in range(1, len(dRMSD) + 1):
                filout.write("%.2f\t%s\n" % (i / 100.0, dRMSD["%05d" % i]))
        return pfilout
    def add_crystallographic_coordinate_transformation_section(self):
        """Append the CRYST1 record built from the structure's unit cell.

        Section covers CRYST1,ORIGXn,SCALEn,MTRIXn,TVECT; only CRYST1 is
        written here.
        """
        record = PDB.CRYST1()
        self.pdb_file.append(record)

        cell = self.struct.unit_cell

        # cell edge lengths are copied straight from the unit cell
        for edge in ("a", "b", "c"):
            record[edge] = getattr(cell, edge)

        # angles come from the unit cell's degree helpers
        record["alpha"] = cell.calc_alpha_deg()
        record["beta"] = cell.calc_beta_deg()
        record["gamma"] = cell.calc_gamma_deg()
        record["sgroup"] = cell.space_group.pdb_name
    def add_coordinate_section(self):
        """Append the coordinate section: MODEL,ATOM,SIGATM,ANISOU,SIGUIJ,TER,HETATM,ENDMDL.

        With more than one model, each model is temporarily made the
        structure's default model and wrapped in MODEL/ENDMDL records; the
        original default model is restored afterwards, even if writing a
        model fails. With a single model only the atom records are written.
        """
        if len(self.struct.model_list) > 1:
            ## case 1: multiple models
            orig_model = self.struct.default_model
            try:
                for model in self.struct.iter_models():
                    self.struct.default_model = model

                    model_rec = PDB.MODEL()
                    self.pdb_file.append(model_rec)
                    model_rec["serial"] = model.model_id

                    self.add_atom_records()

                    endmdl = PDB.ENDMDL()
                    self.pdb_file.append(endmdl)
            finally:
                # never leave the structure pointing at a loop model
                self.struct.default_model = orig_model

        else:
            ## case 2: single model
            self.add_atom_records()
Exemplo n.º 25
0
    def extractLigBSbyFrame(self, BSCutoff, namelig, clean=0):
        """Extract the ligand and its binding site from every frame of every job.

        For each job in ``self.lMD`` that has a "prframe" folder, the output
        folders ``<pranalysis><job>/BSs/`` and ``<pranalysis><job>/lig/`` are
        created (and recorded under "prBSs" / "prLig"). If both already hold
        at least as many files as there are frames the job is skipped;
        otherwise one ``LGD_<frame>`` and one ``BS_<frame>`` file is written
        per frame.

        Args:
            BSCutoff: passed as ``dpocket`` to ``get_BSfromlig``.
            namelig: ligand name passed to ``get_lig``.
            clean: forwarded to ``pathFolder.createFolder``.
        """
        # c is a 1-based progress counter, printed with the job name
        for c, jobname in enumerate(self.lMD.keys(), 1):
            print("%s %s" % (c, jobname))
            job = self.lMD[jobname]
            if "prframe" not in job:
                continue

            job["prBSs"] = self.pranalysis + str(jobname) + "/BSs/"
            pathFolder.createFolder(job["prBSs"], clean=clean)
            job["prLig"] = self.pranalysis + str(jobname) + "/lig/"
            pathFolder.createFolder(job["prLig"], clean=clean)

            # list the frame folder once and reuse it for paths and count
            frame_names = listdir(job["prframe"])
            lpframe = [job["prframe"] + name for name in frame_names]
            nb_frame = len(frame_names)

            if len(listdir(job["prLig"])) >= nb_frame and len(
                    listdir(job["prBSs"])) >= nb_frame:
                print("=> pass")
                continue

            for pframe in lpframe:
                cPDB = PDB.PDB(pframe, hydrogen=1)
                latomlig = cPDB.get_lig(namelig)

                cPDB.get_BSfromlig(dpocket=BSCutoff)
                # add step of rename atom
                frame_suffix = pframe.split("_")[-1]
                pLGD = job["prLig"] + "LGD_" + frame_suffix
                pBS = job["prBSs"] + "BS_" + frame_suffix

                cPDB.writePDB(pLGD, latomlig, conect=1)
                cPDB.writePDB(pBS, cPDB.pocketsRES["UNK_900_A"]
                              )  # default in schrodinger
Exemplo n.º 26
0
def addResidueToResidueList(curr_residue_list, curr_res_num, curr_res_name,
                            curr_atom_list):
    """Create a residue from the accumulated atoms and append it to the list.

    A ``PDB.Residue`` (with ``chain=None``) is built from the given number,
    name and atoms, appended to ``curr_residue_list`` in place, and set as
    the residue of every atom in ``curr_atom_list``. Nothing is returned.
    """
    if Common.debug:
        # single-argument print(...) works under both Python 2 and 3
        print("Adding atoms to residue '%s' %s: %s" % (
            curr_res_name,
            curr_res_num,
            [atom.getName() for atom in curr_atom_list]))
    r = PDB.Residue(number=curr_res_num,
                    name=curr_res_name,
                    atoms=curr_atom_list,
                    chain=None)

    ## store the residue in the residue accumulation list
    curr_residue_list.append(r)

    ## store the back-pointer for each atom to its residue
    for atom in curr_atom_list:
        atom.setResidue(r)
Exemplo n.º 27
0
def blast_pdb_local(fasta_string, num_hits=1000):
    """Run a local ``blastp`` search against ``pdbaa`` and build one MSMSeed per usable hit.

    The BLAST database is looked up under ``$DATA_HOME/pdbaa`` and template
    structures are read from the repository at ``$PDB_HOME``. Hits whose
    extracted template structure is empty are skipped.

    NOTE: this function is Python 2 code (it relies on the ``StringIO``
    module and the ``len`` attribute of its buffers).

    Args:
        fasta_string: query sequence, sent to ``blastp`` on stdin.
        num_hits: value passed to ``-max_target_seqs``.

    Returns:
        list of MSMSeed objects, in BLAST output order.
    """
    import subprocess
    import os
    import shlex
    import StringIO
    import simtk.openmm.app as app
    blast_data = os.getenv("DATA_HOME")
    blast_query = 'blastp -db %s/pdbaa -max_target_seqs %d -outfmt' % (
        blast_data, num_hits)
    # the format spec contains spaces, so it is appended as one argument
    # after the rest of the command line has been split
    out_fmt = '7 qseqid sseqid evalue bitscore'
    blast_cmd = shlex.split(blast_query)
    blast_cmd.append(out_fmt)
    p = subprocess.Popen(blast_cmd,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE)
    blast_aln, error = p.communicate(input=fasta_string)
    msmseeds = []
    local_pdb_repo = os.getenv("PDB_HOME")
    for result in blast_aln.splitlines():
        # Skip blank lines as well as "#" comment lines; indexing result[0]
        # on an empty line would raise IndexError.
        if not result or result[0] == "#":
            continue
        res_data = result.split("\t")
        e_value = float(res_data[2])
        # subject id is "|"-separated; fields from index 3 on are joined with "_"
        template_chain_code = "_".join(res_data[1].split("|")[3:])
        code_parts = template_chain_code.split("_")
        raw_template_pdb = _read_local_repository(local_pdb_repo,
                                                  code_parts[0])
        template_fasta, pdb_resnums = _retrieve_fasta(template_chain_code)
        template_pdb = StringIO.StringIO()
        raw_template_pdbio = StringIO.StringIO(raw_template_pdb)
        raw_template_pdbio.seek(0)
        # called for its effect of filling template_pdb; the return value is not needed
        PDB.extract_residues_by_resnum(template_pdb, raw_template_pdbio,
                                       pdb_resnums, code_parts[1])
        template_pdb.seek(0)
        if template_pdb.len == 0:
            continue
        template_pdbfile = app.PDBFile(template_pdb)
        msmseeds.append(
            MSMSeed(fasta_string, template_fasta, template_pdbfile,
                    e_value))
    return msmseeds
Exemplo n.º 28
0
    def superimposedPoseCluster(self):
        """Write the docking poses of each cluster into one PDB file per cluster.

        For every docking folder in ``self.lprdockingpose`` a
        ``Superimposed/`` folder is created under
        ``self.prout + <folder name>/``. Every ``sdf`` pose whose file name
        matches a compound of a cluster (regex ``search``) is converted to
        PDB, has its atoms renamed and is written to ``<cluster>.pdb`` with
        ``conect=1, model=1``.
        """
        for docking_dir in self.lprdockingpose:
            job_out = self.prout + docking_dir.split("/")[-2] + "/"
            # only files ending in "sdf" are candidate poses
            sdf_poses = [name for name in listdir(docking_dir)
                         if name[-3:] == "sdf"]
            superimposed_dir = job_out + "Superimposed/"
            pathFolder.createFolder(superimposed_dir)

            for cluster in self.clusters.keys():
                cluster_file = superimposed_dir + str(cluster) + ".pdb"
                for compound in self.clusters[cluster]:
                    for pose_name in sdf_poses:
                        if not search(compound, pose_name):
                            continue
                        pose_pdb_path = runExternalSoft.babelConvertSDFtoPDB(
                            docking_dir + pose_name)
                        pose_structure = PDB.PDB(PDB_input=pose_pdb_path)
                        pose_structure.renameAtom()
                        pose_structure.writePDB(pfilout=cluster_file,
                                                conect=1,
                                                model=1)
Exemplo n.º 29
0
	return mean(v)
# end def 

def find_quartiles(S):
	"""Return the tuple (lower quartile, median, upper quartile) of S.

	The lower and upper quartiles are the medians of the values strictly
	below and strictly above the overall median; values equal to the median
	are counted in neither half.
	"""
	mid = median(S)
	lower, upper = [], []
	for value in S:
		if value < mid:
			lower.append(value)
		elif value > mid:
			upper.append(value)
		# end if
	# end for
	return median(lower), mid, median(upper)
# end def

root = pred_fname.split('/')[-1]

# 1. check if length of pdb file and entropy file match
# Read the whole entropy table and drop its first line (presumably a header).
# The handle is closed as soon as the text has been read.
with open(entropy_fname) as entropy_fh:
	entropy_data = entropy_fh.read().strip().split('\n')[1:]
# end with
entropy_data = [line.split() for line in entropy_data]
mdl = PDB(pred_fname)

# 1.1 read depth prediction: one mdl.T(i) value per atom named 'CA'
prediction = [mdl.T(i) for i in range(len(mdl)) if mdl.name(i) == 'CA']
# 1.2 read entropies <-
# The second column holds the entropy. Entries that cannot be parsed as a
# float (NA values, for the benchmarking set with unannotated residues) are
# replaced by the value returned by mean_float() for the whole column.
buffer_entropy = [t[1] for t in entropy_data]
u = mean_float(buffer_entropy)
entropies = []
for t in buffer_entropy:
	try:
		entropies.append(float(t))
	except ValueError:
		entropies.append(u)
	# end try
Exemplo n.º 30
0
        base = General.getBase(pdb)
        matchf = args.head + '_' + base + '.match'
        if not os.path.isfile(matchf):
            continue
        outname = General.getBase(matchf) + '.' + args.o
        if os.path.isfile(outname):
            continue
        if outname in seen:
            continue
        seen[outname] = 1

        # for the gap test only
        if args.wgap != None:
            pdb = args.wgap + '/' + mut.dir + '/' + pdb
        ##
        pos = PDB.findPositionInPDB(pdb, str(mut.n), mut.c)
        if pos == -1:
            print('cannot found the residue in fragment pdb: '+ pdb)
            continue

        pdb = General.removePath(pdb)
        cmd = ['python', selfbin +'/envForMatches.py','--m', matchf, '--n', str(pos-1), '--o', outname]
        if args.uplimit != None:
            cmd.extend(['--uplimit', args.uplimit])
        if args.wgap != None:
            cmd.append('--wgap')
        cmd = ' '.join(cmd)
        job = General.jobOnCluster([cmd], mut.dir, os.path.realpath(outname))
        job.submit(3)
        jobs.append(job)
        sleep(0.5)
Exemplo n.º 31
0
args = par.parse_args()

# For every entry of the list file args.l, open the shelve database
# "<parent dir name>/<base name>.<args.ext>.db" and write one tab-separated
# line per residue record to args.o:
#   PDB.t2s(aa), sumcond, crwdnes, freedom, phi, psi
with open(args.o, 'w') as out:
    with open(args.l) as listfh:
        for line in listfh:
            line = line.strip()
            path = os.path.basename(os.path.dirname(line))
            fname = os.path.basename(line)

            name = General.getBase(fname)
            cid = name.split('_')[1]
            dbf = path + '/' + name + '.' + args.ext + '.db'
            # Fixed: the existence test used `db`, which is undefined on the
            # first iteration and a stale (closed) shelf afterwards.
            # NOTE(review): some dbm backends add their own suffix to the
            # name given to shelve.open -- confirm the file is really `dbf`.
            if not os.path.isfile(dbf):
                continue
            db = shelve.open(dbf)
            try:
                # keys look like "<x>,<number>"; order records by the number
                keys_sort = sorted(db.keys(), key=lambda x: int(x.split(',')[1]))
                for k in keys_sort:
                    dbr = db[k]
                    # skip records whose phi or psi lies outside [-180, 180]
                    # (fixed: `dbr[psi]` referenced an undefined name)
                    if (dbr['phi'] > 180.000) or (dbr['phi'] < -180.000) or (dbr['psi'] > 180.000) or (dbr['psi'] < -180.000):
                        continue
                    outstr = [PDB.t2s(dbr['aa']), dbr['sumcond'], dbr['crwdnes'], dbr['freedom'], dbr['phi'], dbr['psi']]
                    out.write('\t'.join(map(str, outstr)) + '\n')
            finally:
                # release the shelf even if a record is malformed
                db.close()
        
        
    
    
    
Exemplo n.º 32
0
import os, sys, argparse

sys.path.insert(0, "/home/grigoryanlab/home/fzheng/modules_py")
import General, PDB

par = argparse.ArgumentParser()
par.add_argument("--l", required=True, help="list file")
par.add_argument("--o", required=True, help="output file")
par.add_argument("--std", required=True, help="reference values for AAs")
args = par.parse_args()

std = {}
for a in open(args.std):
    aa, stdv = a.strip().split()
    std[PDB.t2s(aa)] = float(stdv)

out = open(args.o, "w")

for line in open(args.l):
    line = line.strip()
    path = os.path.basename(os.path.dirname(line))
    file = os.path.basename(line)

    # should look at the env file of the full entry
    envf = path + "/" + General.getBase(file) + ".stride"
    if not os.path.isfile(envf):
        continue
    env = open(envf)
    for el in env.readlines():
        if not el.startswith("ASG"):
            continue
Exemplo n.º 33
0
	for i in range(len(cuts)-1):
		content.append(seq[cuts[i]:cuts[i+1]])
	# end for
	content.append(seq[cuts[-1]:])
	content = '\n'.join(content).strip()+'\n'

	out = out + content
	return out
# end def

# read input: structure file and the prefix used for the output file names
fname    = sys.argv[1]
out_root = sys.argv[2]

# get sequence
mdl = PDB(fname)
try:
	# best-effort dump of the parsed model; a failure here must not stop the
	# sequence extraction below
	mdl.write("tmp.pdb")
except Exception:
	# narrowed from a bare except so that Ctrl-C / SystemExit still propagate
	pass
seq = extract_sequence(mdl)

# write output: one "<out_root>_<chain>.fasta" file per chain
chains = seq.keys()
for chain in chains:
	fasta_lines = fasta_format(seq[chain], out_root+'_'+chain)

	outfile = out_root+'_'+chain+'.fasta'
	# single-argument print(...) behaves the same on Python 2 and 3
	print(outfile)
	# the with-block closes (and flushes) each file as soon as it is written
	with open(outfile, 'w') as fout:
		fout.write(fasta_lines+'\n')
Exemplo n.º 34
0
    for j in aas:
        pairtable[i+'|'+j] = 0
ctable = {x : 0 for x in aas} 

for l in open(args.l):
    info = l.strip().split('/')
    subdir, name = info[-2], info[-1]
    cfile = General.changeExt(subdir + '/' + name, args.ext)
    if not os.path.isfile(cfile):
        continue
    for ll in open(cfile):
        # if ll.find('contact') != 0:
        #     continue
        info2 = ll.strip().split()
        cond, aa1, aa2 = [info2[args.coln[i]] for i in range(3)] 
        aa1, aa2 = PDB.t2s(aa1), PDB.t2s(aa2)

        # optional, for sc cond
        sc_cond = float(info2[-1])
        if sc_cond > 0.01:
            continue

        if (float(cond) >= args.range[0]) and (float(cond) <= args.range[1]):
            ctable[aa1] += 1
            ctable[aa2] += 1
            pairtable[aa1+'|'+aa2] += 1
            pairtable[aa2+'|'+aa1] += 1
            
sortkey = sorted(pairtable.keys()) 
out = open(args.o, 'w')
for sk in sortkey:
Exemplo n.º 35
0
dirs = [x for x in os.listdir('.') if os.path.isdir(x)]
dirs.sort()
odir = os.getcwd()

for d in dirs:
    os.chdir(odir)
    os.chdir(d)
    pdbs = glob.glob('*.pdb')
    if len(pdbs) <= 1:
        continue
    pdbid = d.split('_')[0].lower()
    opdbf = args.sdir + '/' + pdbid + '.clean.pdb'

    cen = d.split('_')[1]
    cenres = PDB.getResByInd(opdbf, cen[0], cen[2:])
    if cenres.getResname() == 'GLY':
        carbon = 'CA'
    else:
        carbon = 'CB'
    cencoords = cenres.getAtom(carbon).getCoords()

    out = d + '.' + args.o
    outfh = open(out, 'w')
    cons = []
    for pdb in pdbs:
        con = General.getBase(pdb).split('_')[-1]
        cons.append([con[0], con[1:]])
    for i in range(len(cons)):
        conres = PDB.getResByInd(opdbf, cons[i][0], cons[i][1])
        carbon = 'CB'
Exemplo n.º 36
0
# For every entry of the list file, scan its environment file for "contact"
# lines within the selected chain and write one tab-separated record per
# contact whose contact degree lies in [args.range[0], args.range[1]].
for l in open(args.l):
    info = l.strip().split('/')
    subdir = info[-2]
    name = General.getBase(info[-1])
    pid, cid = name.split('_')
    # use the environment file for the whole protein but cmap file for single chain, don't know if this is good
    # in the case of freedom, will keep the chain name; (if needed will also change for older cmap files)
    envfile = subdir + '/' + name + '.' + args.e
    if os.path.isfile(envfile):
        db = shelve.open(envfile + '.db')

        with open(envfile) as cf:
            for cfl in cf:
                if not cfl.startswith('contact'):
                    continue
                cfa = cfl.strip().split()
                res1, res2, cond, aa1, aa2 = cfa[1:]
                # keep only contacts inside the relevant chain, between
                # residues present in the database, with standard amino acids
                same_chain = (res1[0] == cid) and (res2[0] == cid)
                if not same_chain:
                    continue
                if not ((res1 in db) and (res2 in db)):
                    continue
                if not ((aa1 in PDB.aaa2a) and (aa2 in PDB.aaa2a)):
                    continue
                if args.range[0] <= float(cond) <= args.range[1]:
                    db1, db2 = db[res1], db[res2]
                    fields = [
                        aaindex[PDB.t2s(aa1)], aaindex[PDB.t2s(aa2)], cond,
                        db1['sumcond'], db1['crwdnes'], db1['freedom'],
                        db2['sumcond'], db2['crwdnes'], db2['freedom'],
                    ]
                    outstr = '\t'.join(map(str, fields))
                    fh.write(outstr + '\n')
        db.close()
fh.close()
Exemplo n.º 37
0
        if cond > args.ccut[0]:
            if res1 in contacts1:
                contacts1[res1].append(res2)
            if res2 in contacts1:
                contacts1[res2].append(res1)
        if cond > args.ccut[1]:
            if res1 in contacts2:
                contacts2[res1].append(res2)
                conds[res1][res2] = cond
            if res2 in contacts2:
                contacts2[res2].append(res1)
                conds[res2][res1] = cond
    if cmapl.startswith('freedom'):
        cmaplsp = cmapl.strip().split()
        resid, fd, phi, psi, resname = cmaplsp[1].replace(',', ''), float(cmaplsp[2]), float(cmaplsp[3]), float(cmaplsp[4]), cmaplsp[-1]
        aaidentity[resid] = PDB.t2s(resname)
        if resid in freedom:
            freedom[resid] = fd
        if resid in phipsi:
            phipsi[resid] = [phi, psi]

dirbb, dirloc, dirtwo = './backbone/', './local/', './twores_nonr/'

# read background potentials
aatypes = 'A C D E F G H I K L M N P Q R S T V W Y'
aatypes = aatypes.split()
aaindex = {aatypes[x] : x for x in range(20)}

# freedom potential
freedom_lines = open(args.em).readlines()
freedom_bins = [float(x) for x in freedom_lines[0].strip().split()]
Exemplo n.º 38
0
def parseVerbose(vf, mut):
    """Parse a verbose log file for the contacts of one mutated position.

    The file is read three times (rewinding in between):

    1. Up to the first line starting with "crwdnes": collect, for every
       "contact" line that involves the central residue ``mut.c,mut.n`` and
       whose partner id starts with ``mut.c``, the rotamer-pair lines that
       follow it.
    2. Up to "end of rotamer filtering": collect the summary lines of the
       central residue's "position" section.
    3. Up to "end of rotamer filtering": collect one summary line per contact
       found in pass 1.

    Args:
        vf: path of the verbose file.
        mut: object providing ``c`` (chain id), ``n`` (residue number), and
            ``w`` / ``m`` (residue codes accepted by ``PDB.s2t``; presumably
            the wild-type and mutant one-letter codes).

    Returns:
        (contacts, denominator_cons, denominator_cen) where
        ``contacts`` maps a contact residue id to a list of strings
        "<mut.w or mut.m> <last token of the rotamer-pair line>",
        ``denominator_cons`` maps a contact residue id to its summary string,
        and ``denominator_cen`` is the list of summary strings of the
        central residue.
    """
    center = mut.c + "," + str(mut.n)

    def rotamer_summary(line):
        # "<head>: a b; c d; tail" -> "<head> b d": the second token of every
        # ';'-terminated item (whatever follows the last ';' is ignored)
        fields = line.strip().split(":")
        return fields[0] + " " + " ".join([x.strip().split()[1] for x in fields[1].split(";")[:-1]])

    contacts = {}
    cons_aa = {}
    denominator_cen = []
    denominator_cons = {}

    # the with-block guarantees the handle is closed, also on a parse error
    with open(vf) as vfh:
        # ---- pass 1: rotamer pairs of each contact of the central residue
        con = None
        rotamerpairs = []
        for vfh_l in vfh:
            if vfh_l.startswith("crwdnes"):
                break
            if vfh_l.startswith("contact"):
                if con is not None:
                    # first, dump the last contact
                    if rotamerpairs:
                        contacts[con] = rotamerpairs
                    con = None
                    rotamerpairs = []

                items = vfh_l.strip().split()
                if center in items:
                    # cenfirst tells whether the central residue is the first
                    # of the two residues on the line; it selects the columns
                    # of the partner here and in the rotamer-pair lines below
                    cenfirst = 0
                    if items[1] == center:
                        cenfirst = 1
                    contmp, con_aa = items[1 + cenfirst], items[4 + cenfirst]
                    # only look at contact in the same chain
                    if not contmp.startswith(mut.c):
                        continue
                    con = contmp
                    cons_aa[con] = con_aa

            elif con is not None:
                # reading rotamer pairs under the current contact
                items = vfh_l.strip().split()
                if items[0 + cenfirst * 3] == con_aa:
                    if items[3 - cenfirst * 3] == PDB.s2t(mut.w):
                        rotamerpairs.append(mut.w + " " + vfh_l.strip().split()[-1])
                    if items[3 - cenfirst * 3] == PDB.s2t(mut.m):
                        rotamerpairs.append(mut.m + " " + vfh_l.strip().split()[-1])
        # the last contact has not been dumped inside the loop
        if rotamerpairs:
            contacts[con] = rotamerpairs

        # ---- pass 2: denominator terms of the central residue
        vfh.seek(0)
        cen = False
        for vfh_l in vfh:
            if vfh_l.startswith("end of rotamer filtering"):
                break
            if vfh_l.startswith("position"):
                if cen:
                    # next "position" section reached: the central one is done
                    break
                if vfh_l.strip().split()[-1] == center:
                    cen = True
            elif vfh_l.startswith(PDB.s2t(mut.w)) or vfh_l.startswith(PDB.s2t(mut.m)):
                if cen:
                    denominator_cen.append(rotamer_summary(vfh_l))

        # ---- pass 3: denominator terms of every contacting residue
        vfh.seek(0)
        con = None
        for vfh_l in vfh:
            if vfh_l.startswith("end of rotamer filtering"):
                break
            if vfh_l.startswith("position"):
                if vfh_l.strip().split()[-1] in contacts:
                    con = vfh_l.strip().split()[-1]
            elif con is not None:
                # only the first line matching the contact's residue name counts
                if vfh_l.startswith(cons_aa[con]):
                    denominator_cons[con] = rotamer_summary(vfh_l)
                    con = None

    return contacts, denominator_cons, denominator_cen
Exemplo n.º 39
0
def computeContacts(mut, contacts, denominator_cons, denominator_cen, outf):
    """Write the contact degrees of a mutated position to a tab-separated file.

    For every contact in ``contacts`` whose key has a purely numeric part
    after the comma, one row is written (ordered by that number):

        idx, mut.p, "<mut.c>,<mut.n>", contact, degree for mut.w,
        degree for mut.m, contact degree from the cmap file,
        first token of the contact's denominator string

    Afterwards one row is written for every contact returned by
    ``Fragment.contactList`` (dcut=0.01) that was not written above, with
    both per-residue degrees set to 0.0.

    Args:
        mut: object providing ``p``, ``c``, ``n``, ``w`` and ``m``.
        contacts: dict contact id -> list of "<residue code> <value>" strings.
        denominator_cons: dict contact id -> "<name> v0 v1 ..." string.
        denominator_cen: list of "<name> v0 v1 ..." strings of the central
            residue.
        outf: path of the output file.

    Notes:
        Reads the module-level ``args.sdir`` to locate
        "<mut.p lowercased>.clean.cmap".
        Raises IndexError if ``denominator_cen`` has no entry for a residue
        with a non-zero numerator, and ZeroDivisionError if a denominator
        evaluates to zero.
    """
    center = mut.c + "," + str(mut.n)

    def contact_degree(con, aa):
        # Degree for residue code `aa` at contact `con`: the sum of the
        # matching values of contacts[con] divided by
        #   v0_cen * v0_con * sum(p1 * p2 over the remaining terms)
        numerator = sum([float(x.split()[1]) for x in contacts[con] if x.split()[0] == aa])
        if numerator == 0:
            return 0
        aa_name = PDB.s2t(aa)
        cen_terms = [x for x in denominator_cen if x.split()[0] == aa_name][0].split()[1:]
        con_terms = denominator_cons[con].split()[1:]
        denominator = 0
        for p1, p2 in itertools.product(cen_terms[1:], con_terms[1:]):
            denominator += float(p1) * float(p2)
        denominator *= float(cen_terms[0]) * float(con_terms[0])
        return numerator / denominator

    # for each contact, compute two separated values for wild type and mutant
    contacts_keys = [x for x in contacts.keys() if x.split(",")[1].isdigit()]
    contacts_keys = sorted(contacts_keys, key=lambda x: int(x.split(",")[1]))
    indexed = set(contacts_keys)
    idx = 1

    # the with-block closes the output file even if a row fails to compute
    with open(outf, "w") as outfh:
        normal_cmap = args.sdir + "/" + mut.p.lower() + ".clean.cmap"

        def write_row(fields):
            outfh.write("\t".join(map(str, fields)) + "\n")

        for con in contacts_keys:
            contactdegree_w = format(contact_degree(con, mut.w), ".4f")
            contactdegree_m = format(contact_degree(con, mut.m), ".4f")

            # also add normal contact degree between these two position:
            normal_cond = format(Fragment.getConD(normal_cmap, center, con), ".4f")

            write_row([
                idx,
                mut.p,
                center,
                con,
                contactdegree_w,
                contactdegree_m,
                normal_cond,
                denominator_cons[con].split()[0],
            ])
            idx += 1

        # there could be some contacts which are not detected by two side chains
        normal_cons, normal_conress = Fragment.contactList(normal_cmap, mut.c, mut.n, dcut=0.01)
        for k, normal_con in enumerate(normal_cons):
            if normal_con in indexed:  # already indexed contact
                continue
            normal_cond = format(Fragment.getConD(normal_cmap, center, normal_con), ".4f")
            # the two zeros are written as "0.0" (plain str of a float), not
            # with four decimals like the rows above; kept for compatibility
            write_row([
                idx,
                mut.p,
                center,
                normal_con,
                0.0,
                0.0,
                normal_cond,
                normal_conress[k],
            ])
            idx += 1

        # it would be also nice to have permanent contact, but they are rare and probably captured by environment
Exemplo n.º 40
0
def ResidueListSequence(residues):
    """Return the sequence string of an iterable of residues.

    The result is the concatenation of ``PDB.residueLetter(r.resName)`` over
    all residues, in order; an empty iterable yields "".
    """
    # imported here, as in the original, so the module is only needed on call
    import PDB
    return "".join(PDB.residueLetter(r.resName) for r in residues)
Exemplo n.º 41
0
    info = l.strip().split('/')
    subdir, name = info[-2], info[-1]
    # use the environment file for the whole protein but cmap file for single chain, don't know if this is good
    envfile = General.changeExt(subdir + '/' + name.split('_')[0], args.e)
    cmapfile = General.changeExt(subdir + '/' + name, args.c)
    
    if not (os.path.isfile(envfile) and os.path.isfile(cmapfile)):
        continue
    
    env = {}
    with open(envfile) as ef:
        f_csv = csv.DictReader(ef, delimiter = '\t')
        for row in f_csv:
            env[row['residue']] = row['environment_score']
    
    cf = open(cmapfile)
    for cfl in cf:
        if not cfl.startswith('contact'):
            continue
        cfa = cfl.strip().split()
        res1, res2, cond, aa1, aa2 = cfa[1:]
        if (not res1 in env) or (not res2 in env):
            continue
        if (float(cond) >= args.range[0]) and (float(cond) <= args.range[1]):
            outstr = '\t'.join(map(str, [General.getBase(name), cond, PDB.t2s(aa1), PDB.t2s(aa2), env[res1], env[res2]])) + '\n'
            out.write(outstr)

         
        
        
    
Exemplo n.º 42
0
    def __init__(self, *items, **properties):
        """
        :param items: either a sequence of peptide chain objects, or
                      a string, which is interpreted as the name of a
                      database definition for a protein.
                      If that definition does not exist, the string
                      is taken to be the name of a PDB file, from which
                      all peptide chains are constructed and
                      assembled into a protein.
        :keyword model: one of "all" (all-atom), "no_hydrogens" or "none"
                        (no hydrogens),"polar_hydrogens" or "polar"
                        (united-atom with only polar hydrogens),
                        "polar_charmm" (like "polar", but defining
                        polar hydrogens like in the CHARMM force field),
                        "polar_opls" (like "polar", but defining
                        polar hydrogens like in the latest OPLS force field),
                        "calpha" (only the |C_alpha| atom of each residue).
                        Default is "all".
        :type model: str
        :keyword position: the center-of-mass position of the protein
        :type position: Scientific.Geometry.Vector
        :keyword name: a name for the protein
        :type name: str
        """
        # A single None argument leaves the object completely uninitialized
        # (no attribute is set before returning).
        if items == (None,):
            return
        self.name = ''
        # A single string is first looked up as a database definition; if
        # the lookup raises IOError it is read as a PDB file instead.
        if len(items) == 1 and type(items[0]) == type(''):
            try:
                filename = Database.databasePath(items[0], 'Proteins')
                found = 1
            except IOError:
                found = 0
            if found:
                blueprint = Database.BlueprintProtein(items[0])
                items = blueprint.chains
                # copy every blueprint attribute except 'type' and 'chains'
                for attr, value in vars(blueprint).items():
                    if attr not in ['type', 'chains']:
                        setattr(self, attr, value)
            else:
                import PDB
                conf = PDB.PDBConfiguration(items[0])
                # the 'model' keyword is only consulted on this PDB-file path
                model = properties.get('model', 'all')
                items = conf.createPeptideChains(model)
        # Flatten the arguments: chemical objects are taken as they are,
        # anything else is treated as a sequence of molecules.
        molecules = []
        for i in items:
            if ChemicalObjects.isChemicalObject(i):
                molecules.append(i)
            else:
                molecules = molecules + list(i)
        # Remember each chain's original position and give unnamed chains
        # the default name 'chain<i>'.
        for m, i in zip(molecules, range(len(molecules))):
            m._numbers = [i]
            if not m.name:
                m.name = 'chain'+`i`
        # NOTE(review): presumably the disulfide bridges between the chains;
        # each entry is indexed as a pair of objects below -- confirm.
        ss = self._findSSBridges(molecules)
        # new_mol maps every chain to its group: (chains in the group, bonds).
        # Chains joined by a bond end up sharing one group tuple.
        new_mol = {}
        for m in molecules:
            new_mol[m] = ([m],[])
        for bond in ss:
            m1 = new_mol[bond[0].topLevelChemicalObject()]
            m2 = new_mol[bond[1].topLevelChemicalObject()]
            if m1 == m2:
                # both ends already in the same group: just record the bond
                m1[1].append(bond)
            else:
                # merge the two groups and re-point all their chains to it
                combined = (m1[0] + m2[0], m1[1] + m2[1] + [bond])
                for m in combined[0]:
                    new_mol[m] = combined
        # Turn every group into one molecule: a lone chain is kept as is,
        # several chains are wrapped in a ConnectedChains object.
        self.molecules = []
        while new_mol:
            m = new_mol.values()[0]
            # remove all chains of this group so it is handled only once
            for i in m[0]:
                del new_mol[i]
            bonds = m[1]
            if len(m[0]) == 1:
                m = m[0][0]
                m._addSSBridges(bonds)
            else:
                # concatenated original positions of the member chains
                numbers = sum((i._numbers for i in m[0]), [])
                m = ConnectedChains(m[0])
                m._numbers = numbers
                m._addSSBridges(bonds)
                m._finalize()
                for c in m:
                    c.parent = self
            m.parent = self
            self.molecules.append(m)

        # Collect all atoms and build the chain table. Each entry starts as
        # (molecule, index inside ConnectedChains or None, name, number).
        self.atoms = []
        self.chains = []
        for m in self.molecules:
            self.atoms.extend(m.atoms)
            if hasattr(m, 'is_connected_chains'):
                for c, name, i in zip(range(len(m)),
                                   m.chain_names, m._numbers):
                    self.chains.append((m, c, name, i))
            else:
                try: name = m.name
                except AttributeError: name = ''
                self.chains.append((m, None, name, m._numbers[0]))
        # restore the original chain order, then drop the number used as key
        self.chains.sort(lambda c1, c2: cmp(c1[3], c2[3]))
        self.chains = map(lambda c: c[:3], self.chains)

        self.parent = None
        self.type = None
        self.configurations = {}
        # 'name' and 'position' are consumed here; whatever remains in
        # properties is handed to addProperties().
        try:
            self.name = properties['name']
            del properties['name']
        except KeyError: pass
        if properties.has_key('position'):
            self.translateTo(properties['position'])
            del properties['position']
        self.addProperties(properties)

        # Warn only when some, but not all, atoms lack a position.
        undefined = 0
        for a in self.atoms:
            if a.position() is None:
                undefined += 1
        if undefined > 0 and undefined != len(self.atoms):
            Utility.warning('Some atoms in a protein ' +
                            'have undefined positions.')
Exemplo n.º 43
0
		try:
			solsol_neighbours[sol2].append(sol1)
		except KeyError:
			solsol_neighbours.update({sol2:[sol1]})
		# end try
	# end if
# end for

for key in solsol_neighbours:
	solsol_neighbours[key] = list(set(solsol_neighbours[key]))
# end for
	
solvents = mol_neighbours.keys()
residues = sol_neighbours.keys()

mdl = PDB(fname)
Pred = dict([[str(mdl.resSeq(i)) + ':' + mdl.chainID(i), max(mdl.T()[i], 0)] for i in range(len(mdl))])

def separate_by_chains(S):
	"""Group identifiers by the text after their last ':'.

	Returns a dict mapping each such suffix (the chain id in
	'resSeq:chainID' strings) to the list of members of S carrying it, in
	their order of appearance in S.
	"""
	chains = dict((chain_id, []) for chain_id in set(res.split(':')[-1] for res in S))
	for res in S:
		chains[res.split(':')[-1]].append(res)
	# end for
	return chains
# end def

# Assign primary probability measure to solvent
Prob_sol = {}
for solvent in solvents:
Exemplo n.º 44
0
cid, resnum = args.resid

for seqf in seqfs:
	pdbf = General.changeExt( seqf.replace(args.head + '_', ''), 'pdb')
	if not os.path.isfile(pdbf):
		print(pdbf + ' doesn\'t exist!')
		continue

	outf = General.changeExt(pdbf, args.o)

	if args.wgap != None: # specific to gap
		assert args.conR == False, 'wgap and conR cannot be specified simultaneously'
		dirname = General.getBase(pdbf)
		pdbf = args.wgap + '/' + dirname + '/'+ pdbf

	index = PDB.findPositionInPDB(pdbf, resnum, cid)
	aacol = Analyze.readColumn(seqf, index, top = args.uplimit)

	if args.conR: # should contacting residue be constrained?
		conid = General.getBase(seqf).split('_')[-1]
		ccid, cresnum = conid[0], conid[1:]
		cindex = PDB.findPositionInPDB(pdbf, cresnum, ccid)
		cres = PDB.getResByInd(pdbf, ccid, cresnum).getResname()
		cres = PDB.t2s(cres)
		caacol = Analyze.readColumn(seqf, cindex, top = args.uplimit)

	if args.env != None: # environment corrected counts
		envf = General.getBase(seqf.replace(args.head, args.envhead)) + '.' + args.env
		if not os.path.isfile(envf):
			print(envf + ' doesn\'t exist!')
			continue
Exemplo n.º 45
0
dirs = [x for x in os.listdir('.') if os.path.isdir(x)]
dirs.sort()
odir = os.getcwd()

for d in dirs:
	os.chdir(odir)
	os.chdir(d)
	pdbs = glob.glob('*.pdb')
	cmds = []
	resn = int(d.split('_')[1][2:])
	for pdb in pdbs:
		matchf = args.head + '_' + General.changeExt(pdb, 'match')
		if not os.path.isfile(matchf):
			continue

		pos = PDB.findPositionInPDB(pdb, resn)
		# if output file is already there, skip the job
		if os.path.isfile('nr'+args.id +'_'+matchf):
			continue

		cmd = ['python', selfbin + '/removeLocalRedundancy.py', '--m', matchf, '--cres', str(pos), '--id', args.id, '--outh', 'nr'+args.id]
		if not args.db == None:
			cmd.extend(['--db', args.db])
		if args.conR:
			conresn = General.getBase(pdb).split('_')[2][1:]
			conpos = PDB.findPositionInPDB(pdb, conresn)
			cmd.extend(['--conres', str(conpos)])
		if args.env != None:
			cmd.extend(['--env', args.env])
		cmd = ' '.join(cmd)
		cmds.append(cmd)
Exemplo n.º 46
0
	W_e = p_self_m - p_self_w
	W_pp = p_pp_m - p_pp_w # select correct file for backbone term if using pp potential!

	# predicting contact potential
	cpf = '/'.join([args.conpot, mut.dir, mut.dir +'.cons'])
	assert os.path.isfile(cpf), cpf
	cpls = open(cpf).read().splitlines()
	# current all contacts above 0.01 (everything searched) are considered (but this has not considered the side chains, which may cause problems)
	cp_inds = [ x.strip().split() for x in cpls if float(x.split()[4]) > 0.01 ]
	ddG_cp = 0.0

	if len(cp_inds) != 0:
		for i in range(len(cp_inds)):
			con_c, con_n = cp_inds[i][3].split(',')
			cres = PDB.t2s(cp_inds[i][-1])
			cond = float(cp_inds[i][4])
			cdbin = mustpress.determineBin(condbins, cond)
			conpots_file = conpots[priR_list[cdbin]]
			ddG_cpi = conpots_file[aaindex[mut.w], aaindex[cres]] - conpots_file[aaindex[mut.m], aaindex[cres]]
			ddG_cp -= ddG_cpi

	# predicting backbone related terms
	consf = '/'.join([args.bb[0], mut.dir, mut.dir + '.cons'])
	assert os.path.isfile(consf), consf
	conls = open(consf).read().splitlines()
	conds_inds = [ [float(x.split()[4]), x.split()[3]] for x in conls if float(x.split()[4]) > args.cutbb] # contact degrees, and contact residue number
	# conds_inds = [x for x in conds_inds if abs(int(x[1].split(',')[1]) - mut.n) > 5 ]
	for i in range(len(conds_inds)):
		countf = '/'.join([args.bb[0], mut.dir, mut.dir + '_' + conds_inds[i][1].replace(',', '') + '.' + args.bb[1]])
		if os.path.isfile(countf):
Exemplo n.º 47
0
import os, sys
import General, PDB

# Convert a list of PDB files into one FASTA-like file: for each listed file
# a '>' header with its path, followed by one line per sequence returned by
# PDB.pdb2seq.
if len(sys.argv) - 1 != 2:
	# single-argument print(...) behaves the same on Python 2 and 3
	print('<usage> [list pdb file] [output .fa file]')
	# sys.exit instead of the site-provided exit(); status kept at 0
	sys.exit(0)

lst, fasta = sys.argv[1:]

# both handles are closed (and the output flushed) when the blocks end
with open(fasta, 'w') as out:
	with open(lst) as lstfh:
		for l in lstfh:
			pdbf = l.strip()
			if not pdbf:
				# ignore blank lines in the list file
				continue
			seqs = PDB.pdb2seq(pdbf)
			out.write('>'+pdbf+'\n')
			for c in seqs: # because only single chain
				out.write(seqs[c]+'\n')





Exemplo n.º 48
0
# This is part of DEPTH.
# DEPTH (Version: 2.0) computes the closest distance of a residue/atom to bulk solvent and predicts small molecule binding site of a protein. 
# Copyright (C) 2013, Kuan Pern Tan, Nguyen Thanh Binh, Raghavan Varadarajan and M.S. Madhusudhan
# 
# DEPTH is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
# DEPTH is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License along with DEPTH.  If not, see <http://www.gnu.org/licenses/>.

import sys
from PDB import *

fname = sys.argv[1]
mdl = PDB(fname)

# Collect, in order of first appearance, the residues ('resSeq:chainID') that
# have at least one entry with mdl.T(i) == 1.
# NOTE(review): T(i) is presumably the per-atom prediction flag stored in
# the temperature-factor column -- confirm against the PDB class.
binding_residues = []
seen = set()  # O(1) duplicate check; the list keeps the output order
for i in range(len(mdl)):
	if mdl.T(i) == 1:
		res = str(mdl.resSeq(i))+':'+mdl.chainID(i)
		if res not in seen:
			seen.add(res)
			binding_residues.append(res)
		# end if
	# end if
# end for

out = ' '.join(binding_residues)
# single-argument print(...) behaves the same on Python 2 and 3
print(out)
Exemplo n.º 49
0
    # end for
    content.append(seq[cuts[-1]:])
    content = '\n'.join(content).strip() + '\n'

    out = out + content
    return out


# end def

# read input: structure file and the prefix used for the output file names
fname = sys.argv[1]
out_root = sys.argv[2]

# get sequence
mdl = PDB(fname)
try:
    # best-effort dump of the parsed model; a failure here must not stop the
    # sequence extraction below
    mdl.write("tmp.pdb")
except Exception:
    # narrowed from a bare except so that Ctrl-C / SystemExit still propagate
    pass
seq = extract_sequence(mdl)

# write output: one "<out_root>_<chain>.fasta" file per chain
chains = seq.keys()
for chain in chains:
    fasta_lines = fasta_format(seq[chain], out_root + '_' + chain)

    outfile = out_root + '_' + chain + '.fasta'
    # single-argument print(...) behaves the same on Python 2 and 3
    print(outfile)
    # the with-block closes (and flushes) each file as soon as it is written
    with open(outfile, 'w') as fout:
        fout.write(fasta_lines + '\n')
Exemplo n.º 50
0
par.add_argument('--sl', help = 'use a searchDB list file')
par.add_argument('--o', required = True, help = 'name of the output file')
args = par.parse_args()

out = open(args.o, 'w')

def outputSeq(seqs, name, out, chains = None):
    """Write the sequences in *seqs* to *out* as FASTA records.

    Records are written in sorted chain-ID order, each as a header line
    ``>{name}_{chain}`` followed by the sequence on its own line.

    Args:
        seqs: mapping of chain ID -> sequence string.
        name: prefix used in every header line.
        out: object with a ``write`` method (e.g. an open text file).
        chains: optional chain IDs to keep; chains not listed are skipped.
            Any non-list iterable is converted with ``list()``, so a string
            such as ``"AB"`` selects the one-character chains ``A`` and ``B``.
            ``None`` (the default) writes every chain.
    """
    if chains is not None and not isinstance(chains, list):
        chains = list(chains)
    # sorted() works on both Python 2 lists and Python 3 dict views, whereas
    # calling .sort() on the result of .keys() only works on Python 2.
    for k in sorted(seqs):
        if chains is not None and k not in chains:
            continue
        out.write('>' + name + '_' + k + '\n')
        out.write(seqs[k] + '\n')

# Emit sequences either for the "<pdbid>_<chain>" entries listed in args.pl
# or, when --sl is given, for every structure named in that list file.
# try/finally guarantees the output file is closed even if a structure fails.
try:
    if args.sl is None:
        # `with` closes the list file once the loop ends or raises.
        with open(args.pl) as plist:
            for line in plist:
                pid, cid = line.strip().split('_')
                pid = pid.lower()
                p = pid + '.clean.pdb'
                seqs = PDB.pdb2seq(p)
                # cid restricts the output to the chain(s) named in the entry
                outputSeq(seqs, pid, out, cid)
    else:
        with open(args.sl) as slist:
            for line in slist:
                # NOTE(review): changeExt presumably swaps the file extension
                # for 'pdb' -- confirm against the General module.
                p = General.changeExt(line.rstrip('\n'), 'pdb')
                seqs = PDB.pdb2seq(p)
                # record name = file name up to its first '.'
                name = General.removePath(p).split('.')[0]
                outputSeq(seqs, name, out)
finally:
    out.close()
Exemplo n.º 51
0
Arquivo: fpd.py Projeto: vancalory/PDZ
# start and end residue for the peptide chain: the peptide follows the
# residues of p, so its first index is one past p's residue count and
# pepstart..pepend is an inclusive range covering pepchain's residues
pepstart = p.numResidues() + 1
pepend = pepstart + pepchain.numResidues() -1

# add peptide backbone from template to receptor
# NOTE(review): `+` presumably concatenates the two structures, with a copy of
# pepchain so the template itself is left untouched -- confirm.
rec = p + pepchain.copy()
# the combined structure is saved as '_start.pdb' for the later steps
writePDB('_start.pdb', rec)

if args.flip:
    # flip the structure
    os.system('perl -w '+ SELFBIN + '/flipPeptideChirality.pl _start.pdb _startf.pdb 0')

    # mutate the chirality of the residues on the domain
    domainseq = []
    for dres in p.iterResidues():
        dresname = PDB.s2t(PDB.t2s(dres.getResname()))
        if dresname != 'GLY':
            domainseq.append('D'+ dresname)
        else:
            domainseq.append(dresname)
    pdz.normalMut('_startf.pdb', range(1, pepstart), domainseq, '_startf1.pdb')

    # mutate the residue on peptide
    pepseq = []
    for pres in args.pseq:
        if len(pres) == 4: # if should be a D-residue, make the name to be a L-residue (since the domain has been flipped)
            pepseq.append(pres[1:])
        if len(pres) == 3:
            if pres != 'GLY':
                pepseq.append('D' + pres)
            else:
Exemplo n.º 52
0
# Header: the weight vector on one tab-separated line, then the column order
# of the amino-acid types.
weights = np.array(args.weights)
outf.write('#weights:' + '\t' + '\t'.join(map(str, weights)) + '\n')
aatypes = 'A C D E F G H I K L M N P Q R S T V W Y'
outf.write(aatypes + '\n')

# Every residue except the first and the last one is considered.
for i in range(1, len(residues) - 1):
    res = residues[i]
    resid = res.getChid() + str(res.getResnum())
    # Score file for this residue: "<pid>_<chain><resnum>.<ext>"
    scf = pid + '_' + resid + '.' + args.ext
    if not os.path.isfile(scf):
        continue

    # One weighted score per line of the score file.
    wsc_allaa = []
    with open(scf) as sf:
        for row in sf:
            fields = row.strip().split()
            assert len(fields) == len(weights)
            # The first column is replaced by a constant 1.0 before the
            # element-wise product with the weights is summed.
            terms = np.array([1.0] + fields[1:], dtype='float')
            wsc_allaa.append((terms * weights).sum())

    # exp(-score / t), normalised so the values for this residue sum to 1.
    unnormalised = np.exp(-np.array(wsc_allaa) / args.t)
    p_aa = unnormalised / unnormalised.sum()
    outf.write(' '.join([
        str(res.getResnum()),
        PDB.t2s(res.getResname()),
        ' '.join([format(x, '.3f') for x in p_aa]),
    ]) + '\n')

outf.close()

Exemplo n.º 53
0
par.add_argument('--l', required=True, help='a list of pdb files')
par.add_argument('--o', required=True, help='an output file')
par.add_argument('--multi', action='store_true', help='if true, multi-chain is allowed and output')
args = par.parse_args()

# Read the list of PDB paths first so the list file is closed before any
# structure is parsed.
with open(args.l) as lfh:
    pdb_paths = [line.strip() for line in lfh]

# One FASTA-style record per chain: ">path|chain|length", then the sequence.
# `with` closes the output file even if a structure fails to parse.
with open(args.o, 'w') as ofh:
    for pdbf in pdb_paths:
        mol = parsePDB(pdbf)
        nchains = mol.numChains()
        # Without --multi, structures with several chains are reported and
        # skipped entirely.
        if not args.multi and nchains > 1:
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print('Warning: ' + pdbf + ' has more than 1 chains')
            continue

        # chain ID -> one-letter sequence, built residue by residue
        seqs = {}
        for res in mol.iterResidues():
            cid, resname = res.getChid(), res.getResname()
            # Residue names missing from PDB.aaa2a are written as 'X'.
            letter = PDB.t2s(resname) if resname in PDB.aaa2a else 'X'
            seqs[cid] = seqs.get(cid, '') + letter

        # sorted() works on Python 2 lists and Python 3 dict views alike;
        # .keys().sort() only works on Python 2.
        for k in sorted(seqs):
            ofh.write('>' + pdbf + '|' + k + '|' + str(len(seqs[k])) + '\n')
            ofh.write(seqs[k] + '\n')

Exemplo n.º 54
0
    # For every *.pdb file in the current directory (in sorted order), build
    # and submit one envForMatches_pair.py job. A file is skipped when it has
    # no match file, when its output already exists, or when its output name
    # was already handled in `seen` (defined outside this excerpt).
    pdbs = glob.glob('*.pdb')
    pdbs.sort()

    for pdb in pdbs:
        base = General.getBase(pdb)
        # match file expected for this fragment: "<head>_<base>.match"
        matchf = args.head + '_' + base + '.match'
        if not os.path.isfile(matchf):
            continue
        outname = General.getBase(matchf) + '.' + args.o
        # do not redo work whose output file is already on disk
        if os.path.isfile(outname):
            continue
        # ... or whose output name was already queued
        if outname in seen:
            continue
        seen[outname] = 1

        # position of the mutated residue (number mut.n, chain mut.c) in this
        # fragment; -1 is handled as "not found"
        pos = PDB.findPositionInPDB(pdb, str(mut.n), mut.c)
        if pos == -1:
            print('cannot found the residue in fragment pdb: '+ pdb)
            continue

        # The last '_'-separated token of the base name is split into its
        # first character and the remainder, which are looked up as chain and
        # residue number of a second residue.
        con = base.split('_')[-1]
        conc, conn = con[0], con[1:]
        # NOTE(review): unlike pos above, conpos is not checked for -1; a
        # failed lookup would pass '-2' to the script -- confirm intent.
        conpos = PDB.findPositionInPDB(pdb, str(conn), conc)

        # NOTE(review): both positions are passed minus one, presumably to
        # convert to 0-based indices -- confirm against envForMatches_pair.py.
        cmd = ['python', selfbin +'/envForMatches_pair.py','--m', matchf, '--n', str(pos-1), str(conpos-1), '--o', outname]
        if args.uplimit != None:
            # NOTE(review): the join below requires args.uplimit to be a
            # string -- verify the argparse type of --uplimit.
            cmd.extend(['--uplimit', args.uplimit])

        # the job takes the command as a single shell-style string
        cmd = ' '.join(cmd)
        job = General.jobOnCluster([cmd], mut.dir, os.path.realpath(outname))
        # NOTE(review): the meaning of the argument 3 is not visible here.
        job.submit(3)