示例#1
1
def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """
    Download a PDB entry, repair it with PDBFixer and write three PDB files.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: directory in which the fixed PDB files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: mapping with two parallel sequences stored under the
            keys 'pdbid' and 'chain_to_remove'; the entry that lines up with
            ``pdbid`` is a whitespace-separated string of chain IDs to delete
    Returns: nothing, but it does write PDB files into ``file_pathway``:
        ``<pdbid>_fixed_ph<ph>.pdb``, ``<pdbid>_fixed_ph<ph>_apo.pdb`` and
        ``<pdbid>_fixed_ph<ph>_apo_nowater.pdb``

    """
    print(pdbid)

    # Download the topology from RCSB based on the PDB ID
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on the hand curated table
    if pdbid in chains_to_remove['pdbid']:
        row = chains_to_remove['pdbid'].index(pdbid)
        chain_ids = chains_to_remove['chain_to_remove'][row].split()
        fixer.removeChains(chainIds=chain_ids)

    # Determine the first and last residue resolved in chain 0
    chains = list(fixer.topology.chains())
    resindices = natsorted([residue.index for residue in chains[0].residues()])
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the N- and C-terminal fragments so that
    # only internal gaps are rebuilt.
    fixer.findMissingResidues()
    missing = fixer.missingResidues
    if missing:
        first_key = min(missing)
        if first_key[-1] <= first_resindex:
            del missing[first_key]
    # Re-check emptiness: the entry removed above may have been the only one.
    if missing:
        last_key = max(missing)
        if last_key[-1] >= last_resindex:
            del missing[last_key]

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write_pdb(name_template):
        # Write the current state of ``fixer``; the context manager makes sure
        # the file is flushed and closed even when writing fails.
        # NOTE(review): ``keepNumbers`` is not defined in this function and is
        # expected to be provided by the enclosing module.
        out_path = os.path.join(file_pathway, name_template % (pdbid, ph))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write_pdb('%s_fixed_ph%s.pdb')

    # Remove the heterogens but keep the waters, and write a pdb file
    fixer.removeHeterogens(True)
    _write_pdb('%s_fixed_ph%s_apo.pdb')

    # Remove the waters as well and write a pdb file
    fixer.removeHeterogens(False)
    _write_pdb('%s_fixed_ph%s_apo_nowater.pdb')
示例#2
0
文件: ui.py 项目: leeping/pdbfixer
def startPageCallback(parameters, handler):
    """Handle the start-page form: load a structure and show the chain page.

    ``parameters`` is the submitted form. When its 'type' field is 'local' the
    uploaded 'pdbfile' is decoded and parsed; otherwise the 'pdbid' field is
    passed to PDBFixer as a PDB identifier. On success the module-level
    ``fixer`` is replaced and the delete-chains page is displayed. If loading
    by identifier fails, an error page is sent through ``handler`` and the
    delete-chains page is not displayed.
    """
    global fixer
    if 'type' not in parameters:
        return
    if parameters.getfirst('type') == 'local':
        fixer = PDBFixer(file=parameters['pdbfile'].value.decode().splitlines())
    else:
        pdb_id = parameters.getfirst('pdbid')
        try:
            fixer = PDBFixer(pdbid=pdb_id)
        except Exception:
            handler.sendResponse(header+"Unable to download the PDB file. This may indicate an invalid PDB identifier, or an error in network connectivity."+loadHtmlFile("error.html"))
            # Nothing was loaded, so there are no chains to display.
            return
    displayDeleteChainsPage()
    def read_and_repair(self, path_pdb: str):
        '''
        Load a structure file and run the PDBFixer repair steps on it.

        Nonstandard residues are replaced, all heterogens including water are
        removed, and missing heavy atoms and hydrogens (pH 7.0) are added.

        params:
            path_pdb (str) path to the structure file; must exist
        return:
            repaired (PDBFixer object) the fixer holding the repaired structure
            residues_to_drop: whatever self._check_residues returns for the
                repaired topology
        '''
        assert os.path.isfile(path_pdb)
        repaired = PDBFixer(filename=path_pdb)

        # The find* passes must run before the matching replace/add passes.
        repaired.findMissingResidues()
        repaired.findNonstandardResidues()
        repaired.replaceNonstandardResidues()
        repaired.removeHeterogens(False)
        repaired.findMissingAtoms()
        repaired.addMissingAtoms()
        repaired.addMissingHydrogens(7.0)

        residues_to_drop = self._check_residues(repaired.topology)
        return repaired, residues_to_drop
示例#4
0
def add_missing_atoms(session, m, minimization_steps = 0, keep_waters = False):
    """Repair the file behind model ``m`` with PDBFixer and open the result.

    The structure is read from ``m.filename``, repaired (nonstandard residues
    replaced, missing atoms added, heterogens removed, hydrogens added at
    pH 7.0) and written next to the input as ``<name>-pdbfixer.pdb``. The new
    file is opened in ``session``, shown as atoms without ribbons, and the
    original model is hidden.

    session: object providing ``models.open`` and ``logger``
    m: model whose ``filename`` attribute points at the file to repair
    minimization_steps: when > 0, ``minimize`` is called with the fixer and
        this step count before writing
    keep_waters: passed to ``removeHeterogens`` as the keep-water flag
    """
    from os.path import splitext
    from pdbfixer import PDBFixer
    from simtk.openmm.app import PDBFile

    fname = m.filename
    pf = PDBFixer(filename = fname)
    pf.findMissingResidues()
    pf.findNonstandardResidues()
    pf.replaceNonstandardResidues()
    pf.findMissingAtoms()
    pf.addMissingAtoms()
    pf.removeHeterogens(keep_waters)
    pf.addMissingHydrogens(7.0)
    if minimization_steps > 0:
        minimize(pf, minimization_steps)

    fout = splitext(fname)[0] + '-pdbfixer.pdb'
    # Context manager so the file is closed even if writing raises.
    with open(fout, 'w') as out:
        PDBFile.writeFile(pf.topology, pf.positions, out)

    mfix = session.models.open([fout])[0]
    mfix.atoms.displays = True
    mfix.residues.ribbon_displays = False
    m.display = False
    log = session.logger
    log.info('Wrote %s' % fout)
示例#5
0
文件: clustenm.py 项目: SHZ66/ProDy
    def _fix(self, atoms):
        """Repair ``atoms`` with PDBFixer and store the result on the instance.

        The atoms are serialised to an in-memory PDB, fixed (nonstandard
        residues replaced, heterogens and water removed, missing atoms added,
        hydrogens added at ``self._ph``) and parsed back. Missing residues are
        explicitly not modelled. Sets ``self._atoms``, ``self._topology`` and
        ``self._positions``.
        """

        try:
            from pdbfixer import PDBFixer
            from openmm.app import PDBFile
        except ImportError:
            raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

        original_title = atoms.getTitle()

        # Round-trip through a PDB-formatted buffer to hand the atoms to PDBFixer.
        pdb_in = createStringIO()
        writePDBStream(pdb_in, atoms)
        pdb_in.seek(0)
        fixer = PDBFixer(pdbfile=pdb_in)
        pdb_in.close()

        # An empty mapping means no missing residues will be built.
        fixer.missingResidues = {}
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(self._ph)

        pdb_out = createStringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions,
                          pdb_out, keepIds=True)
        pdb_out.seek(0)
        self._atoms = parsePDBStream(pdb_out)
        self._atoms.setTitle(original_title)
        pdb_out.close()

        self._topology = fixer.topology
        self._positions = fixer.positions
示例#6
0
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Strip unwanted chains and heterogens from a PDB file and add missing atoms.

    pdb_id: identifier used to name the structure and the output file
    pdb_file: path of the input PDB file
    pdb_group: passed to ``get_required_chains`` to obtain the chain IDs to keep

    Returns the path of the written file, ``<pdb_id>.pdb`` in the directory of
    ``pdb_file``.
    """
    import os

    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [chain.get_id() for chain in first_model
                        if chain.get_id() not in chains_to_retain]

    fixer = PDBFixer(filename=pdb_file)

    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # os.path.join keeps the output beside the input even when ``pdb_file``
    # has no directory component (plain concatenation with '/' would then
    # point at the filesystem root).
    out_path = os.path.join(os.path.dirname(pdb_file), pdb_id + '.pdb')

    # keepIds=True preserves the original chain IDs and residue numbers.
    with open(out_path, 'w') as handle:
        PDBFile.writeFile(fixer.topology,
                          fixer.positions,
                          handle,
                          keepIds=True)

    return out_path
示例#7
0
def add_hydrogens_to_mol(mol):
    """
    Add hydrogens to a molecule object.

    The molecule is converted to a PDB block, PDBFixer adds the missing
    hydrogens at pH 7.4, and the result is parsed back without sanitization
    and without removing hydrogens.

    TODO (LESWING) see if there are more flags to add here for default
    :param mol: Rdkit Mol
    :return: Rdkit Mol
    :raises MoleculeLoadException: when a ValueError occurs during the process
    """
    try:
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)
        fixer = PDBFixer(pdbfile=pdb_stringio)
        fixer.addMissingHydrogens(7.4)

        hydrogenated_io = StringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(hydrogenated_io.read(),
                                    sanitize=False,
                                    removeHs=False)
    except ValueError as e:
        # The message needs a placeholder for the exception; passing it as a
        # bare extra argument makes the logging module report a format error.
        logging.warning("Unable to add hydrogens: %s", e)
        raise MoleculeLoadException(e)
示例#8
0
def pdb_clean_sim(args):
    """
    Clean one PDB file and generate its feature matrices.

    Described in the original documentation as a top-level function to be
    executed in parallel.

    :param args: tuple of (input directory, output directory, pdb file name).
    :return: ``(0, 'S;<fname>;')`` on success or when ``output_dir + fname``
        already exists, ``(1, 'E;<fname>;<error>')`` when the simulation or
        feature step raises.
    """
    input_dir, output_dir, fname = args
    success = (0, f'S;{fname};')

    # Nothing to do when the output path already exists.
    if Path(output_dir + fname).exists():
        return success

    # Clean the PDB: round-trip through a scratch copy, then repair it.
    scratch_path = '/tmp/' + fname
    structure = pmd.load_file(input_dir + fname)
    structure.save(scratch_path, overwrite=True)

    fixer = PDBFixer(filename=scratch_path)
    Path(scratch_path).unlink()
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # Build the system and compute the features
    try:
        forcefield = so.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
        system = forcefield.createSystem(fixer.topology, nonbondedMethod=so.app.NoCutoff)
        param = pmd.openmm.load_topology(fixer.topology, system=system, xyz=fixer.positions)

        # File name without its last extension
        basename = '.'.join(fname.split('.')[:-1])

        # Atom indices of the two interacting subunits, selected by chain
        sub_unit_chains = pdb_parser(basename)
        ids0, ids1 = (
            np.where(param.to_dataframe()['chain'].isin(chain_ids))[0]
            for chain_ids in sub_unit_chains
        )

        features = generate_features(ids0, ids1, forcefield, system, param)

        print(f'done simulating: {fname}')

        # Stack the three matrices into one array
        stacked = np.stack((features["U_LJ"], features["U_el"], features["D_mat"]))

        np.save(output_dir + '/' + basename + '.npy', stacked)

        print(f'saved features: {fname}')

    except Exception as e:
        print(f'could not simulate: {fname} Exception: {e}')
        return 1, f'E;{fname};{e}'

    return success
示例#9
0
def getAllChains(pdbFile):
    """Return the sorted, de-duplicated chain IDs of ``pdbFile`` as one string.

    Only chains whose ID is found in the upper-case alphabet string are kept.
    """
    uppercase_ids = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    fixer = PDBFixer(filename=pdbFile)
    collected = "".join(
        chain.id for chain in fixer.topology.chains() if chain.id in uppercase_ids
    )
    unique_ids = set(collected.upper().replace(" ", ""))
    return ''.join(sorted(unique_ids))
示例#10
0
def fix_pdb(pdb_file):
    """Repair ``pdb_file`` with PDBFixer and overwrite it with the result.

    Nonstandard residues are replaced, heterogens other than water are
    removed, and missing atoms and hydrogens (pH 7.0) are added.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    # Context manager so the rewritten file is flushed and closed.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
示例#11
0
 def fix_pdb(self, infile, out=None, pH=7):
     """Add missing atoms and hydrogens to ``infile`` and write the result.

     infile: path of the PDB file to read
     out: output path; when None, '_fixed' is inserted before the extension
         of ``infile``
     pH: pH used when adding hydrogens
     """
     with open(infile, 'r') as source:
         fixer = PDBFixer(pdbfile=source)

     fixer.findMissingResidues()
     fixer.findMissingAtoms()
     fixer.addMissingAtoms()
     fixer.addMissingHydrogens(pH=pH)

     target = out
     if target is None:
         target = '{0[0]}{1}{0[1]}'.format(os.path.splitext(infile), '_fixed')

     with open(target, 'w') as sink:
         PDBFile.writeFile(fixer.topology, fixer.positions, sink)
示例#12
0
    def __init__(self,constraints=app.HBonds, hydrogenMass=None, pH=7.0, **kwargs):
        """Build the Barnase-Barstar complex test system.

        Both proteins are loaded from the ``pdbs`` directory next to this
        module, protonated separately at ``pH`` and merged into one model.
        Sets ``self.topology``, ``self.positions`` and ``self.system``.

        constraints, hydrogenMass: forwarded to ``createSystem``
        pH: pH used when adding hydrogens
        kwargs: forwarded to ``TestSystem.__init__``
        """

        TestSystem.__init__(self, **kwargs)

        pdb_dir = os.path.dirname(__file__)
        receptor_fixer = PDBFixer(pdb_dir+"/pdbs/Barnase.pdb")
        ligand_fixer = PDBFixer(pdb_dir+"/pdbs/Barstar.pdb")

        forcefield = app.ForceField('amber14-all.xml')

        # Protonate each partner on its own before merging them.
        receptor = app.Modeller(receptor_fixer.topology, receptor_fixer.positions)
        receptor.addHydrogens(forcefield, pH=pH)
        ligand = app.Modeller(ligand_fixer.topology, ligand_fixer.positions)
        ligand.addHydrogens(forcefield, pH=pH)

        merged = app.Modeller(receptor.getTopology(), receptor.getPositions())
        merged.add(ligand.getTopology(), ligand.getPositions())

        self.topology = merged.getTopology()
        self.positions = merged.getPositions()
        # Replace the underlying value with a numpy array, keeping the unit wrapper.
        self.positions._value = np.array(self.positions._value)
        self.system = forcefield.createSystem(
            merged.topology,
            implicitSolvent=None,
            constraints=constraints,
            nonbondedMethod=app.NoCutoff,
            hydrogenMass=hydrogenMass,
        )
示例#13
0
 def add_hydrogens_by_openmm(self):
     """Add missing atoms and hydrogens to ``self.name`` and write ``<shotname>_h.pdb``.

     PDBFixer fills in missing atoms and hydrogens (pH 7.0); an OpenMM
     Modeller then adds hydrogens according to the force field and deletes
     water before the model is written.
     """
     from simtk.openmm.app import ForceField, Modeller, PDBFile
     from pdbfixer import PDBFixer
     fixer = PDBFixer(self.name)
     field = ForceField('amber99sb.xml', 'tip3p.xml')
     fixer.findMissingResidues()
     fixer.findMissingAtoms()
     fixer.addMissingAtoms()
     fixer.addMissingHydrogens(7.0)
     modeller = Modeller(fixer.topology, fixer.positions)
     modeller.addHydrogens(forcefield=field)
     modeller.deleteWater()
     # Context manager so the output is flushed and closed deterministically.
     with open(self.shotname+'_h.pdb', 'w') as handle:
         PDBFile.writeModel(modeller.topology, modeller.positions, handle)
示例#14
0
    def __init__(self,constraints=app.HBonds, hydrogenMass=None, pH=7.0, **kwargs):
        """Build the Barnase test system.

        The protein is loaded from the ``pdbs`` directory next to this module
        and protonated at ``pH``. Sets ``self.topology``, ``self.positions``
        and ``self.system``.

        constraints, hydrogenMass: forwarded to ``createSystem``
        pH: pH used when adding hydrogens
        kwargs: forwarded to ``TestSystem.__init__``
        """

        TestSystem.__init__(self, **kwargs)

        system_pdb=PDBFixer(os.path.dirname(__file__)+"/pdbs/Barnase.pdb")

        forcefield = app.ForceField('amber14-all.xml')
        modeller = app.Modeller(system_pdb.topology, system_pdb.positions)
        modeller.addHydrogens(forcefield, pH=pH)

        self.topology  = modeller.getTopology()
        self.positions = modeller.getPositions() # asNumpy=True
        # Replace the underlying value with a numpy array, keeping the unit wrapper.
        self.positions._value = np.array(self.positions._value)
        # Build the system from the force field so that ``constraints`` and
        # ``hydrogenMass`` are honoured; the previous code read it from an
        # undefined ``tmp_testsystem`` name.
        self.system    = forcefield.createSystem(modeller.topology,
                                         constraints=constraints,nonbondedMethod=app.NoCutoff,
                                         hydrogenMass=hydrogenMass)
示例#15
0
def pdbfix(receptor: Optional[str] = None, pdbid: Optional[str] = None,
           pH: float = 7.0, path: str = '.', **kwargs) -> str:
    """Repair a receptor structure with PDBFixer and write it to disk.

    Parameters
    ----------
    receptor
        Path of a PDB file. Read only when ``pdbid`` is not given; when given,
        it is also the output path and is overwritten.
    pdbid
        PDB identifier to load; takes precedence over ``receptor`` as the
        input source.
    pH
        pH used when adding hydrogens.
    path
        Directory for the output ``<pdbid>.pdb`` when ``receptor`` is not given.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The output location: ``receptor`` as passed in, or a ``Path`` object
    ``path/<pdbid>.pdb``.
    """
    if pdbid:
        fixer = PDBFixer(pdbid=pdbid)
    else:
        fixer = PDBFixer(filename=receptor)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    if receptor:
        outfile = receptor
    else:
        outfile = Path(path)/f'{pdbid}.pdb'

    # Context manager so the output is flushed and closed before returning.
    with open(outfile, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    return outfile
示例#16
0
def startPageCallback(parameters, handler):
    """Handle the start-page form: load a structure and show the chain page.

    When the form's 'type' field is 'local', the uploaded 'pdbfile' is parsed.
    Otherwise the 'pdbid' field is used to download the gzipped entry from the
    wwPDB FTP server. On success the module-level ``fixer`` is replaced and the
    delete-chains page is displayed; on failure of the download branch an
    error page is sent through ``handler``.
    """
    global fixer
    if 'type' not in parameters:
        return
    if parameters.getfirst('type') == 'local':
        pdb = PdbStructure(parameters['pdbfile'].value.splitlines())
        fixer = PDBFixer(pdb)
        displayDeleteChainsPage()
        return

    pdb_id = parameters.getfirst('pdbid')
    try:
        # Building the URL inside the try block means a missing 'pdbid' field
        # is reported through the error page as well.
        url = ("ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/pdb/pdb"
               + pdb_id.lower() + ".ent.gz")
        response = urllib2.urlopen(url)
        content = gzip.GzipFile(
            fileobj=StringIO(response.read())).read()
        pdb = PdbStructure(content.splitlines())
        fixer = PDBFixer(pdb)
        displayDeleteChainsPage()
    except Exception:
        handler.sendResponse(
            header +
            "Unable to download the PDB file. This may indicate an invalid PDB identifier, or an error in network connectivity."
            + loadHtmlFile("error.html"))
示例#17
0
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """Clean each PDB in ``pdb_list`` with PDBFixer and write it to ``toFolder``.

    pdb_list: identifiers; the first four characters (lower-cased) name the
        file ``<pdb>.pdb``; an optional fifth character selects the chain
    chain: chain IDs to keep. None means the fifth character of the
        identifier, or "A" for four-character identifiers; "-1" or -1 means
        every chain reported by ``getAllChains``
    fromFolder: directory holding the input files (default "original_pdbs"),
        or the path of a single ``.pdb`` file to use directly
    toFolder: output directory, created if needed

    Missing residues at either end of a chain are not rebuilt; internal gaps
    are. Heterogens and water are removed and hydrogens are added at pH 7.0.
    """
    os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        pdb = f"{pdb_id.lower()[:4]}"
        pdbFile = pdb + ".pdb"
        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # ``fromFolder`` is itself a PDB file path.
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)
        if chain is None:  # None means the default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain
        # clean pdb
        fixer = PDBFixer(filename=fromFile)
        # remove unwanted chains (by index)
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the start or end of the chain.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        # Context manager so each output file is closed before the next one.
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
示例#18
0
def add_membrane(pdb_path, membrane_lipid_type='POPC', out_as=None):
    """
        Repair a protein structure and embed it in a lipid bilayer.

        Writes ``fixed.pdb`` (repaired protein) and ``fixed_membrane.pdb``
        (protein with membrane) into the current working directory.

            Parameters
            ----------

            pdb_path: Path of the input PDB file

            membrane_lipid_type : Lipid type passed to PDBFixer's addMembrane,
                e.g. 'POPC' or 'POPE'

            out_as: Optional list of extensions such as ['psf', 'crd', 'gro'];
                the membrane system is additionally saved as
                ``fixed_membrane.<ext>`` for each of them. A format that
                fails to save is reported and skipped.

            Example
            ----------

            add_membrane('protein.pdb', 'POPC', ['crd', 'gro'])

        """
    fixer = PDBFixer(filename=pdb_path)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    print('\nAdding membrane:', membrane_lipid_type)
    with open("fixed.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle)
    fixer.addMembrane(lipidType=membrane_lipid_type,
                      membraneCenterZ=0 * unit.nanometer,
                      minimumPadding=1 * unit.nanometer,
                      positiveIon="Na+",
                      negativeIon="Cl-",
                      ionicStrength=0.0 * unit.molar)
    # The file must be closed before it is re-read below.
    with open("fixed_membrane.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    if out_as is not None:
        struct = pmd.load_file('fixed_membrane.pdb')
        for i in out_as:
            try:
                print("Savind *.%s extension File" % i)
                struct.save('fixed_membrane.%s' % i)
            except Exception as err:
                # Best effort: keep going with the remaining formats, but say
                # which one failed instead of hiding it.
                print("Could not save *.%s extension File: %s" % (i, err))
示例#19
0
def download_pdb(pdbid, file_pathway):
    """
    Download a PDB entry and write it unchanged as ``<pdbid>.pdb``.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to download
        file_pathway: directory in which the PDB file is written; created if missing

    Returns: nothing, but it does write the PDB file

    ***Note: this function does NOT fix any mistakes with the PDB file

    """

    # exist_ok avoids failing when the directory appears between a check and
    # the creation.
    os.makedirs(file_pathway, exist_ok=True)
    fixer = PDBFixer(pdbid=pdbid)
    with open(os.path.join(file_pathway, '%s.pdb' % pdbid), 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
示例#20
0
    def __init__(self, config_: Config):
        """Read the ligand, optionally repair the receptor and write the merged PDB.

        The ligand is read from ``config.ligand_file_name``. The receptor at
        ``config.pdb_file_name`` is loaded with PDBFixer and, when
        ``config.use_pdbfixer`` is set, repaired. The result is written to
        ``inital_fixed.pdb`` in the configured temp directory,
        ``config.pdb_file_name`` is pointed at that file, and the file is then
        re-saved with the ligand loaded as residue 'UNL'.
        """
        self.config = config_
        self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)
        with self.logger("__init__") as logger:
            self.boxvec = None
            self.explicit = self.config.explicit
            self.system = None

            # Read the ligand.
            ligand_stream = oechem.oemolistream(self.config.ligand_file_name)
            ligand_mol = oechem.OEMol()
            oechem.OEReadMolecule(ligand_stream, ligand_mol)
            ligand_stream.close()

            self.inital_ligand_smiles = oechem.OEMolToSmiles(ligand_mol)
            self.params_written = 0
            self.mol = Molecule.from_openeye(ligand_mol, allow_undefined_stereo=True)

            fixer = PDBFixer(self.config.pdb_file_name)

            if self.config.use_pdbfixer:
                logger.log("Fixing with PDBFixer")

                fixer.findMissingResidues()
                fixer.findNonstandardResidues()
                fixer.replaceNonstandardResidues()
                fixer.removeHeterogens(keepWater=False)
                fixer.findMissingAtoms()
                fixer.addMissingAtoms()
                fixer.addMissingHydrogens(7.0)

                logger.log("Found missing residues: ", fixer.missingResidues)
                logger.log("Found missing terminals residues: ", fixer.missingTerminals)
                logger.log("Found missing atoms:", fixer.missingAtoms)
                logger.log("Found nonstandard residues:", fixer.nonstandardResidues)

            # From here on the config points at the rewritten structure.
            self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
            with open(self.config.pdb_file_name, 'w') as pdb_out:
                app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)

            # Merge receptor and ligand and overwrite the same file.
            cmd.reinitialize()
            cmd.load(self.config.pdb_file_name)
            cmd.load(self.config.ligand_file_name, "UNL")
            cmd.alter("UNL", "resn='UNL'")
            cmd.save("{}".format(self.config.pdb_file_name))
def pdbfix_protein(input_pdb_path,
                   output_pdb_path,
                   find_missing_residues=True,
                   keep_water=False,
                   ph=None):
    """Repair a PDB file with PDBFixer and write the result.

    Nonstandard residues are replaced, heterogens are removed and missing
    atoms are added on every call.

    Parameters
    ----------
    input_pdb_path : str
        Path of the PDB file to repair.
    output_pdb_path : str
        Path of the PDB file to write.
    find_missing_residues : bool, optional
        If True (default), PDBFixer looks for unresolved residues so they can
        be modelled; if False, the set of missing residues is left empty.
    keep_water : bool, optional
        If True, water is kept when heterogens are removed (default is False).
    ph : float or None, optional
        When given, hydrogens are added at this pH; None skips that step.

    """
    fixer = PDBFixer(filename=input_pdb_path)

    if not find_missing_residues:
        # An empty mapping tells the later steps there is nothing to build.
        fixer.missingResidues = {}
    else:
        fixer.findMissingResidues()

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(keep_water)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    add_hydrogens = ph is not None
    if add_hydrogens:
        fixer.addMissingHydrogens(ph)

    with open(output_pdb_path, 'w') as pdb_out:
        PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)
示例#22
0
def _apply_pdbfix(molecule, pH=7.0, add_hydrogens=False):
    """
    Run PDBFixer to amend potential issues in PDB format.

    The molecule is written to an in-memory PDB and closed in Chimera, the
    PDB is repaired with PDBFixer (heterogens other than water removed), and
    the result is opened again under the original name and then removed from
    the open models.

    Parameters
    ----------
    molecule : chimera.Molecule
        Chimera Molecule object to fix.
    pH : float, optional
        Target pH for adding missing hydrogens.
    add_hydrogens : bool, optional
        Whether to add missing hydrogens or not.

    Returns
    -------
    The first element of what ``chimera.openModels.open`` returned for the
    repaired PDB.
    """
    pdb_in = StringIO()
    chimera.pdbWrite([molecule], chimera.Xform(), pdb_in)
    chimera.openModels.close([molecule])
    pdb_in.seek(0)

    fixer = PDBFixer(pdbfile=pdb_in)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)
    pdb_in.close()

    pdb_out = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)
    pdb_out.seek(0)
    reopened = chimera.openModels.open(pdb_out,
                                       type="PDB",
                                       identifyAs=molecule.name)
    chimera.openModels.remove(reopened)
    pdb_out.close()
    return reopened[0]
示例#23
0
def fix_peptide(pdb_file, seq_dict, pH=7.4, remove_water=True, remove_small_mols=True):
    """Rebuild a peptide structure so that its chains match target sequences.

    pdb_file: path of the input PDB file
    seq_dict: maps a chain index to its target sequence in one-letter codes;
        an empty or None sequence marks that chain for deletion. The mapping
        is not modified.
    pH: pH used when adding hydrogens
    remove_water: when heterogens are removed, also remove water
    remove_small_mols: remove heterogens before fixing

    Returns the repaired structure loaded with mdtraj; detected cis-peptide
    and chirality problems are printed.
    """
    fixer = PDBFixer(filename=pdb_file)

    # Reset the declared sequences to what is actually present in the file.
    fixer.sequences.clear()
    for chain in fixer.topology.chains():
        seq = pdbfixer.pdbfixer.Sequence(chain.id, [r.name for r in chain.residues()])
        fixer.sequences.append(seq)
    if remove_small_mols:
        fixer.removeHeterogens(not remove_water)

    # Convert single AA codes to three letter codes in a local mapping, so
    # the caller's dictionary stays usable for another call.
    delete_chains = []
    three_letter_seqs = {}
    for key, value in seq_dict.items():
        if not value:
            delete_chains.append(key)
            three_letter_seqs[key] = value
        else:
            three_letter_seqs[key] = [ONE_THREE_CODE[item] for item in value]

    for chain in fixer.topology.chains():
        target = three_letter_seqs.get(chain.index)
        if target is not None:
            fixer.sequences[chain.index].residues = target

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)
    fixer.removeChains(delete_chains)

    # Write through the temp file's own text-mode handle and flush before
    # mdtraj reads it by name; the file is removed when the block exits.
    with tempfile.NamedTemporaryFile(mode='w', suffix=".pdb") as dummy:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, dummy)
        dummy.flush()
        product = mdtraj.load(dummy.name)

    problem_cis = ChiralityCheck.check_cispeptide_bond(product)
    problem_chiral = ChiralityCheck.check_chirality(product)
    print("The following problems have been detected:")
    print(problem_cis)
    print(problem_chiral)
    print("Either rerun or find a tool to solve. Perhaps VMD?")
    return product
def pdb_id_to_mol(pdb_id: str) -> Mol:
    """Transform PDB ID into rdkit Mol.

    The entry is loaded through PDBFixer, written to a temporary PDB file and
    parsed by RDKit with sanitization enabled. The temporary file is removed
    afterwards, also when writing or parsing fails.

    Parameters
    ----------
    pdb_id: str
        PDB identifier passed to PDBFixer.

    Returns
    -------
    rdkit_mol: rdkit Mol
        rdkit Mol.

    """
    import os
    import tempfile

    fixer = PDBFixer(pdbid=pdb_id)

    # A uniquely named temp file avoids clashing with another 'tmp.pdb' in the
    # working directory; delete=False lets RDKit reopen it by name after the
    # handle has been closed.
    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False)
    try:
        with handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
        rdkit_mol = Chem.MolFromPDBFile(handle.name, sanitize=True)
    finally:
        os.remove(handle.name)

    return rdkit_mol
示例#25
0
    def __init__(self, constraints=app.HBonds, hydrogenMass=None, pH=7.0, **kwargs):
        """Build the 1l2y test system with OBC1 implicit solvent.

        The structure is loaded from the ``pdbs`` directory next to this
        module, missing atoms are added, and hydrogens are added at ``pH``.
        Sets ``self.topology``, ``self.positions`` and ``self.system``.

        constraints, hydrogenMass: forwarded to ``createSystem``
        pH: pH used when adding hydrogens
        kwargs: forwarded to ``TestSystem.__init__``
        """

        TestSystem.__init__(self, **kwargs)

        fixer = PDBFixer(os.path.dirname(__file__)+"/pdbs/1l2y.pdb")

        # Nonstandard residues are located but not replaced here.
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()

        forcefield = app.ForceField('amber96.xml', 'amber96_obc.xml')
        modeller = app.Modeller(fixer.topology, fixer.positions)
        modeller.addHydrogens(forcefield, pH=pH)

        self.topology = modeller.getTopology()
        self.positions = modeller.getPositions()
        # Replace the underlying value with a numpy array, keeping the unit wrapper.
        self.positions._value = np.array(self.positions._value)
        self.system = forcefield.createSystem(
            modeller.topology,
            implicitSolvent=app.OBC1,
            constraints=constraints,
            nonbondedMethod=app.NoCutoff,
            hydrogenMass=hydrogenMass,
        )
示例#26
0
def fix_pdb(pdb_id):
    """Repair a PDB file and write ``<name>_fixed_pH_7.pdb`` to the working directory.

    pdb_id: passed to PDBFixer as its first positional argument; ``<name>`` is
        the part of it before the first '.'. Nothing is done (and None is
        returned) when the argument is exactly four characters long.

    Missing residues at either end of a chain are not rebuilt. Heterogens
    other than water are removed and hydrogens are added at pH 7.

    Returns the name of the written file (without directory), or None.
    """
    if len(pdb_id) == 4:
        return None

    path = os.getcwd()
    print("Creating PDBFixer...")
    fixer = PDBFixer(pdb_id)
    print("Finding missing residues...")
    fixer.findMissingResidues()

    # Drop terminal gaps; only internal gaps are rebuilt.
    chains = list(fixer.topology.chains())
    for key in list(fixer.missingResidues):
        chain = chains[key[0]]
        if key[1] == 0 or key[1] == len(list(chain.residues())):
            print("ok")
            del fixer.missingResidues[key]

    print("Finding nonstandard residues...")
    fixer.findNonstandardResidues()
    print("Replacing nonstandard residues...")
    fixer.replaceNonstandardResidues()
    print("Removing heterogens...")
    fixer.removeHeterogens(keepWater=True)

    print("Finding missing atoms...")
    fixer.findMissingAtoms()
    print("Adding missing atoms...")
    fixer.addMissingAtoms()
    print("Adding missing hydrogens...")
    fixer.addMissingHydrogens(7)
    print("Writing PDB file...")

    # Compute the name once so the written file and the return value agree.
    out_name = "%s_fixed_pH_%s.pdb" % (pdb_id.split('.')[0], 7)
    with open(os.path.join(path, out_name), "w") as handle:
        PDBFile.writeFile(
            fixer.topology,
            fixer.positions,
            handle,
            keepIds=True)
    return out_name
示例#27
0
    def _via_helper_water(cls, **kwargs):
        """
        Helper function for via_rdkit or via_openeye

        Solvates ``cls.pdb_filename`` with PDBFixer, using ``kwargs["padding"]``
        (converted to float) when given and ``cls.default_padding`` otherwise,
        then combines ``cls.ligand_pmd`` with one copy of ``cls.solvent_pmd``
        per water residue. ``cls.pdb_filename`` is reassigned to a temporary
        file whose directory is removed before returning.

        Returns
        ------------------
        system_pmd : parmed.structure
            The parameterised system as parmed object
        """
        from pdbfixer import PDBFixer  # for solvating

        fixer = PDBFixer(cls.pdb_filename)
        if "padding" in kwargs:
            padding = float(kwargs["padding"])
        else:
            padding = cls.default_padding
        fixer.addSolvent(padding=padding)

        # The file name is generated inside a freshly created private directory.
        tmp_dir = tempfile.mkdtemp()
        cls.pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
        with open(cls.pdb_filename, "w") as f:
            PDBFile.writeFile(fixer.topology, fixer.positions, f)
        solvated = parmed.load_file(cls.pdb_filename)

        solvent = solvated["(:HOH)"]
        num_solvent = len(solvent.residues)

        solvent_pmd = cls.solvent_pmd * num_solvent
        solvent_pmd.positions = solvent.positions

        cls.system_pmd = cls.ligand_pmd + solvent_pmd
        cls.system_pmd.box_vectors = solvated.box_vectors

        # Best-effort cleanup: remove the directory created above directly
        # instead of recovering it by splitting the path on "/".
        shutil.rmtree(tmp_dir, ignore_errors=True)
        try:
            del cls.ligand_pmd
        except AttributeError:
            # The attribute is not set directly on this class; nothing to drop.
            pass

        cls.system_pmd.title = cls.smiles
        return cls.system_pmd
示例#28
0
def add_solvent(pdb_filepath: str,
                ani_input: dict,
                pdb_output_filepath: str,
                box_length: unit.quantity.Quantity = (2.5 * unit.nanometer)):
    """Solvate a structure in a cubic box and record the water atoms.

    pdb_filepath: path of the input PDB file
    ani_input: dictionary updated in place with 'solvent_atoms' (concatenated
        element symbols of all 'HOH' atoms), 'solvent_coords' (their
        coordinates in angstrom) and 'box_length'
    pdb_output_filepath: path where the solvated system is written
    box_length: edge length of the cubic box, as a unit Quantity

    The positions returned by PDBFixer are used as they are; the solute is
    not re-centred or wrapped into the box here.
    """
    from simtk.openmm.app import PDBFile

    assert isinstance(box_length, unit.Quantity)

    pdb = PDBFixer(filename=pdb_filepath)

    # add water in a cubic box
    edge = box_length.value_in_unit(unit.nanometer)
    pdb.addSolvent(boxVectors=(Vec3(edge, 0.0, 0.0), Vec3(0.0, edge, 0.0),
                               Vec3(0.0, 0.0, edge)))

    # Context manager so the output is flushed and closed deterministically.
    with open(pdb_output_filepath, 'w') as handle:
        PDBFile.writeFile(pdb.topology, pdb.positions, handle)

    # Collect element symbols and coordinates of the water atoms only.
    atom_list = []
    coord_list = []
    for atom, coor in zip(pdb.topology.atoms(), pdb.positions):
        if atom.residue.name != 'HOH':
            continue
        atom_list.append(atom.element.symbol)
        coor = coor.value_in_unit(unit.angstrom)
        coord_list.append([coor[0], coor[1], coor[2]])

    ani_input['solvent_atoms'] = ''.join(atom_list)
    ani_input['solvent_coords'] = np.array(coord_list) * unit.angstrom
    ani_input['box_length'] = box_length
示例#29
0
def getAllChains(pdbFile, removeDNAchains=True):
    """Return the chain IDs of a PDB file as one sorted, de-duplicated string.

    The file is loaded with PDBFixer and heterogens (water included) are
    removed before the chains are inspected.

    Args:
        pdbFile: Path of the PDB file, passed to ``PDBFixer(filename=...)``.
        removeDNAchains: When true, a chain whose residues are all named
            ``DA``/``DG``/``DC``/``DT``/``DI`` is reported on stdout and
            left out of the result. The chain is only excluded from the
            returned string; nothing is written back to any file.

    Returns:
        str: The distinct characters of the accepted chain IDs, sorted.
        Case is preserved (IDs are not upper-cased).
    """
    fixer = PDBFixer(filename=pdbFile)
    # We only want polymer chains; ligands (and water) are dropped here.
    fixer.removeHeterogens(keepWater=False)

    dna_residues = {'DA', 'DG', 'DC', 'DT', 'DI'}
    valid_ids = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 'abcdefghijklmnopqrstuvwxyz'
                 '0123456789')

    kept_ids = []
    for chain in fixer.topology.chains():
        # Builtin all() instead of np.alltrue, which was removed in NumPy 2.0.
        # Like np.alltrue, all() is True for a chain without residues.
        if removeDNAchains and all(
                residue.name in dna_residues for residue in chain.residues()):
            print(f"chain {chain.id} is a DNA chain. it will be removed")
            continue
        # NOTE: this is a substring test, so an ID is accepted when it occurs
        # contiguously in valid_ids; an empty ID passes but adds nothing.
        if chain.id in valid_ids:
            kept_ids.append(chain.id)

    # set() works on characters, so a multi-character ID contributes each of
    # its characters separately.
    return ''.join(sorted(set(''.join(kept_ids))))
示例#30
0
def prepare_inputs(
        protein: str,
        ligand: str,
        replace_nonstandard_residues: bool = True,
        remove_heterogens: bool = True,
        remove_water: bool = True,
        add_hydrogens: bool = True,
        pH: float = 7.0,
        optimize_ligand: bool = True,
        pdb_name: Optional[str] = None) -> Tuple[RDKitMol, RDKitMol]:
    """This prepares protein-ligand complexes for docking.

    Autodock Vina requires PDB files for proteins and ligands with
    sensible inputs. This function uses PDBFixer and RDKit to ensure
    that inputs are reasonable and ready for docking. Default values
    are given for convenience, but fixing PDB files is complicated and
    human judgement is required to produce protein structures suitable
    for docking. Always inspect the results carefully before trying to
    perform docking.

    Parameters
    ----------
    protein: str
        Filename for protein PDB file or a PDBID. Anything that does not
        end in ``.pdb`` is treated as a PDBID and downloaded from
        files.rcsb.org.
    ligand: str
        Either a filename for a ligand PDB file or a SMILES string.
        Anything that does not end in ``.pdb`` is parsed as SMILES.
    replace_nonstandard_residues: bool (default True)
        Replace nonstandard residues with standard residues.
    remove_heterogens: bool (default True)
        Removes residues that are not standard amino acids or nucleotides.
    remove_water: bool (default True)
        Remove water molecules. Only has an effect when
        `remove_heterogens` is True; otherwise water is left untouched.
    add_hydrogens: bool (default True)
        Add missing hydrogens at the protonation state given by `pH`.
    pH: float (default 7.0)
        Most common form of each residue at given `pH` value is used.
    optimize_ligand: bool (default True)
        If True, optimize ligand with RDKit. Required for SMILES inputs.
    pdb_name: Optional[str]
        If given, write sanitized protein and ligand to files called
        "pdb_name.pdb" and "ligand_pdb_name.pdb"

    Returns
    -------
    Tuple[RDKitMol, RDKitMol]
        Tuple of `protein_molecule, ligand_molecule` with 3D information.

    Raises
    ------
    ImportError
        If RDKit, PDBFixer or OpenMM cannot be imported.

    Note
    ----
    This function requires RDKit and OpenMM to be installed.
    Read more about PDBFixer here: https://github.com/openmm/pdbfixer.

    Examples
    --------
    >>> p, m = prepare_inputs('3cyx', 'CCC')

    >> p.GetNumAtoms()
    >> m.GetNumAtoms()

    >>> p, m = prepare_inputs('3cyx', 'CCC', remove_heterogens=False)

    >> p.GetNumAtoms()

    """
    import tempfile

    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
        from pdbfixer import PDBFixer
        from simtk.openmm.app import PDBFile
    except ModuleNotFoundError:
        raise ImportError(
            "This function requires RDKit and OpenMM to be installed.")

    if protein.endswith('.pdb'):
        fixer = PDBFixer(filename=protein)
    else:
        fixer = PDBFixer(url='https://files.rcsb.org/download/%s.pdb' %
                         (protein))

    if ligand.endswith('.pdb'):
        m = Chem.MolFromPDBFile(ligand)
    else:
        m = Chem.MolFromSmiles(ligand, sanitize=True)

    # Apply common fixes to PDB files
    if replace_nonstandard_residues:
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
    if remove_heterogens:
        # Water is kept exactly when the caller did not ask to remove it.
        fixer.removeHeterogens(keepWater=not remove_water)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)

    # Round-trip through a PDB file to hand the fixed structure to RDKit.
    # A private temporary directory is used so that a 'tmp.pdb' in the
    # working directory is never overwritten or deleted, concurrent calls
    # cannot collide, and the scratch file is removed even if parsing
    # raises. The handle is closed before RDKit reads the file back.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_pdb = os.path.join(scratch_dir, 'tmp.pdb')
        with open(scratch_pdb, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
        p = Chem.MolFromPDBFile(scratch_pdb, sanitize=True)

    # Optimize ligand
    if optimize_ligand:
        m = Chem.AddHs(m)  # need hydrogens for optimization
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)

    if pdb_name:
        Chem.rdmolfiles.MolToPDBFile(p, '%s.pdb' % (pdb_name))
        Chem.rdmolfiles.MolToPDBFile(m, 'ligand_%s.pdb' % (pdb_name))

    return (p, m)