Пример #1
0
def runPDB2PQR(pdblist,
               ff,
               outname="",
               ph=None,
               verbose=False,
               selectedExtensions=None,
               extensionOptions=None,
               ph_calc_method=None,
               ph_calc_options=None,
               clean=False,
               neutraln=False,
               neutralc=False,
               ligand=None,
               assign_only=False,
               chain=False,
               drop_water=False,
               debump=True,
               opt=True,
               typemap=False,
               userff=None,
               usernames=None,
               ffout=None,
               commandLine=None,
               include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:            The name of the desired output file
            ph:                 The desired ph of the system (float)
            verbose:            When True, script will print information to stdout
                                When False, no detailed information will be printed (bool)
            selectedExtensions: List of extension names (keys of extensions.extDict) to run.
                                None (the default) means no extensions.
            extensionOptions:   optionParser like option object that is passed to each
                                extension. None (the default) creates a fresh
                                utilities.ExtraOptions() for this call.
            ph_calc_method:     'propka' or 'pdb2pka' to run that pKa calculation before
                                hydrogens are added; any other value skips it.
            ph_calc_options:    Options object handed to the selected pKa calculation.
            clean:              only return original PDB file in aligned format.
            neutraln:           Make the N-terminus of this protein neutral
            neutralc:           Make the C-terminus of this protein neutral
            ligand:             Calculate the parameters for the ligand in mol2 format at the given path.
            assign_only:        Only assign charges and radii - do not add atoms, debump, or optimize.
            chain:              Keep the chain ID in the output PQR file
            drop_water:         Remove water molecules from output
            debump:             When true, debump heavy atoms
            opt:                When true, run hydrogen optimization
            typemap:            Create Typemap output.
            userff:             The user created forcefield file to use. Overrides ff.
            usernames:          The user created names file to use. Required if using userff.
            ffout:              Instead of using the standard canonical naming scheme for residue
                                and atom names, use the names from the given forcefield
            commandLine:        command line used (if any) to launch the program. Included in output header.
            include_old_header: Include most of the PDB header in output.

        Returns
            header:  The PQR file header (string); empty when clean is set
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (list); None when clean is set
    """

    # Build the defaults per call: a default object created in the signature
    # would be shared by every call that omits the argument.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    Lig = None
    atomcount = 0  # Count the number of ATOM records in pdb

    outroot = utilities.getPQRBaseFileName(outname)

    if ph_calc_method == 'propka':
        pkaname = outroot + ".propka"
        # Delete any propka output left over from an earlier run.
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    if drop_water:
        # Remove the waters: drop every atom-like record whose residue name
        # is a known water name, keep all other records untouched.
        pdblist = [
            record for record in pdblist
            if not (isinstance(record, (HETATM, ATOM, SIGATM, SEQADV))
                    and record.resName in WAT.water_residue_names)
        ]

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM":
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Report alternate locations: one stderr line per affected atom, plus a
    # single two-line warning per affected residue for the PQR header.
    for residue in myProtein.getResidues():
        multoccupancy = False
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = True
                txt = "Warning: multiple occupancies found: %s in %s\n" % (
                    atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy:
            myRoutines.warnings.append(
                "WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(
                "         at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        # Clean mode stops here: no forcefield is applied, so there is no
        # header and no notion of missed ligand residues.
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            #TODO: figure out a way to do this without crashing...
            #tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        #Be sure to include None for missed ligand residues
        return header, lines, None

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if ph_calc_method == 'propka':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options)
        elif ph_calc_method == 'pdb2pka':
            myRoutines.runPDB2PKA(ph, ff, pdblist, ligand, verbose,
                                  ph_calc_options)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        # Debump a second time now that hydrogens are present.
        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
        else:
            myhydRoutines.initializeWaterOptimization()
        myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()

    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = False

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein's residue list inside this loop, and removing from a list
        # that is being iterated would skip the following residue.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

            ligcharge = residue.getCharge()
            # Compare against the NEAREST integer. int() truncates toward
            # zero, so a summed charge such as 0.9999 or -0.9999 would be
            # judged 0.9999 away from an integer and wrongly rejected.
            if abs(ligcharge - round(ligcharge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n"
                )
                myRoutines.warnings.append(
                    "         the desired ligand; it has been left out of\n"
                )
                myRoutines.warnings.append("         the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = True
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if ffout is not None:
        scheme = ffout
        userff = None  # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist,
                            misslist,
                            reslist,
                            charge,
                            ff,
                            myRoutines.getWarnings(),
                            ph_calc_method,
                            ph,
                            ffout,
                            commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    # (anything that is neither an amino acid nor a nucleic acid); collect
    # each residue name once, in first-seen order.
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues
Пример #2
0
def runPDB2PQR(pdblist, ff,
               outname = "",
               ph = None,
               verbose = False,
               selectedExtensions = None,
               extensionOptions = None,
               propkaOptions = None,
               clean = False,
               neutraln = False,
               neutralc = False,
               ligand = None,
               assign_only = False,
               chain = False,
               debump = True,
               opt = True,
               typemap = False,
               userff = None,
               usernames = None,
               ffout = None,
               commandLine=None,
               include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:            The name of the desired output file
            ph:                 The desired ph of the system (float). When given (not None),
                                PROPKA is run before hydrogens are added.
            verbose:            When True, script will print information to stdout
                                When False, no detailed information will be printed (bool)
            selectedExtensions: List of extension names (keys of extensions.extDict) to run.
                                None (the default) means no extensions.
            extensionOptions:   optionParser like option object that is passed to each
                                extension. None (the default) creates a fresh
                                utilities.ExtraOptions() for this call.
            propkaOptions:      optionParser like option object for propka30.
            clean:              only return original PDB file in aligned format.
            neutraln:           Make the N-terminus of this protein neutral
            neutralc:           Make the C-terminus of this protein neutral
            ligand:             Calculate the parameters for the ligand in mol2 format at the given path.
            assign_only:        Only assign charges and radii - do not add atoms, debump, or optimize.
            chain:              Keep the chain ID in the output PQR file
            debump:             When true, debump heavy atoms
            opt:                When true, run hydrogen optimization
            typemap:            Create Typemap output.
            userff:             The user created forcefield file to use. Overrides ff.
            usernames:          The user created names file to use. Required if using userff.
            ffout:              Instead of using the standard canonical naming scheme for residue
                                and atom names, use the names from the given forcefield
            commandLine:        command line used (if any) to launch the program. Included in output header.
            include_old_header: Include most of the PDB header in output.

        Returns
            header:  The PQR file header (string); empty when clean is set
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (list); None when clean is set
    """

    # Build the defaults per call: a default object created in the signature
    # would be shared by every call that omits the argument.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb

    outroot = utilities.getPQRBaseFileName(outname)

    # A pH request is what switches the PROPKA run on.
    pka = ph is not None
    if pka:
        pkaname = outroot + ".propka"
        # Delete any propka output left over from an earlier run.
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, ligand, pdblist, verbose)
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM":
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Report alternate locations: one stderr line per affected atom, plus a
    # single two-line warning per affected residue for the PQR header.
    for residue in myProtein.getResidues():
        multoccupancy = False
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = True
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append("         at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        # Clean mode stops here: no forcefield is applied, so there is no
        # header and no notion of missed ligand residues.
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        # NOTE(review): this branch hands each extension a deep copy of the
        # routines, while the extension loop at the end of this function has
        # the same deepcopy commented out as crash-prone -- confirm which
        # behaviour is intended before unifying the two.
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(tempRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        #Be sure to include None for missed ligand residues
        return header, lines, None

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if pka:
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, propkaOptions)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        # Debump a second time now that hydrogens are present.
        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
        else:
            myhydRoutines.initializeWaterOptimization()
        myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()

    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = False

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein's residue list inside this loop, and removing from a list
        # that is being iterated would skip the following residue.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

            ligcharge = residue.getCharge()
            # Compare against the NEAREST integer. int() truncates toward
            # zero, so a summed charge such as 0.9999 or -0.9999 would be
            # judged 0.9999 away from an integer and wrongly rejected.
            if abs(ligcharge - round(ligcharge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append("         the desired ligand; it has been left out of\n")
                myRoutines.warnings.append("         the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = True
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if ffout is not None:
        scheme = ffout
        userff = None # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist, misslist, reslist, charge, ff,
                            myRoutines.getWarnings(), ph, ffout, commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    # (anything that is neither an amino acid nor a nucleic acid); collect
    # each residue name once, in first-seen order.
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues