Exemplo n.º 1
0
def pre_init(original_pdb_list=None,
             output_dir=None,
             ff=None,
             verbose=False,
             pdie=8.0,
             sdie=80,
             maps=None,
             xdiel=None,
             ydiel=None,
             zdiel=None,
             kappa=None,
             sd=None,
             ligand=None):
    """This function cleans the PDB and prepares the APBS input file

    Prepares the output folder."""

    #prepare the output directory

    output_dir = os.path.abspath(output_dir)

    try:
        os.makedirs(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise ValueError('Target directory is a file! Aborting.')

    workspace_dir = os.path.join(output_dir,'workspace')

    try:
        os.makedirs(workspace_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise ValueError('Target directory is a file! Aborting.')

    #
    # remove hydrogen atoms
    #

    working_pdb_filename = os.path.join(workspace_dir,'working.pdb')

    pka_help.dump_protein_no_hydrogens(original_pdb_list, working_pdb_filename)
    #
    # Get the PDBfile
    #
    pdbfile = getPDBFile(working_pdb_filename)
    pdblist, errlist = readPDB(pdbfile)

    if verbose:
        print "Beginning PDB2PKA...\n"
    #
    # Read the definition file
    #
    myDefinition = Definition()
    ligand_titratable_groups=None
    #
    #
    # Choose whether to include the ligand or not
    #
    # Add the ligand to the pdb2pqr arrays
    #
    Lig=None
    if ligand is None:
        myProtein = Protein(pdblist, myDefinition)
    else:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, ligand, pdblist, verbose)
    #
    # =======================================================================
    #
    # We have identified the structural elements, now contiue with the setup
    #
    # Print something for some reason?
    #
    if verbose:
        print "Created protein object -"
        print "\tNumber of residues in protein: %s" % myProtein.numResidues()
        print "\tNumber of atoms in protein   : %s" % myProtein.numAtoms()
    #
    # Set up all other routines
    #
    myRoutines = Routines(myProtein, verbose) #myDefinition)
    myRoutines.updateResidueTypes()
    myRoutines.updateSSbridges()
    myRoutines.updateBonds()
    myRoutines.setTermini()
    myRoutines.updateInternalBonds()

    myRoutines.applyNameScheme(Forcefield(ff, myDefinition, None))
    myRoutines.findMissingHeavy()
    myRoutines.addHydrogens()
    myRoutines.debumpProtein()

    #myRoutines.randomizeWaters()
    myProtein.reSerialize()
    #
    # Inject the information on hydrogen conformations in the HYDROGENS.DAT arrays
    # We get this information from ligand_titratable_groups
    #
    from src.hydrogens import hydrogenRoutines
    myRoutines.updateInternalBonds()
    myRoutines.calculateDihedralAngles()
    myhydRoutines = hydrogenRoutines(myRoutines)
    #
    # Here we should inject the info!!
    #
    myhydRoutines.setOptimizeableHydrogens()
    myhydRoutines.initializeFullOptimization()
    myhydRoutines.optimizeHydrogens()
    myhydRoutines.cleanup()
    myRoutines.setStates()

    #
    # Choose the correct forcefield
    #
    myForcefield = Forcefield(ff, myDefinition, None)
    if Lig:
        hitlist, misslist = myRoutines.applyForcefield(myForcefield)
        #
        # Can we get charges for the ligand?
        #
        templist=[]
        ligsuccess=False
        for residue in myProtein.getResidues():
            if isinstance(residue, LIG):
                templist = []
                Lig.make_up2date(residue)
                net_charge=0.0
                print 'Ligand',residue
                print 'Atom\tCharge\tRadius'
                for atom in residue.getAtoms():
                    if atom.mol2charge:
                        atom.ffcharge=atom.mol2charge
                    else:
                        atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                    #
                    # Find the net charge
                    #
                    net_charge=net_charge+atom.ffcharge
                    #
                    # Assign radius
                    #
                    atom.radius = Lig.ligand_props[atom.name]["radius"]
                    print '%s\t%6.4f\t%6.4f' %(atom.name,atom.ffcharge,atom.radius)
                    if atom in misslist:
                        misslist.pop(misslist.index(atom))
                        templist.append(atom)
                    #
                    # Store the charge and radius in the atom instance for later use
                    # This really should be done in a nicer way, but this will do for now
                    #
                    atom.secret_radius=atom.radius
                    atom.secret_charge=atom.ffcharge
                    #
                    #

                charge = residue.getCharge()
                if abs(charge - round(charge)) > 0.01:
                    # Ligand parameterization failed
                    myProtein.residues.remove(residue)
                    raise Exception('Non-integer charge on ligand: %8.5f' %charge)
                else:
                    ligsuccess = 1
                    # Mark these atoms as hits
                    hitlist = hitlist + templist
                #
                # Print the net charge
                #
                print 'Net charge for ligand %s is: %5.3f' %(residue.name,net_charge)
        #
        # Temporary fix; if ligand was successful, pull all ligands from misslist
        # Not sure if this is needed at all here ...? (Jens wrote this)
        #
        if ligsuccess:
            templist = misslist[:]
            for atom in templist:
                if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic):
                    continue
                misslist.remove(atom)

    if verbose:
        print "Created protein object (after processing myRoutines) -"
        print "\tNumber of residues in protein: %s" % myProtein.numResidues()
        print "\tNumber of atoms in protein   : %s" % myProtein.numAtoms()
    #
    # Create the APBS input file
    #
    import src.psize
    size=src.psize.Psize()

    method=""
    async=0
    split=0

    igen = inputgen_pKa.inputGen(working_pdb_filename)
    #
    # For convenience
    #
    igen.pdie = pdie
    print 'Setting protein dielectric constant to ',igen.pdie
    igen.sdie=sdie
    igen.maps=maps
    if maps==1:
        print "Using dielectric and mobile ion-accessibility function maps in PBE"
        if xdiel:
            igen.xdiel = xdiel
        else:
            raise PDB2PKAError('X dielectric map is missing')
        if ydiel:
            igen.ydiel = ydiel
        else:
            raise PDB2PKAError("Y dielectric map is missing\n")
        if zdiel:
            igen.zdiel = zdiel
        else:
            raise PDB2PKAError("Z dielectric map is missing\n")

        print 'Setting dielectric function maps: %s, %s, %s'%(igen.xdiel,igen.ydiel,igen.zdiel)

        if kappa:
            igen.kappa = kappa
        else:
            raise PDB2PKAError("Mobile ion-accessibility map is missing\n")

        print 'Setting mobile ion-accessibility function map to: ',igen.kappa

        if sd:
            xdiel_smooth, ydiel_smooth, zdiel_smooth = smooth(xdiel,ydiel,zdiel)
            igen.xdiel = xdiel_smooth
            igen.ydiel = ydiel_smooth
            igen.zdiel = zdiel_smooth
    #
    # Return all we need
    #
    return output_dir, myProtein, myRoutines, myForcefield,igen, ligand_titratable_groups, maps, sd
Exemplo n.º 2
0
def startpKa():
    """
        Function for starting pKa script from the command line.

        Returns
            protein:    The protein object as generated by PDB2PQR
            routines:   The routines object as generated by PDB2PQR
            forcefield: The forcefield object as generated by PDB2PQR
    """
    print
    print 'PDB2PQR pKa calculations'
    print

    parser = optparse.OptionParser()

    ##
    ## set optparse options
    ##
    parser.add_option(
        '-v','--verbose',
        dest='verbose',
        action="store_true",
        default=False,
        )
    parser.add_option(
        '--pdie',
        dest='pdie',
        default=8,
        type='int',
        help='<protein dielectric constant>',
        )
    parser.add_option(
        '--sdie',
        dest='sdie',
        default=80,
        type='int',
        help='<solvent dielectric constant>',
        )
    parser.add_option(
        '--ff',
        dest='ff',
        type='choice',
        default='parse',
        choices=("amber","AMBER","charmm","CHARMM","parse","PARSE",),
        help='<force field (amber, charmm, parse)>',
        )
    parser.add_option(
        '--resume',
        dest='resume',
        action="store_true",
        default=False,
        help='resume run from saved state.',
        )
    parser.add_option(
        '--ligand',
        dest='ligand',
        type='str',
        help='<ligand in MOL2 format>',
        )
    parser.add_option(
        '--maps',
        dest='maps',
        default=None,
        type='int',
        help='<1 for using provided 3D maps; 2 for genereting new maps>',
        )
    parser.add_option(
        '--xdiel',
        dest='xdiel',
        default=None,
        type='str',
        help='<xdiel maps>',
        )
    parser.add_option(
        '--ydiel',
        dest='ydiel',
        default=None,
        type='str',
        help='<ydiel maps>',
        )
    parser.add_option(
        '--zdiel',
        dest='zdiel',
        default=None,
        type='str',
        help='<zdiel maps>',
        )
    parser.add_option(
        '--kappa',
        dest='kappa',
        default=None,
        type='str',
        help='<ion-accessibility map>',
        )
    parser.add_option(
        '--smooth',
        dest='sd',
        default=None,
        type='float',
        help='<st.dev [A] of Gaussian smooting of 3D maps at the boundary, bandthwith=3 st.dev>',
        )
    #
    # Cut off energy for calculating non-charged-charged interaction energies
    #
    parser.add_option('--pairene',dest='pairene',type='float',default=1.0,
                      help='Cutoff energy in kT for calculating non charged-charged interaction energies. Default: %default')
    #
    # Options for doing partial calculations
    #
    parser.add_option('--res_energy',
                      dest='desolvation_res',
                      default=[],
                      action='append',
                      type='string',
                      help='Calculate desolvation energy and interaction energy for this residue in its default protonation state. Protonation states can be specified with the --protonation_state argument')
    parser.add_option('--PS_file',dest='PS_file',default='',type='string',action='store',help='Set protonation states according to the pdb2pka protonation state file (option --PS_file)')
    (options,args,) = parser.parse_args()

    ##
    ## parse optparse options
    ##
    ff = options.ff.lower()
    pdie = options.pdie
    verbose = options.verbose
    sdie = options.sdie
    maps = options.maps
    xdiel = options.xdiel
    ydiel = options.ydiel
    zdiel = options.zdiel
    kappa = options.kappa
    sd = options.sd

    #
    # Find the PDB file
    #
    if len(args) != 2:
        parser.error("Usage: pka.py [options] <pdbfile> <output directory>\n")
    input_path = args[0]
    output_path = args[1]

    ligand = None
    if options.ligand is not None:
        try:
            ligand = open(options.ligand, 'rU')
        except IOError:
            print 'Unable to find ligand file %s! Skipping...' % options.ligand

    #Set up the protien object
    #In the standalone version of pdb2pka this is redundent but needed so we emulate the
    #interface needed by pdb2pqr

    pdbfile = getPDBFile(input_path)
    pdblist, errlist = readPDB(pdbfile)
    if len(errlist) != 0 and verbose:
        print "Warning: %s is a non-standard PDB file.\n" %input_path
        print errlist
    #
    # Read the definition file
    #
    myDefinition = Definition()
    #
    #
    # Choose whether to include the ligand or not
    #
    # Add the ligand to the pdb2pqr arrays
    #
    if ligand is None:
        myProtein = Protein(pdblist, myDefinition)
    else:
        from pdb2pka.ligandclean import ligff
        myProtein, _, _ = ligff.initialize(myDefinition, ligand, pdblist, verbose)

    #
    # Call the pre_init function
    #
    return pre_init(protein=myProtein,
                    output_dir=output_path,
                    ff=ff,
                    verbose=verbose,
                    pdie=pdie,
                    sdie=sdie,
                    maps=maps,
                    xdiel=xdiel,
                    ydiel=ydiel,
                    zdiel=zdiel,
                    kappa=kappa,
                    sd=sd,
                    ligand=ligand),options
Exemplo n.º 3
0
def runPDB2PQR(pdblist, ff, options):
    """
        Run the PDB2PQR Suite

        Parameters
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)
            options: A dictionary of PDB2PQR options, including:
                     verbose: When 1, script will print information to stdout
                              When 0, no detailed information will be printed (int)
                     debump:  When 1, debump heavy atoms (int)
                     opt:     When 1, run hydrogen optimization (int)
                     ph:      The desired ph of the system (float)
                     outname: The name of the desired output file
        Returns
            header:  The PQR file header (string)
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (ligand)
    """
    ph = None
    pkaname = ""
    outname = ""
    outroot = ""
    typemapname = ""
    neutraln = None
    neutralc = None
    lines = []
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb

    # userff is CGI-based User Forcefield file object

    if "userff" in options: userff = options["userff"]
    else: userff = None

    if "usernames" in options: usernames = options["usernames"]
    else: usernames = None

    if "verbose" in options: verbose = 1
    else: verbose = 0

    if "opt" in options: optflag = 1
    else: optflag = 0

    if "typemap" in options: typemapflag = 1
    else: typemapflag = 0

    if "chain" in options: chainflag = 1
    else: chainflag = 0

    if "outname" not in options or options["outname"] == None:
        text = "Error: Output name not set!"
        raise ValueError, text
    else:
        outname = options["outname"]
        period = string.rfind(outname,".")
        if period > 0: outroot = outname[0:period]
        else: outroot = outname

    if "ph" in options:
        pka = 1
        ph = options["ph"]
        pkaname = outroot + ".propka"
        if os.path.isfile(pkaname): os.remove(pkaname)
    else: pka = 0

    typemapname = "%s-typemap.html" % outroot

    extmap = options["extensions"]
    
    start = time.time()

    if verbose:
        print "Beginning PDB2PQR...\n"

    myDefinition = Definition()
    if verbose:
        print "Parsed Amino Acid definition file."   

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if "ligand" in options:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, options["ligand"], pdblist, verbose)        
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM": 
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print "Created protein object -"
        print "\tNumber of residues in protein: %s" % myProtein.numResidues()
        print "\tNumber of atoms in protein   : %s" % myProtein.numAtoms()
        
    myRoutines = Routines(myProtein, verbose)

    for residue in myProtein.getResidues():
        multoccupancy = 0
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = 1
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy == 1:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append("         at least one of the instances is being ignored.\n")

    if "neutraln" in options: neutraln = 1
    if "neutralc" in options: neutralc = 1

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if "clean" in options:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chainflag)
      
        # Process the extensions
        for ext in extmap:
            module = extmap[ext]
            call = "module.%s(myRoutines, outroot)" % ext
            eval(call)  
    
        if verbose:
            print "Total time taken: %.2f seconds\n" % (time.time() - start)
        return header, lines

    if not "assign-only" in options:
        # It is OK to process ligands with no ATOM records in the pdb
        if atomcount == 0 and Lig != None:
            pass
        else:
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if "debump" in options:
            myRoutines.debumpProtein()  

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if "debump" in options:
            myRoutines.debumpProtein()  

        if optflag:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()


    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)
  
    ligsuccess = 0
    if "ligand" in options:

        # If this is independent, we can assign charges and radii here
 
        for residue in myProtein.getResidues():
            if isinstance(residue, LIG):
                templist = []
                Lig.make_up2date(residue)
                for atom in residue.getAtoms():
                    atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                    atom.radius = Lig.ligand_props[atom.name]["radius"]
                    if atom in misslist:
                        misslist.pop(misslist.index(atom))
                        templist.append(atom)

                charge = residue.getCharge()
                if abs(charge - int(charge)) > 0.001:
                    # Ligand parameterization failed
                    myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                    myRoutines.warnings.append("         the desired ligand; it has been left out of\n")
                    myRoutines.warnings.append("         the PQR file.\n")
                    myRoutines.warnings.append("\n")
                    
                    # remove the ligand
                    myProtein.residues.remove(residue) 
                    for chain in myProtein.chains:
                        if residue in chain.residues: chain.residues.remove(residue)
                else:
                    ligsuccess = 1
                    # Mark these atoms as hits
                    hitlist = hitlist + templist
    
    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        templist = misslist[:]
        for atom in templist:
            if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic): continue
            misslist.remove(atom)

    # Creat the Typemap
    if typemapflag:
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge

    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if "ffout" in options:
        scheme = options["ffout"]
        userff = None # Currently not supported
        if scheme != ff: myNameScheme = Forcefield(scheme, myDefinition, userff)
        else: myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(misslist, reslist, charge, ff, myRoutines.getWarnings(), options)
    lines = myProtein.printAtoms(hitlist, chainflag)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic): continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
 
    for ext in extmap:
        module = extmap[ext]
        call = "module.%s(myRoutines, outroot)" % ext
        eval(call)

    if verbose:
        print "Total time taken: %.2f seconds\n" % (time.time() - start)

    return header, lines, missedligandresidues
Exemplo n.º 4
0
def runPDB2PQR(pdblist, ff,
               outname = "",
               ph = None,
               verbose = False,
               extentions = [],
               ententionOptions = ExtentionOptions(),
               clean = False,
               neutraln = False,
               neutralc = False,
               ligand = None,
               assign_only = False,
               chain = False,
               debump = True,
               opt = True,
               typemap = False,
               userff = None,
               usernames = None,
               ffout = None):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)
        
        Keyword Arguments:
            outname:       The name of the desired output file
            ph:            The desired ph of the system (float)
            verbose:       When True, script will print information to stdout
                             When False, no detailed information will be printed (float)
            extentions:      List of extensions to run
            ententionOptions:optionParser like option object that is passed to each object. 
            clean:         only return original PDB file in aligned format.
            neutraln:      Make the N-terminus of this protein neutral
            neutralc:      Make the C-terminus of this protein neutral
            ligand:        Calculate the parameters for the ligand in mol2 format at the given path.
            assign_only:   Only assign charges and radii - do not add atoms, debump, or optimize.
            chain:     Keep the chain ID in the output PQR file
            debump:        When 1, debump heavy atoms (int)
            opt:           When 1, run hydrogen optimization (int)
            typemap:       Create Typemap output.
            userff:        The user created forcefield file to use. Overrides ff.
            usernames:     The user created names file to use. Required if using userff.
            ffout:         Instead of using the standard canonical naming scheme for residue and atom names,  +
                           use the names from the given forcefield
            
        Returns
            header:  The PQR file header (string)
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (ligand)
    """
    
    pkaname = ""
    outroot = ""
    lines = []
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb
    
    period = string.rfind(outname,".")
    
    if period > 0: 
        outroot = outname[0:period]
    else: 
        outroot = outname

    if not ph is None:
        pka = True
        pkaname = outroot + ".propka"
        if os.path.isfile(pkaname): os.remove(pkaname)
    else: 
        pka = False

    start = time.time()

    if verbose:
        print "Beginning PDB2PQR...\n"

    myDefinition = Definition()
    if verbose:
        print "Parsed Amino Acid definition file."   

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if not ligand is None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, ligand, pdblist, verbose)        
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM": 
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print "Created protein object -"
        print "\tNumber of residues in protein: %s" % myProtein.numResidues()
        print "\tNumber of atoms in protein   : %s" % myProtein.numAtoms()
        
    myRoutines = Routines(myProtein, verbose)

    for residue in myProtein.getResidues():
        multoccupancy = 0
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = 1
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy == 1:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append("         at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)
      
        # Process the extensions
        # TODO: kill the eval call.
        for ext in extentions:
            module = extensions.extDict[ext]
            call = "module.%s(myRoutines, outroot)" % ext
            eval(call)  
    
        if verbose:
            print "Total time taken: %.2f seconds\n" % (time.time() - start)
        
        #Be sure to include None for missed ligand residues
        return header, lines, None
    
    #remove any future need to convert to lower case
    if not ff is None:
        ff = ff.lower()
    if not ffout is None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if atomcount == 0 and Lig != None:
            pass
        else:
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()  

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()  

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()


    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)
  
    ligsuccess = 0
    
    if not ligand is None:
        # If this is independent, we can assign charges and radii here 
        for residue in myProtein.getResidues():
            if isinstance(residue, LIG):
                templist = []
                Lig.make_up2date(residue)
                for atom in residue.getAtoms():
                    atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                    atom.radius = Lig.ligand_props[atom.name]["radius"]
                    if atom in misslist:
                        misslist.pop(misslist.index(atom))
                        templist.append(atom)

                charge = residue.getCharge()
                if abs(charge - int(charge)) > 0.001:
                    # Ligand parameterization failed
                    myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                    myRoutines.warnings.append("         the desired ligand; it has been left out of\n")
                    myRoutines.warnings.append("         the PQR file.\n")
                    myRoutines.warnings.append("\n")
                    
                    # remove the ligand
                    myProtein.residues.remove(residue) 
                    for myChain in myProtein.chains:
                        if residue in myChain.residues: myChain.residues.remove(residue)
                else:
                    ligsuccess = 1
                    # Mark these atoms as hits
                    hitlist = hitlist + templist
    
    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        templist = misslist[:]
        for atom in templist:
            if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic): continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if not ffout is None:
        scheme = ffout
        userff = None # Currently not supported
        if scheme != ff: 
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else: 
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(misslist, reslist, charge, ff, myRoutines.getWarnings(), ph, ffout)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic): continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    #TODO: kill the eval call.
    for ext in extentions:
        module = extensions.extDict[ext]
        call = "module.%s(myRoutines, outroot)" % ext
        eval(call)

    if verbose:
        print "Total time taken: %.2f seconds\n" % (time.time() - start)

    return header, lines, missedligandresidues
Exemplo n.º 5
0
def runPDB2PQR(pdblist, ff, options):
    """
        Run the PDB2PQR Suite

        Parameters
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)
            options: A dictionary of PDB2PQR options.  "outname" and
                     "extensions" are required.  The flag-style entries
                     below are switched on by the mere presence of their
                     key; the value stored under the key is not inspected.
                     outname:     The name of the desired output file
                     extensions:  Mapping of extension name to module; each
                                  module must expose a callable with the
                                  same name as the extension
                     verbose:     (flag) print information to stdout
                     debump:      (flag) debump heavy atoms
                     opt:         (flag) run full hydrogen optimization
                     chain:       (flag) passed on to printAtoms
                     clean:       (flag) only print the atoms as read
                     assign-only: (flag) skip rebuilding/optimization
                     ph:          The desired ph of the system (float);
                                  presence enables the PROPKA step
                     ligand:      Ligand description handed to
                                  ligff.initialize
                     userff:      User forcefield file object
                     ffout:       Forcefield whose naming scheme is used
                                  for the output
        Returns
            header:  The PQR file header (string)
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (ligand)

            NOTE: when "clean" is set only (header, lines) is returned.
        Raises
            ValueError: if "outname" is missing from options or is None
    """
    ph = None
    pkaname = ""
    lines = []

    # userff is CGI-based User Forcefield file object
    userff = options.get("userff")

    # Flags: only the presence of the key matters.
    verbose = 1 if "verbose" in options else 0
    optflag = 1 if "opt" in options else 0
    chainflag = 1 if "chain" in options else 0

    if options.get("outname") is None:
        raise ValueError("Error: Output name not set!")

    outname = options["outname"]
    # The root is everything before the FIRST period of the name.
    # NOTE(review): a path with a dotted directory ("/tmp/run.1/out.pqr")
    # is cut inside the directory part; kept as-is because other code may
    # derive companion file names the same way -- confirm before changing.
    period = outname.find(".")
    outroot = outname[:period] if period > 0 else outname

    if "ph" in options:
        pka = 1
        ph = options["ph"]
        pkaname = outroot + ".propka"
        # Start from a clean slate: drop a stale PROPKA output file.
        if os.path.isfile(pkaname):
            os.remove(pkaname)
    else:
        pka = 0

    typemapname = "%s-typemap.html" % outroot

    extmap = options["extensions"]

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if "ligand" in options:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, options["ligand"], pdblist, verbose)
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    myRoutines.setTermini()
    myRoutines.updateBonds()

    if "clean" in options:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chainflag)

        # Process the extensions: call module.<ext>(routines, outroot).
        for ext in extmap:
            getattr(extmap[ext], ext)(myRoutines, outroot)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))
        # NOTE(review): this is a 2-tuple while the normal exit returns a
        # 3-tuple; left unchanged so existing callers keep working.
        return header, lines

    if "assign-only" not in options:

        myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if "debump" in options:
            myRoutines.debumpProtein()

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        if optflag:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.setOptimizeableHydrogens()

        # Second debump pass, now that hydrogens are in place.
        if "debump" in options:
            myRoutines.debumpProtein()

        if optflag:
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0
    if "ligand" in options:

        # If this is independent, we can assign charges and radii here

        # Iterate over a snapshot: a failed ligand is removed from
        # myProtein.residues inside the loop, and mutating the list being
        # iterated would skip the following residue.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.pop(misslist.index(atom))
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the NEAREST integer.  int() truncates toward
            # zero, so e.g. 0.9999 or -0.9999 would be reported as a
            # failed parameterization.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n"
                )
                myRoutines.warnings.append(
                    "         the desired ligand; it has been left out of\n"
                )
                myRoutines.warnings.append("         the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for chain in myProtein.chains:
                    if residue in chain.residues:
                        chain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge

    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if "ffout" in options:
        scheme = options["ffout"]
        userff = None  # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printHeader(misslist, reslist, charge, ff,
                         myRoutines.getWarnings(), options)
    lines = myProtein.printAtoms(hitlist, chainflag)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions: call module.<ext>(routines, outroot).
    for ext in extmap:
        getattr(extmap[ext], ext)(myRoutines, outroot)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues
Exemplo n.º 6
0
def startpKa():
    """
        Function for starting pKa script from the command line.

        Parses the command line (two positional arguments are required:
        the PDB file and the output directory), reads the PDB file and
        hands everything to pre_init.

        Returns
            A 2-tuple (setup, options):
            setup:   whatever pre_init returns for the parsed arguments
            options: the optparse values object, so callers can read the
                     options that are not forwarded to pre_init
                     (resume, pairene, desolvation_res, PS_file)
    """
    print("")
    print('PDB2PQR pKa calculations')
    print("")

    parser = optparse.OptionParser()

    ##
    ## set optparse options
    ##
    parser.add_option(
        '-v',
        '--verbose',
        dest='verbose',
        action="store_true",
        default=False,
    )
    parser.add_option(
        '--pdie',
        dest='pdie',
        default=8,
        type='int',
        help='<protein dielectric constant>',
    )
    parser.add_option(
        '--sdie',
        dest='sdie',
        default=80,
        type='int',
        help='<solvent dielectric constant>',
    )
    parser.add_option(
        '--ff',
        dest='ff',
        type='choice',
        default='parse',
        choices=(
            "amber",
            "AMBER",
            "charmm",
            "CHARMM",
            "parse",
            "PARSE",
        ),
        help='<force field (amber, charmm, parse)>',
    )
    parser.add_option(
        '--resume',
        dest='resume',
        action="store_true",
        default=False,
        help='resume run from saved state.',
    )
    parser.add_option(
        '--ligand',
        dest='ligand',
        type='str',
        help='<ligand in MOL2 format>',
    )
    parser.add_option(
        '--maps',
        dest='maps',
        default=None,
        type='int',
        help='<1 for using provided 3D maps; 2 for genereting new maps>',
    )
    parser.add_option(
        '--xdiel',
        dest='xdiel',
        default=None,
        type='str',
        help='<xdiel maps>',
    )
    parser.add_option(
        '--ydiel',
        dest='ydiel',
        default=None,
        type='str',
        help='<ydiel maps>',
    )
    parser.add_option(
        '--zdiel',
        dest='zdiel',
        default=None,
        type='str',
        help='<zdiel maps>',
    )
    parser.add_option(
        '--kappa',
        dest='kappa',
        default=None,
        type='str',
        help='<ion-accessibility map>',
    )
    parser.add_option(
        '--smooth',
        dest='sd',
        default=None,
        type='float',
        help=
        '<st.dev [A] of Gaussian smooting of 3D maps at the boundary, bandthwith=3 st.dev>',
    )
    #
    # Cut off energy for calculating non-charged-charged interaction energies
    #
    parser.add_option(
        '--pairene',
        dest='pairene',
        type='float',
        default=1.0,
        help=
        'Cutoff energy in kT for calculating non charged-charged interaction energies. Default: %default'
    )
    #
    # Options for doing partial calculations
    #
    parser.add_option(
        '--res_energy',
        dest='desolvation_res',
        default=[],
        action='append',
        type='string',
        help=
        'Calculate desolvation energy and interaction energy for this residue in its default protonation state. Protonation states can be specified with the --protonation_state argument'
    )
    parser.add_option(
        '--PS_file',
        dest='PS_file',
        default='',
        type='string',
        action='store',
        help=
        'Set protonation states according to the pdb2pka protonation state file (option --PS_file)'
    )
    options, args = parser.parse_args()

    ##
    ## parse optparse options
    ##
    # The force field choice accepts both cases; normalise to lower case.
    ff = options.ff.lower()
    verbose = options.verbose

    #
    # Find the PDB file
    #
    if len(args) != 2:
        # parser.error prints the message and exits the program.
        parser.error("Usage: pka.py [options] <pdbfile> <output directory>\n")
    input_path, output_path = args

    # A ligand file that cannot be opened is reported and then ignored;
    # the run continues without a ligand.
    ligand = None
    if options.ligand is not None:
        try:
            ligand = open(options.ligand, 'rU')
        except IOError:
            print('Unable to find ligand file %s! Skipping...' %
                  options.ligand)

    #
    # Read the PDB records
    #
    pdbfile = getPDBFile(input_path)
    pdblist, errlist = readPDB(pdbfile)
    if len(errlist) != 0 and verbose:
        print("Warning: %s is a non-standard PDB file.\n" % input_path)
        print(errlist)

    #
    # Call the pre_init function
    #
    # pre_init takes the raw PDB record list (its parameter is
    # original_pdb_list; it has no "protein" parameter) and builds the
    # Definition/Protein objects itself, so none are built here.  Not
    # building them also leaves the open ligand handle unread until
    # pre_init passes it to ligff.initialize.
    return pre_init(original_pdb_list=pdblist,
                    output_dir=output_path,
                    ff=ff,
                    verbose=verbose,
                    pdie=options.pdie,
                    sdie=options.sdie,
                    maps=options.maps,
                    xdiel=options.xdiel,
                    ydiel=options.ydiel,
                    zdiel=options.zdiel,
                    kappa=options.kappa,
                    sd=options.sd,
                    ligand=ligand), options
Exemplo n.º 7
0
def pre_init(original_pdb_list=None,
             output_dir=None,
             ff=None,
             verbose=False,
             pdie=8.0,
             sdie=80,
             maps=None,
             xdiel=None,
             ydiel=None,
             zdiel=None,
             kappa=None,
             sd=None,
             ligand=None):
    """This function cleans the PDB and prepares the APBS input file

    Prepares the output folder.

    Parameters
        original_pdb_list: PDB records to process; they are written
                           without hydrogens to <output_dir>/workspace/
                           working.pdb and read back from there
        output_dir:        directory for all output (created if missing)
        ff:                name of the forcefield
        verbose:           when true, print progress information
        pdie, sdie:        protein / solvent dielectric constants stored
                           on the APBS input generator
        maps:              when 1, the x/y/z dielectric maps and the kappa
                           map are required and stored on the generator
        xdiel, ydiel, zdiel, kappa: map names used when maps == 1
        sd:                when set (and maps == 1) the dielectric maps
                           are replaced by the result of smooth(...)
        ligand:            ligand description handed to ligff.initialize,
                           or None for a run without ligand

    Returns
        (output_dir, myProtein, myRoutines, myForcefield, igen,
         ligand_titratable_groups, maps, sd) where output_dir is the
        absolute path and ligand_titratable_groups is always None here.

    Raises
        ValueError:   output_dir or its "workspace" entry exists but is
                      not a directory
        Exception:    the ligand ends up with a non-integer net charge
        PDB2PKAError: maps == 1 and a required map is missing
    """

    #prepare the output directory

    output_dir = os.path.abspath(output_dir)

    # makedirs fails when the path already exists; that is fine as long
    # as what exists is a directory.
    try:
        os.makedirs(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise ValueError('Target directory is a file! Aborting.')

    workspace_dir = os.path.join(output_dir, 'workspace')

    try:
        os.makedirs(workspace_dir)
    except OSError:
        # Check the workspace path itself: output_dir is known to be a
        # directory at this point, so testing it again would never fire.
        if not os.path.isdir(workspace_dir):
            raise ValueError('Target directory is a file! Aborting.')

    #
    # remove hydrogen atoms
    #

    working_pdb_filename = os.path.join(workspace_dir, 'working.pdb')

    pka_help.dump_protein_no_hydrogens(original_pdb_list, working_pdb_filename)
    #
    # Get the PDBfile
    #
    pdbfile = getPDBFile(working_pdb_filename)
    pdblist, errlist = readPDB(pdbfile)

    if verbose:
        print("Beginning PDB2PKA...\n")
    #
    # Read the definition file
    #
    myDefinition = Definition()
    ligand_titratable_groups = None
    #
    #
    # Choose whether to include the ligand or not
    #
    # Add the ligand to the pdb2pqr arrays
    #
    Lig = None
    if ligand is None:
        myProtein = Protein(pdblist, myDefinition)
    else:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
    #
    # =======================================================================
    #
    # We have identified the structural elements, now continue with the setup
    #
    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())
    #
    # Set up all other routines
    #
    myRoutines = Routines(myProtein, verbose)  #myDefinition)
    myRoutines.updateResidueTypes()
    myRoutines.updateSSbridges()
    myRoutines.updateBonds()
    myRoutines.setTermini()
    myRoutines.updateInternalBonds()

    myRoutines.applyNameScheme(Forcefield(ff, myDefinition, None))
    myRoutines.findMissingHeavy()
    myRoutines.addHydrogens()
    myRoutines.debumpProtein()

    #myRoutines.randomizeWaters()
    myProtein.reSerialize()
    #
    # Inject the information on hydrogen conformations in the HYDROGENS.DAT arrays
    # We get this information from ligand_titratable_groups
    #
    from src.hydrogens import hydrogenRoutines
    myRoutines.updateInternalBonds()
    myRoutines.calculateDihedralAngles()
    myhydRoutines = hydrogenRoutines(myRoutines)
    #
    # Here we should inject the info!!
    #
    myhydRoutines.setOptimizeableHydrogens()
    myhydRoutines.initializeFullOptimization()
    myhydRoutines.optimizeHydrogens()
    myhydRoutines.cleanup()
    myRoutines.setStates()

    #
    # Choose the correct forcefield
    #
    myForcefield = Forcefield(ff, myDefinition, None)
    if Lig:
        hitlist, misslist = myRoutines.applyForcefield(myForcefield)
        #
        # Can we get charges for the ligand?
        #
        ligsuccess = False
        for residue in myProtein.getResidues():
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            net_charge = 0.0
            print('Ligand %s' % (residue,))
            print('Atom\tCharge\tRadius')
            for atom in residue.getAtoms():
                # Prefer the charge carried by the atom itself.
                # NOTE(review): a mol2charge of exactly 0 is falsy and
                # falls back to ligand_props -- confirm this is intended.
                if atom.mol2charge:
                    atom.ffcharge = atom.mol2charge
                else:
                    atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                #
                # Find the net charge
                #
                net_charge = net_charge + atom.ffcharge
                #
                # Assign radius
                #
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                print('%s\t%6.4f\t%6.4f' % (atom.name, atom.ffcharge,
                                            atom.radius))
                if atom in misslist:
                    misslist.pop(misslist.index(atom))
                    templist.append(atom)
                #
                # Store the charge and radius in the atom instance for later use
                # This really should be done in a nicer way, but this will do for now
                #
                atom.secret_radius = atom.radius
                atom.secret_charge = atom.ffcharge

            charge = residue.getCharge()
            if abs(charge - round(charge)) > 0.01:
                # Ligand parameterization failed; the raise below leaves
                # the loop at once, so removing during iteration is safe.
                myProtein.residues.remove(residue)
                raise Exception('Non-integer charge on ligand: %8.5f' %
                                charge)

            ligsuccess = True
            # Mark these atoms as hits
            hitlist = hitlist + templist
            #
            # Print the net charge
            #
            print('Net charge for ligand %s is: %5.3f' % (residue.name,
                                                          net_charge))
        #
        # Temporary fix; if ligand was successful, pull all ligands from misslist
        # Not sure if this is needed at all here ...? (Jens wrote this)
        #
        if ligsuccess:
            for atom in misslist[:]:
                if isinstance(atom.residue, (Amino, Nucleic)):
                    continue
                misslist.remove(atom)

    if verbose:
        print("Created protein object (after processing myRoutines) -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())
    #
    # Create the APBS input file
    #
    import src.psize
    # NOTE(review): `size` is not used below; the construction is kept in
    # case building a Psize matters elsewhere -- confirm and remove.
    size = src.psize.Psize()

    igen = inputgen_pKa.inputGen(working_pdb_filename)
    #
    # For convenience
    #
    igen.pdie = pdie
    print('Setting protein dielectric constant to  %s' % (igen.pdie,))
    igen.sdie = sdie
    igen.maps = maps
    if maps == 1:
        print("Using dielectric and mobile ion-accessibility function maps in PBE")
        if xdiel:
            igen.xdiel = xdiel
        else:
            raise PDB2PKAError('X dielectric map is missing')
        if ydiel:
            igen.ydiel = ydiel
        else:
            raise PDB2PKAError("Y dielectric map is missing\n")
        if zdiel:
            igen.zdiel = zdiel
        else:
            raise PDB2PKAError("Z dielectric map is missing\n")

        print('Setting dielectric function maps: %s, %s, %s' % (
            igen.xdiel, igen.ydiel, igen.zdiel))

        if kappa:
            igen.kappa = kappa
        else:
            raise PDB2PKAError("Mobile ion-accessibility map is missing\n")

        print('Setting mobile ion-accessibility function map to:  %s' %
              (igen.kappa,))

        if sd:
            xdiel_smooth, ydiel_smooth, zdiel_smooth = smooth(
                xdiel, ydiel, zdiel)
            igen.xdiel = xdiel_smooth
            igen.ydiel = ydiel_smooth
            igen.zdiel = zdiel_smooth
    #
    # Return all we need
    #
    return output_dir, myProtein, myRoutines, myForcefield, igen, ligand_titratable_groups, maps, sd
Exemplo n.º 8
0
def runPDB2PQR(pdblist,
               ff,
               outname="",
               ph=None,
               verbose=False,
               selectedExtensions=[],
               extensionOptions=utilities.ExtraOptions(),
               ph_calc_method=None,
               ph_calc_options=None,
               clean=False,
               neutraln=False,
               neutralc=False,
               ligand=None,
               assign_only=False,
               chain=False,
               drop_water=False,
               debump=True,
               opt=True,
               typemap=False,
               userff=None,
               usernames=None,
               ffout=None,
               holdList=None,
               commandLine=None,
               include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:       The name of the desired output file
            ph:            The desired ph of the system (float)
            verbose:       When True, script will print information to stdout
                             When False, no detailed information will be printed
            selectedExtensions: List of extension names to run (looked up
                             in extensions.extDict).  Only iterated, never
                             modified.
            extensionOptions:optionParser like option object that is passed to each extension.
            ph_calc_method: pKa calculation method ("propka","propka31","pdb2pka")
            ph_calc_options: optionParser like option object passed to the pKa method.
            clean:         only return original PDB file in aligned format.
            neutraln:      Make the N-terminus of this protein neutral
            neutralc:      Make the C-terminus of this protein neutral
            ligand:        Ligand description handed to ligff.initialize
                           (None for a run without ligand).
            assign_only:   Only assign charges and radii - do not add atoms, debump, or optimize.
            chain:         Keep the chain ID in the output PQR file
            drop_water:    Remove water molecules from output
            debump:        When true, debump heavy atoms
            opt:           When true, run full hydrogen optimization;
                           otherwise only the water optimization is run
            typemap:       Create Typemap output.
            userff:        The user created forcefield file to use. Overrides ff.
            usernames:     The user created names file to use. Required if using userff.
            ffout:         Instead of using the standard canonical naming scheme for residue and atom names,
                           use the names from the given forcefield
            holdList:      A list of residues not to be optimized, as [(resid, chain, icode)]
            commandLine:   command line used (if any) to launch the program. Included in output header.
            include_old_header: Include most of the PDB header in output.

        Returns
            A dict with the keys
            header:        The PQR file header (string)
            lines:         The PQR file atoms (list)
            missedligands: A list of ligand residue names whose charges could
                           not be assigned; None when clean is set
            protein:       The protein object
            routines:      The Routines object used for the run
    """

    pkaname = ""
    lines = []
    Lig = None
    atomcount = 0  # Count the number of ATOM records in pdb

    outroot = utilities.getPQRBaseFileName(outname)

    if ph_calc_method == 'propka':
        pkaname = outroot + ".propka"
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    if drop_water:
        # Remove the waters: drop atom-like records whose residue name is
        # a water name, keep every other record untouched.
        pdblist_new = []
        for record in pdblist:
            if isinstance(record, (HETATM, ATOM, SIGATM, SEQADV)):
                if record.resName in WAT.water_residue_names:
                    continue
            pdblist_new.append(record)

        pdblist = pdblist_new

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM":
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Warn once per residue that has at least one atom with an alternate
    # location marker.
    for residue in myProtein.getResidues():
        if any(atom.altLoc != "" for atom in residue.getAtoms()):
            myRoutines.warnings.append(
                "WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(
                "         at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            #TODO: figure out a way to do this without crashing...
            #tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        # Same keys as the normal exit so callers can index the result
        # unconditionally; missed ligand residues are reported as None.
        return dict(header=header,
                    lines=lines,
                    missedligands=None,
                    protein=myProtein,
                    routines=myRoutines)

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if ph_calc_method == 'propka':
            myRoutines.runPROPKA(ph,
                                 ff,
                                 outroot,
                                 pkaname,
                                 ph_calc_options,
                                 version=30)
        elif ph_calc_method == 'propka31':
            myRoutines.runPROPKA(ph,
                                 ff,
                                 outroot,
                                 pkaname,
                                 ph_calc_options,
                                 version=31)
        elif ph_calc_method == 'pdb2pka':
            myRoutines.runPDB2PKA(ph, ff, pdblist, ligand, verbose,
                                  ph_calc_options)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        # Second debump pass, now that hydrogens are in place.
        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            # TONI fixing residues - myhydRoutines has a reference to myProtein, so i'm altering it in place
            myRoutines.holdResidues(holdList)
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()

    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0

    if ligand is not None:
        # If this is independent, we can assign charges and radii here

        # Iterate over a snapshot: a failed ligand is removed from
        # myProtein.residues inside the loop, and mutating the list being
        # iterated would skip the following residue.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.pop(misslist.index(atom))
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the NEAREST integer.  int() truncates toward
            # zero, so e.g. 0.9999 or -0.9999 would be reported as a
            # failed parameterization.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n"
                )
                myRoutines.warnings.append(
                    "         the desired ligand; it has been left out of\n"
                )
                myRoutines.warnings.append("         the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if ffout is not None:
        scheme = ffout
        userff = None  # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist,
                            misslist,
                            reslist,
                            charge,
                            ff,
                            myRoutines.getWarnings(),
                            ph_calc_method,
                            ph,
                            ffout,
                            commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return dict(header=header,
                lines=lines,
                missedligands=missedligandresidues,
                protein=myProtein,
                routines=myRoutines)
Exemplo n.º 9
0
def runPDB2PQR(pdblist, ff,
               outname = "",
               ph = None,
               verbose = False,
               selectedExtensions = None,
               extensionOptions = None,
               ph_calc_method = None,
               ph_calc_options = None,
               clean = False,
               neutraln = False,
               neutralc = False,
               ligand = None,
               assign_only = False,
               chain = False,
               drop_water = False,
               debump = True,
               opt = True,
               typemap = False,
               userff = None,
               usernames = None,
               ffout = None,
               holdList = None,
               commandLine = None,
               include_old_header = False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:       The name of the desired output file
            ph:            The desired ph of the system (float)
            verbose:       When True, script will print information to stdout
                             When False, no detailed information will be printed (bool)
            selectedExtensions: List of extension names to run; each name is looked up
                           in extensions.extDict. None (the default) means no extensions.
            extensionOptions: optionParser like option object that is passed to each
                           extension. None (the default) creates a fresh
                           utilities.ExtraOptions() for this call.
            ph_calc_method: pKa calculation method ("propka","propka31","pdb2pka")
            ph_calc_options: option object handed to the selected pKa routine.
            clean:         only return original PDB file in aligned format.
            neutraln:      Make the N-terminus of this protein neutral
            neutralc:      Make the C-terminus of this protein neutral
            ligand:        Calculate the parameters for the ligand in mol2 format at the given path.
            assign_only:   Only assign charges and radii - do not add atoms, debump, or optimize.
            chain:         Keep the chain ID in the output PQR file
            drop_water:    Remove water molecules from output
            debump:        When true, debump heavy atoms
            opt:           When true, run full hydrogen optimization; otherwise only
                           the water optimization is initialized before optimizing.
            typemap:       Create Typemap output.
            userff:        The user created forcefield file to use. Overrides ff.
            usernames:     The user created names file to use. Required if using userff.
            ffout:         Instead of using the standard canonical naming scheme for residue and atom names,
                           use the names from the given forcefield
            holdList:      A list of residues not to be optimized, as [(resid, chain, icode)]
            commandLine:   command line used (if any) to launch the program. Included in output header.
            include_old_header: Include most of the PDB header in output.

        Returns
            header:  The PQR file header (string)
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (list)
            protein: The protein object

            When clean is true the function returns early with the 3-tuple
            (header, lines, None) and no protein object.
    """

    # Build per-call defaults here so no mutable object is shared between calls.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb

    outroot = utilities.getPQRBaseFileName(outname)

    if ph_calc_method == 'propka':
        pkaname = outroot + ".propka"
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    if drop_water:
        # Drop every atom-like record whose residue name is a known water name;
        # all other records pass through untouched and in order.
        pdblist = [record for record in pdblist
                   if not (isinstance(record, (HETATM, ATOM, SIGATM, SEQADV))
                           and record.resName in WAT.water_residue_names)]

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, ligand, pdblist, verbose)
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM":
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein   : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Warn (stderr per atom, header warning per residue) about alternate locations.
    for residue in myProtein.getResidues():
        multoccupancy = False
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = True
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append("         at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            #TODO: figure out a way to do this without crashing...
            #tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        #Be sure to include None for missed ligand residues
        # NOTE(review): this early return is a 3-tuple while the normal path
        # returns 4 values; kept as-is because callers may rely on it.
        return header, lines, None

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb,
        # so the missing-heavy-atom search is skipped in that one case.
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if ph_calc_method == 'propka':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options, version=30)
        elif ph_calc_method == 'propka31':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options, version=31)
        elif ph_calc_method == 'pdb2pka':
            myRoutines.runPDB2PKA(ph, ff, pdblist, ligand, verbose, ph_calc_options)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            # Hold the requested residues before the full optimization is set up.
            # (Original author's note: myhydRoutines references myProtein, so the
            # protein is altered in place.)
            myRoutines.holdResidues(holdList)
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()

    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = False

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from
        # myProtein.residues below, and removing from a list that is being
        # iterated would skip the following residue.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

            # The total ligand charge must be (numerically) an integer.
            # round() is used rather than int(): int() truncates toward zero,
            # so a sum such as 0.9999 or -0.9999 would be misjudged as
            # non-integral.
            ligcharge = residue.getCharge()
            if abs(ligcharge - round(ligcharge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append("         the desired ligand; it has been left out of\n")
                myRoutines.warnings.append("         the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = True
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if ffout is not None:
        if ffout != ff:
            # A user forcefield is currently not supported for the output scheme.
            myNameScheme = Forcefield(ffout, myDefinition, None)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist, misslist, reslist, charge, ff,
                            myRoutines.getWarnings(), ph_calc_method, ph, ffout, commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues, myProtein