def _analyse(self, mol, rtf, prm, pdb, traj):
    """Plot every sorted torsion of a molecule over a trajectory.

    An ``FFMolecule`` is built from ``mol``, ``rtf`` and ``prm`` and ``traj``
    is read into it. For each entry of ``getSortTorsions()`` the torsion is
    measured on the loaded coordinates and drawn as a scatter plot and a
    histogram. ``pdb`` is accepted but not used by this method.
    """
    molecule = FFMolecule(filename=mol, rtf=rtf, prm=prm)
    molecule.read(traj)

    torsions = molecule.getSortTorsions()
    print(torsions)

    # torsions[0][k] is what gets measured; torsions[1][k] supplies the four
    # labels that make up the plot title.
    for position, torsion in enumerate(torsions[0]):
        labels = torsions[1][position]
        title = labels[0] + "-" + labels[1] + "-" + labels[2] + "-" + labels[3]

        (r, theta) = self._measure_torsion(torsion, molecule.coords)
        self._plot_scatter(r, theta, title)
        self._plot_hist(theta, title)
def test_makeDihedralUnique(self):
    """Check the atom types of glycol before and after ``_makeDihedralUnique``."""
    from htmd.home import home
    from htmd.parameterization.ffmolecule import FFMolecule, FFTypeMethod

    molFile = os.path.join(home('test-param'), 'glycol.mol2')

    # Each case: (dihedral handed to _makeDihedralUnique, expected atom types).
    # ``None`` means the freshly typed molecule is checked untouched.
    cases = (
        (None,
         ['oh', 'c3', 'c3', 'oh', 'ho', 'h1', 'h1', 'h1', 'h1', 'ho']),
        ([0, 1, 2, 3],
         ['ohx0', 'c3x0', 'c3x0', 'ohx0', 'ho', 'h1', 'h1', 'h1', 'h1', 'ho']),
        ([4, 0, 1, 2],
         ['ohx0', 'c3x0', 'c3x0', 'ohx0', 'hox0', 'h1', 'h1', 'h1', 'h1', 'hox0']),
        ([5, 1, 2, 7],
         ['oh', 'c3x0', 'c3x0', 'oh', 'ho', 'h1x0', 'h1x0', 'h1x0', 'h1x0', 'ho']),
    )

    for dihedral, expected in cases:
        # Every case starts from a newly built molecule
        self.df.molecule = FFMolecule(molFile, method=FFTypeMethod.GAFF2)
        if dihedral is not None:
            self.df._makeDihedralUnique(dihedral)

        types = [
            self.df.molecule._rtf.type_by_index[i]
            for i in range(self.df.molecule.numAtoms)
        ]
        self.assertListEqual(types, expected)
def main_scan():
    """Scan all soft dihedrals of the molecule named on the command line.

    Exactly one command-line argument is expected: the input molecule file.
    The molecule is loaded as an ``FFMolecule`` with the ``CGenFF_2b6``
    typing method; every dihedral returned by ``getSoftDihedrals()`` is
    scanned with ``scanSoftDihedral(..., directory="scan", step=20)`` and the
    ``phi``/``energy`` of each resulting point is printed.

    Exits with status 1 (after printing the syntax) when the argument count
    is wrong.
    """
    if len(sys.argv) != 2:
        print("Syntax: %s input.mol" % (sys.argv[0]))
        # A usage error is a failure, so it must not report success (0)
        sys.exit(1)

    filename = sys.argv[1]
    print("Scan {}".format(filename))

    mol = FFMolecule(filename=filename, method=FFTypeMethod.CGenFF_2b6)

    for d in mol.getSoftDihedrals():
        print("\nScanning dihedral %s-%s-%s-%s" %
              (mol.name[d[0]], mol.name[d[1]], mol.name[d[2]], mol.name[d[3]]))
        qmset = mol.scanSoftDihedral(d, directory="scan", step=20)
        for point in qmset:
            print("%f %f" % (point.phi, point.energy))
def _analyse(self, mol, pdb, rtf, prm, traj, ftraj):
    """Plot every rotatable dihedral of a molecule over a water-free trajectory.

    ``traj`` is read on top of the ``pdb`` structure, filtered with
    ``'not water'`` and written to ``ftraj``. That filtered trajectory is
    then read into an ``FFMolecule`` built from ``mol``, ``rtf`` and ``prm``,
    and each rotatable dihedral is measured and plotted (scatter and
    histogram).
    """
    # Write the filtered copy of the trajectory
    system = Molecule(pdb)
    system.read(traj)
    system.filter('not water')
    system.write(ftraj)

    ligand = FFMolecule(filename=mol, rtf=rtf, prm=prm)
    ligand.read(ftraj)

    for torsion in ligand.getRotatableDihedrals():
        # Title is built from the names of the four atoms of the torsion
        title = '{}-{}-{}-{}'.format(ligand.name[torsion[0]],
                                     ligand.name[torsion[1]],
                                     ligand.name[torsion[2]],
                                     ligand.name[torsion[3]])

        (r, theta) = self._measure_torsion(torsion, ligand.coords)
        self._plot_scatter(r, theta, title)
        self._plot_hist(theta, title)
def setUp(self):
    """Create the H2O2 test molecule (GAFF2 typing) and an ``ESP`` bound to it."""
    from htmd.home import home
    from htmd.parameterization.ffmolecule import FFMolecule, FFTypeMethod

    mol_path = os.path.join(home('test-param'), 'H2O2.mol2')
    self.mol = FFMolecule(mol_path, method=FFTypeMethod.GAFF2)

    esp = ESP()
    esp.molecule = self.mol
    self.esp = esp
def test_getEquivalentDihedrals(self):
    """Each probed glycol dihedral must come back as its own single equivalent."""
    from htmd.home import home
    from htmd.parameterization.ffmolecule import FFMolecule, FFTypeMethod

    molFile = os.path.join(home('test-param'), 'glycol.mol2')
    self.df.molecule = FFMolecule(molFile, method=FFTypeMethod.GAFF2)

    for atoms in ((0, 1, 2, 3), (4, 0, 1, 2), (5, 1, 2, 7)):
        # Build the expectation from a separate list, independent of the argument
        expected = [list(atoms)]
        self.assertListEqual(self.df._getEquivalentDihedrals(list(atoms)), expected)
from tempfile import TemporaryDirectory from htmd.home import home from htmd.parameterization.ffmolecule import FFMolecule from htmd.parameterization.fftype import FFTypeMethod np.random.seed(20170801) # Make the tests deterministic molFile = os.path.join(home('building-protein-ligand'), 'benzamidine.mol2') methods = (FFTypeMethod.CGenFF_2b6, FFTypeMethod.GAFF, FFTypeMethod.GAFF2) # TODO: remove then MATCH is fixed on Mac methods = methods[1:] if os.environ.get( 'TRAVIS_OS_NAME') == 'osx' else methods for method in methods: mol = FFMolecule(molFile, method=method) # Generate random charges for name in mol._rtf.charge_by_name: mol._rtf.charge_by_name[name] = 0.1 * np.random.randn() # Generate a list of original and randomly distorted coordinates coords = mol.coords[:, :, 0] coordsList = [coords] + [ coords + 0.01 * np.random.randn(*coords.shape) for _ in range(9) ] for coords in coordsList: with TemporaryDirectory() as tmpDir:
import os from tempfile import TemporaryDirectory from htmd.home import home from htmd.parameterization.ffmolecule import FFMolecule from htmd.parameterization.fftype import FFTypeMethod np.random.seed(20170801) # Make the tests deterministic molFile = os.path.join(home('building-protein-ligand'), 'benzamidine.mol2') methods = (FFTypeMethod.CGenFF_2b6, FFTypeMethod.GAFF, FFTypeMethod.GAFF2) # TODO: remove then MATCH is fixed on Mac methods = methods[1:] if os.environ.get('TRAVIS_OS_NAME') == 'osx' else methods for method in methods: mol = FFMolecule(molFile, method=method) # Generate random charges for name in mol._rtf.charge_by_name: mol._rtf.charge_by_name[name] = 0.1*np.random.randn() # Generate a list of original and randomly distorted coordinates coords = mol.coords[:, :, 0] coordsList = [coords] + [coords + 0.01*np.random.randn(*coords.shape) for _ in range(9)] for coords in coordsList: with TemporaryDirectory() as tmpDir: if method == FFTypeMethod.CGenFF_2b6: psfFile = os.path.join(tmpDir, 'mol.psf')
def main_parameterize(arguments=None):
    """Command-line entry point: parameterize a molecule against QM data.

    The arguments are parsed with the parser returned by
    ``getArgumentParser()``. A QM queue and a QM engine are configured from
    them, then for every requested force field an ``FFMolecule`` is created
    and, depending on the flags, minimized, fitted for ESP charges and
    fitted for dihedral parameters. Parameters and an energy file are
    written for each force field.

    With ``--list`` style invocation (``args.list``) only the rotatable
    dihedrals are printed and written to ``torsions.txt``, after which the
    process exits with status 0.

    Parameters
    ----------
    arguments : list of str, optional
        Forwarded as ``args=`` to ``parse_args``.

    Raises
    ------
    ValueError
        If the input file does not exist, a fixed-charge atom name is not in
        the molecule, or a requested dihedral is not a rotatable dihedral.
    NotImplementedError
        If the queue or the QM code is not one of the handled values.
    """
    args = getArgumentParser().parse_args(args=arguments)

    if not os.path.exists(args.filename):
        raise ValueError('File %s cannot be found' % args.filename)

    method_map = {'GAFF': FFTypeMethod.GAFF,
                  'GAFF2': FFTypeMethod.GAFF2,
                  'CGENFF': FFTypeMethod.CGenFF_2b6}
    methods = [method_map[method] for method in args.forcefield]

    # TODO: move into FFMolecule
    # Get RTF and PRM file names
    rtf, prm = None, None
    if args.rtf_prm:
        rtf, prm = args.rtf_prm

    # Create a queue for QM
    if args.queue == 'local':
        queue = LocalCPUQueue()
    elif args.queue == 'Slurm':
        queue = SlurmQueue(_configapp=args.code.lower())
    elif args.queue == 'LSF':
        queue = LsfQueue(_configapp=args.code.lower())
    elif args.queue == 'PBS':
        queue = PBSQueue()  # TODO: configure
    elif args.queue == 'AceCloud':
        queue = AceCloudQueue()  # TODO: configure
    else:
        raise NotImplementedError

    # Override default ncpus
    if args.ncpus:
        logger.info('Overriding ncpus to {}'.format(args.ncpus))
        queue.ncpu = args.ncpus

    # Create a QM object
    if args.code == 'Psi4':
        qm = Psi4()
    elif args.code == 'Gaussian':
        qm = Gaussian()
    else:
        raise NotImplementedError

    # This is for debugging only!
    if args.fake_qm:
        qm = FakeQM()
        logger.warning('Using FakeQM')

    # Set up the QM object
    qm.theory = args.theory
    qm.basis = args.basis
    qm.solvent = args.environment
    qm.queue = queue

    # List rotatable dihedral angles
    if args.list:
        mol = FFMolecule(args.filename, method=methods[0], netcharge=args.charge, rtf=rtf, prm=prm, qm=qm,
                         outdir=args.outdir)
        print('\n === Parameterizable dihedral angles of %s ===\n' % args.filename)
        with open('torsions.txt', 'w') as fh:
            for dihedral in mol.getRotatableDihedrals():
                dihedral_name = '%s-%s-%s-%s' % tuple(mol.name[dihedral])
                print(' ' + dihedral_name)
                fh.write(dihedral_name + '\n')
        print()
        sys.exit(0)

    # Print arguments
    print('\n === Arguments ===\n')
    for key, value in vars(args).items():
        print('{:>12s}: {:s}'.format(key, str(value)))

    print('\n === Parameterizing %s ===\n' % args.filename)
    for method in methods:
        print(" === Fitting for %s ===\n" % method.name)

        # Create the molecule
        mol = FFMolecule(args.filename, method=method, netcharge=args.charge, rtf=rtf, prm=prm, qm=qm,
                         outdir=args.outdir)
        mol.printReport()

        # Copy the molecule to preserve initial coordinates
        mol_orig = mol.copy()

        # Update B3LYP to B3LYP-D3
        # TODO: this is silent and not documented stuff
        if qm.theory == 'B3LYP':
            qm.correction = 'D3'

        # Update basis sets
        # TODO: this is silent and not documented stuff
        if mol.netcharge < 0 and qm.solvent == 'vacuum':
            if qm.basis == '6-31G*':
                qm.basis = '6-31+G*'
            if qm.basis == 'cc-pVDZ':
                qm.basis = 'aug-cc-pVDZ'
            logger.info('Changing basis sets to %s' % qm.basis)

        # Minimize molecule
        if args.minimize:
            print('\n == Minimizing ==\n')
            mol.minimize()

        # Fit ESP charges
        if args.fit_charges:
            print('\n == Fitting ESP charges ==\n')

            # Set random number generator seed
            if args.seed:
                np.random.seed(args.seed)

            # Select the atoms with fixed charges
            fixed_atom_indices = []
            for fixed_atom_name in args.fix_charge:

                if fixed_atom_name not in mol.name:
                    raise ValueError('Atom %s is not found. Check --fix-charge arguments' % fixed_atom_name)

                # Every atom carrying that name gets its charge fixed
                for atom_index in range(mol.numAtoms):
                    if mol.name[atom_index] == fixed_atom_name:
                        fixed_atom_indices.append(atom_index)
                        logger.info('Charge of atom %s is fixed to %f' % (fixed_atom_name, mol.charge[atom_index]))

            # Fit ESP charges
            score, qm_dipole = mol.fitCharges(fixed=fixed_atom_indices)

            # Print results
            mm_dipole = mol.getDipole()
            # The dipole score lives in its own variable: reusing `score`
            # would overwrite the fitting score before it is printed.
            dipole_score = np.sum((qm_dipole[:3] - mm_dipole[:3])**2)
            print('Charge fitting score: %f\n' % score)
            print('QM dipole: %f %f %f; %f' % tuple(qm_dipole))
            print('MM dipole: %f %f %f; %f' % tuple(mm_dipole))
            print('Dipole Chi^2 score: %f\n' % dipole_score)

        # Fit dihedral angle parameters
        if args.fit_dihedral:
            print('\n == Fitting dihedral angle parameters ==\n')

            # Set random number generator seed
            if args.seed:
                np.random.seed(args.seed)

            # Get all rotatable dihedrals
            all_dihedrals = mol.getRotatableDihedrals()

            # Choose which dihedrals to fit
            dihedrals = []
            all_dihedral_names = ['-'.join(mol.name[dihedral]) for dihedral in all_dihedrals]
            for dihedral_name in args.dihedral:
                if dihedral_name not in all_dihedral_names:
                    raise ValueError('%s is not recognized as a rotatable dihedral angle' % dihedral_name)
                dihedrals.append(all_dihedrals[all_dihedral_names.index(dihedral_name)])
            # Set default to all dihedral angles
            if len(dihedrals) == 0:
                dihedrals = all_dihedrals

            # Fit the parameters
            mol.fitDihedrals(dihedrals, args.optimize_dihedral)

        # Output the FF parameters
        print('\n == Writing results ==\n')
        mol.writeParameters(mol_orig)

        # Write energy file
        energyFile = os.path.join(mol.outdir, 'parameters', method.name, mol.output_directory_name(), 'energies.txt')
        printEnergies(mol, energyFile)
        logger.info('Write energy file: %s' % energyFile)
def main_parameterize():
    """Command-line entry point of the small-molecule parameterisation tool.

    Parses ``sys.argv`` and, for each selected force-field typing method,
    builds an ``FFMolecule`` and then (unless disabled by the ``--no-*``
    flags) minimises it, fits the charges and iteratively fits the soft
    torsions until the Chi^2 score of every torsion changes by at most 1%
    between two consecutive iterations. The resulting parameters and
    simulation input files are written under
    ``<outdir>/parameters/<method>/<mol.output_directory_name()>``.

    With ``--list`` only the soft torsions are printed and written to
    ``torsions.txt``.

    The function always terminates the process through ``sys.exit``:
    status 0 on completion, status 1 when the input file is missing or an
    option value is not recognised.

    Raises
    ------
    ValueError
        If an atom named with ``--freeze-charge`` is not in the molecule.
    OSError
        If the output directory cannot be created.
    """
    # NCPUS from the environment, when it is a valid integer, replaces the
    # detected CPU count as the default of --ncpus
    try:
        ncpus = int(os.environ["NCPUS"])
    except (KeyError, ValueError):
        ncpus = os.cpu_count()

    parser = argparse.ArgumentParser(description="Acellera Small Molecule Parameterisation Version 2.0")
    parser.add_argument("-m", "--mol2", help="Molecule to parameterise, in mol2 format", required=True, type=str,
                        default=None, metavar="<input.mol2>", action="store", dest="mol")
    parser.add_argument("-l", "--list", "--list-torsions", help="List parameterisable torsions",
                        action="store_true", default=False, dest="list")
    parser.add_argument("-c", "--charge", help="Net charge on molecule (default: sum of the partial charges on the "
                                               ".mol2 file)", type=int, default=None, action="store", dest="charge")
    parser.add_argument("--rtf", help="Inital RTF parameters (req --prm)", type=str, default=None, dest="rtf")
    parser.add_argument("--prm", help="Inital PRM parameters (req --rtf)", type=str, default=None, dest="prm")
    parser.add_argument("-o", "--outdir", help="Output directory (default: %(default)s)", type=str, default="./",
                        dest="outdir")
    parser.add_argument("-t", "--torsion", metavar="A1-A2-A3-A4", help="Torsion to parameterise (default: %(default)s)",
                        default="all", dest="torsion")
    parser.add_argument("-n", "--ncpus", help="Number of CPUs to use (default: %(default)s)", default=ncpus,
                        dest="ncpus")
    parser.add_argument("-f", "--forcefield", help="Inital FF guess to use (default: %(default)s)",
                        choices=["GAFF", "GAFF2", "CGENFF", "all"], default="all")
    parser.add_argument("-b", "--basis", help="QM Basis Set (default: %(default)s)", choices=["6-31g-star", "cc-pVDZ"],
                        default="cc-pVDZ", dest="basis")
    parser.add_argument("--theory", help="QM Theory (default: %(default)s)", choices=["RHF", "B3LYP"],
                        default="B3LYP", dest="theory")
    parser.add_argument("--vacuum", help="Perform QM calculations in vacuum (default: %(default)s)",
                        action="store_true", dest="vacuum", default=False)
    parser.add_argument("--no-min", help="Do not perform QM minimisation (default: %(default)s)",
                        action="store_true", dest="nomin", default=False)
    parser.add_argument("--no-esp", help="Do not perform QM charge fitting (default: %(default)s)",
                        action="store_true", dest="noesp", default=False)
    parser.add_argument("--no-torsions", help="Do not perform torsion fitting (default: %(default)s)",
                        action="store_true", dest="notorsion", default=False)
    parser.add_argument("-e", "--exec", help="Mode of execution for the QM calculations (default: %(default)s)",
                        choices=["inline", "LSF", "PBS", "Slurm", "AceCloud"], default="inline", dest="exec")
    parser.add_argument("--qmcode", help="QM code (default: %(default)s)", choices=["Gaussian", "PSI4", "TeraChem"],
                        default="PSI4", dest="qmcode")
    parser.add_argument("--freeze-charge", metavar="A1",
                        help="Freeze the charge of the named atom (default: %(default)s)", action="append",
                        default=None, dest="freezeq")

    args = parser.parse_args()

    # Communicate the # of CPUs to use to the QM engine via environment variable
    os.environ['NCPUS'] = str(args.ncpus)

    filename = args.mol
    if not os.path.exists(filename):
        print("File {} not found. Please check that the file exists and that the path is correct.".format(filename))
        # A missing input is an error, so report a non-zero status like the
        # other invalid-input exits below
        sys.exit(1)

    if args.qmcode == "Gaussian":
        code = Code.Gaussian
    elif args.qmcode == "PSI4":
        code = Code.PSI4
    elif args.qmcode == "TeraChem":
        code = Code.TeraChem
    else:
        print("Unknown QM code: {}".format(args.qmcode))
        sys.exit(1)

    if args.exec == "inline":
        execution = Execution.Inline
    elif args.exec == "LSF":
        execution = Execution.LSF
    elif args.exec == "PBS":
        execution = Execution.PBS
    elif args.exec == "Slurm":
        execution = Execution.Slurm
    elif args.exec == "AceCloud":
        execution = Execution.AceCloud
    else:
        print("Unknown execution mode: {}".format(args.exec))
        sys.exit(1)

    if args.forcefield == "CGENFF":
        methods = [FFTypeMethod.CGenFF_2b6]
    elif args.forcefield == "GAFF":
        methods = [FFTypeMethod.GAFF]
    elif args.forcefield == "GAFF2":
        methods = [FFTypeMethod.GAFF2]
    elif args.forcefield == "all":
        methods = [FFTypeMethod.CGenFF_2b6, FFTypeMethod.GAFF2]
    else:
        print("Unknown initial guess force-field: {}".format(args.forcefield))
        sys.exit(1)

    if args.basis == "6-31g-star":
        basis = BasisSet._6_31G_star
    elif args.basis == "cc-pVDZ":
        basis = BasisSet._cc_pVDZ
    else:
        print("Unknown basis {}".format(args.basis))
        sys.exit(1)

    if args.theory == "RHF":
        theory = Theory.RHF
    elif args.theory == "B3LYP":
        theory = Theory.B3LYP
    else:
        # "{}" (not "%s") is the placeholder str.format understands
        print("Unknown theory {}".format(args.theory))
        sys.exit(1)

    solvent = not args.vacuum

    def torsion_name(molecule, atoms):
        # Dash-separated names of the four atoms of a torsion
        return "%s-%s-%s-%s" % (molecule.name[atoms[0]], molecule.name[atoms[1]],
                                molecule.name[atoms[2]], molecule.name[atoms[3]])

    # Just list torsions?
    if args.list:
        print(" === Listing soft torsions of {} ===\n".format(filename))
        mol = FFMolecule(filename=filename, method=methods[0], netcharge=args.charge, rtf=args.rtf, prm=args.prm,
                         basis=basis, theory=theory, solvent=solvent, execution=execution, qmcode=code,
                         outdir=args.outdir)
        dihedrals = mol.getSoftTorsions()
        print("Detected soft torsions:")
        with open("torsions.txt", "w") as fh:
            for d in dihedrals:
                print("\t{}".format(torsion_name(mol, d)))
                print("{}".format(torsion_name(mol, d)), file=fh)
        sys.exit(0)

    # Small report
    print(" === List of arguments used ===\n")
    for key, value in vars(args).items():
        print('{:>10s}: {:<10s}'.format(key, str(value)))

    print("\n === Parameterizing {} ===\n".format(filename))
    for method in methods:
        sys.stdout.flush()
        print(" === Fitting for FF %s ===\n" % (method.name))

        mol = FFMolecule(filename=filename, method=method, netcharge=args.charge, rtf=args.rtf, prm=args.prm,
                         basis=basis, theory=theory, solvent=solvent, execution=execution, qmcode=code,
                         outdir=args.outdir)
        dihedrals = mol.getSoftTorsions()

        if not args.nomin:
            print("\n == Minimizing ==\n")
            mol.minimize()

        sys.stdout.flush()
        if not args.noesp:
            print("\n == Charge fitting ==\n")

            # Select the atoms that are to have frozen charges in the fit
            fixq = []
            if args.freezeq:
                for atom_name in args.freezeq:
                    found = False
                    for atom_index in range(len(mol.name)):
                        if mol.name[atom_index] == atom_name:
                            print("Fixing charge for atom %s to %f" % (atom_name, mol.charge[atom_index]))
                            fixq.append(atom_index)
                            found = True
                    if not found:
                        raise ValueError(" No atom named %s (--freeze-charge)" % atom_name)

            (score, qm_dipole, mm_dipole) = mol.fitCharges(fixed=fixq)

            rating = "GOOD"
            if score > 1:
                rating = "CHECK"
            if score > 10:
                rating = "BAD"

            print("Charge Chi^2 score : %f : %s" % (score, rating))
            print("QM Dipole : %f %f %f ; %f" % (qm_dipole[0], qm_dipole[1], qm_dipole[2], qm_dipole[3]))
            print("MM Dipole : %f %f %f ; %f" % (mm_dipole[0], mm_dipole[1], mm_dipole[2], mm_dipole[3]))

            # Squared distance between the first three dipole components
            dipole_chisq = 0.
            for axis in range(3):
                delta = qm_dipole[axis] - mm_dipole[axis]
                dipole_chisq = dipole_chisq + delta * delta

            # Rate the dipole on its own score, not on the charge score
            rating = "GOOD"
            if dipole_chisq > 1:
                rating = "CHECK"
            print("Dipole Chi^2 score : %f : %s" % (dipole_chisq, rating))
            print("")

        sys.stdout.flush()

        # Iterative dihedral fitting
        if not args.notorsion:
            print("\n == Torsion fitting ==\n")

            scores = np.zeros(len(dihedrals))
            converged = False
            iteration = 1

            while not converged:
                rets = []

                print("\nIteration %d" % iteration)

                last_scores = scores
                scores = np.zeros(len(dihedrals))
                for idx, d in enumerate(dihedrals):
                    name = torsion_name(mol, d)
                    if args.torsion != 'all' and name not in args.torsion.split(','):
                        continue

                    print("\n == Fitting torsion {} ==\n".format(name))
                    try:
                        ret = mol.fitSoftTorsion(d)
                        rets.append(ret)

                        rating = "GOOD"
                        if ret.chisq > 10:
                            rating = "CHECK"
                        if ret.chisq > 100:
                            rating = "BAD"
                        print("Torsion %s Chi^2 score : %f : %s" % (name, ret.chisq, rating))
                        sys.stdout.flush()
                        scores[idx] = ret.chisq
                        mol.plotTorsionFit(ret, show=False)
                    except Exception as e:
                        # Best effort: a failed torsion is reported and
                        # scored 0, the remaining torsions are still fitted
                        print("Error in fitting")
                        print(str(e))
                        scores[idx] = 0.

                if iteration > 1:
                    converged = True
                    for j in range(len(scores)):
                        # Check convergence
                        relerr = (scores[j] - last_scores[j]) / last_scores[j]
                        convstr = "- converged"
                        if math.fabs(relerr) > 1.e-2:
                            convstr = ""
                            converged = False
                        print(" Dihedral %d relative error : %f %s" % (j, relerr, convstr))

                iteration += 1

            print(" Fitting converged at iteration %d" % (iteration - 1))

            # Nothing to plot when no torsion was fitted in the last iteration
            if rets:
                fit = mol.plotConformerEnergies(rets, show=False)
                print("\n Fit of conformer energies: RMS %f Variance %f" % (fit[0], fit[1]))

        printEnergies(mol)

        # Output the ff parameters
        paramdir = os.path.join(args.outdir, "parameters", method.name, mol.output_directory_name())
        print("\n == Output to {} ==\n".format(paramdir))
        try:
            os.makedirs(paramdir, exist_ok=True)
        except OSError as err:
            raise OSError('Directory {} could not be created. Check if you have permissions.'.format(paramdir)) from err

        if method.name == "CGenFF_2b6":
            try:
                mol._rtf.write(os.path.join(paramdir, "mol.rtf"))
                mol._prm.write(os.path.join(paramdir, "mol.prm"))
                for ext in ['psf', 'xyz', 'coor', 'mol2', 'pdb']:
                    mol.write(os.path.join(paramdir, "mol." + ext))
                # NAMD configuration, one directive per line
                namd_input = "\n".join((
                    "parameters mol.prm",
                    "paraTypeCharmm on",
                    "coordinates mol.pdb",
                    "bincoordinates mol.coor",
                    "temperature 0",
                    "timestep 0",
                    "1-4scaling 1.0",
                    "exclude scaled1-4",
                    "outputname .out",
                    "outputenergies 1",
                    "structure mol.psf",
                    "cutoff 20.",
                    "switching off",
                    "stepsPerCycle 1",
                    "rigidbonds none",
                    "cellBasisVector1 50. 0. 0.",
                    "cellBasisVector2 0. 50. 0.",
                    "cellBasisVector3 0. 0. 50.",
                    "run 0",
                ))
                with open(os.path.join(paramdir, "input.namd"), "w") as f:
                    print(namd_input, file=f)
            except ValueError as e:
                print("Not writing CHARMM PRM: {}".format(str(e)))

        elif method.name == "GAFF" or method.name == "GAFF2":
            try:
                # types need to be remapped because Amber FRCMOD format limits the type to characters
                # writeFrcmod does this on the fly and returns a mapping that needs to be applied to the mol
                typemap = mol._prm.writeFrcmod(mol._rtf, os.path.join(paramdir, "mol.frcmod"))
                for ext in ['coor', 'mol2', 'pdb']:
                    mol.write(os.path.join(paramdir, "mol." + ext), typemap=typemap)

                # tleap script, one command per line
                tleap_input = "\n".join((
                    "loadAmberParams mol.frcmod",
                    "A = loadMol2 mol.mol2",
                    "saveAmberParm A structure.prmtop mol.crd",
                    "quit",
                ))
                with open(os.path.join(paramdir, "tleap.in"), "w") as f:
                    print(tleap_input, file=f)

                # NAMD configuration, one directive per line
                namd_input = "\n".join((
                    "parmfile structure.prmtop",
                    "amber on",
                    "coordinates mol.pdb",
                    "bincoordinates mol.coor",
                    "temperature 0",
                    "timestep 0",
                    "1-4scaling 0.83333333",
                    "exclude scaled1-4",
                    "outputname .out",
                    "outputenergies 1",
                    "cutoff 20.",
                    "switching off",
                    "stepsPerCycle 1",
                    "rigidbonds none",
                    "cellBasisVector1 50. 0. 0.",
                    "cellBasisVector2 0. 50. 0.",
                    "cellBasisVector3 0. 0. 50.",
                    "run 0",
                ))
                with open(os.path.join(paramdir, "input.namd"), "w") as f:
                    print(namd_input, file=f)
            except ValueError as e:
                print("Not writing Amber FRCMOD: {}".format(str(e)))

    sys.exit(0)
if __name__ == '__main__':
    # Script self-check: type benzamidine with each method, write the
    # resulting files to a temporary directory and list the reference files.

    import sys
    import re

    from htmd.home import home
    from htmd.parameterization.ffmolecule import FFMolecule

    # BUG: MATCH does not work on Mac!
    if os.environ.get('TRAVIS_OS_NAME') == 'osx':
        sys.exit(0)

    molFile = os.path.join(home('building-protein-ligand'), 'benzamidine.mol2')
    refDir = home(dataDir='test-fftype/benzamidine')
    mol = FFMolecule(molFile)

    with TemporaryDirectory() as tmpDir:

        # CGenFF writes an RTF/PRM pair
        ff = FFType(mol, method=FFTypeMethod.CGenFF_2b6)
        ff._rtf.write(os.path.join(tmpDir, 'cgenff.rtf'))
        ff._prm.write(os.path.join(tmpDir, 'cgenff.prm'))

        # GAFF and GAFF2 each write a FRCMOD file
        ff = FFType(mol, method=FFTypeMethod.GAFF)
        ff._prm.writeFrcmod(ff._rtf, os.path.join(tmpDir, 'gaff.frcmod'))

        ff = FFType(mol, method=FFTypeMethod.GAFF2)
        ff._prm.writeFrcmod(ff._rtf, os.path.join(tmpDir, 'gaff2.frcmod'))

        for testFile in os.listdir(refDir):
            print(testFile)
def main_parameterize(arguments=None):
    """Command-line entry point: parameterize a molecule against QM data.

    The arguments are parsed with the parser returned by
    ``getArgumentParser()``. A QM queue and a QM engine are set up from
    them, then for every requested force field an ``FFMolecule`` is created
    and, depending on the flags, minimized, fitted for ESP charges (the new
    charges are copied onto the preserved original molecule) and fitted for
    dihedral parameters. Parameters and an energy file are written for each
    force field.

    When ``args.list`` is set only the rotatable dihedrals are printed and
    written to ``torsions.txt``, after which the process exits with status 0.

    Parameters
    ----------
    arguments : list of str, optional
        Forwarded as ``args=`` to ``parse_args``.

    Raises
    ------
    ValueError
        If the input file does not exist, a fixed-charge atom name is not in
        the molecule, or a requested dihedral is not a rotatable dihedral.
    NotImplementedError
        If the queue or the QM code is not one of the handled values.
    """
    args = getArgumentParser().parse_args(args=arguments)

    if not os.path.exists(args.filename):
        raise ValueError('File %s cannot be found' % args.filename)

    forcefields = {
        'GAFF': FFTypeMethod.GAFF,
        'GAFF2': FFTypeMethod.GAFF2,
        'CGENFF': FFTypeMethod.CGenFF_2b6,
    }
    methods = [forcefields[name] for name in args.forcefield]

    # TODO: move into FFMolecule
    # Get RTF and PRM file names
    rtf, prm = args.rtf_prm if args.rtf_prm else (None, None)

    # Create a queue for QM
    queue_name = args.queue
    if queue_name == 'local':
        queue = LocalCPUQueue()
    elif queue_name == 'Slurm':
        queue = SlurmQueue(_configapp=args.code.lower())
    elif queue_name == 'LSF':
        queue = LsfQueue(_configapp=args.code.lower())
    elif queue_name == 'PBS':
        queue = PBSQueue()  # TODO: configure
    elif queue_name == 'AceCloud':
        queue = AceCloudQueue()  # TODO: configure
        queue.groupname = args.groupname
        queue.hashnames = True
    else:
        raise NotImplementedError

    # Override default ncpus
    if args.ncpus:
        logger.info('Overriding ncpus to {}'.format(args.ncpus))
        queue.ncpu = args.ncpus
    if args.memory:
        logger.info('Overriding memory to {}'.format(args.memory))
        queue.memory = args.memory

    # Create a QM object
    if args.code not in ('Psi4', 'Gaussian'):
        raise NotImplementedError
    qm = Psi4() if args.code == 'Psi4' else Gaussian()

    # This is for debugging only!
    if args.fake_qm:
        qm = FakeQM2()
        logger.warning('Using FakeQM')

    # Set up the QM object
    qm.theory = args.theory
    qm.basis = args.basis
    qm.solvent = args.environment
    qm.queue = queue

    # List rotatable dihedral angles
    if args.list:
        mol = FFMolecule(args.filename, method=methods[0], netcharge=args.charge,
                         rtf=rtf, prm=prm, qm=qm, outdir=args.outdir)
        print('\n === Parameterizable dihedral angles of %s ===\n' % args.filename)
        with open('torsions.txt', 'w') as listing:
            for atoms in mol.getRotatableDihedrals():
                label = '%s-%s-%s-%s' % tuple(mol.name[atoms])
                print(' ' + label)
                listing.write(label + '\n')
        print()
        sys.exit(0)

    # Print arguments
    print('\n === Arguments ===\n')
    for option, setting in vars(args).items():
        print('{:>12s}: {:s}'.format(option, str(setting)))

    print('\n === Parameterizing %s ===\n' % args.filename)
    for method in methods:
        print(" === Fitting for %s ===\n" % method.name)

        # Create the molecule
        mol = FFMolecule(args.filename, method=method, netcharge=args.charge,
                         rtf=rtf, prm=prm, qm=qm, outdir=args.outdir)
        mol.printReport()

        # Copy the molecule to preserve initial coordinates
        mol_orig = mol.copy()

        # Update B3LYP to B3LYP-D3
        # TODO: this is silent and not documented stuff
        if qm.theory == 'B3LYP':
            qm.correction = 'D3'

        # Update basis sets
        # TODO: this is silent and not documented stuff
        if mol.netcharge < 0 and qm.solvent == 'vacuum':
            if qm.basis == '6-31G*':
                qm.basis = '6-31+G*'
            if qm.basis == 'cc-pVDZ':
                qm.basis = 'aug-cc-pVDZ'
            logger.info('Changing basis sets to %s' % qm.basis)

        # Minimize molecule
        if args.minimize:
            print('\n == Minimizing ==\n')
            mol.minimize()

        # Fit ESP charges
        if args.fit_charges:
            print('\n == Fitting ESP charges ==\n')

            # Set random number generator seed
            if args.seed:
                np.random.seed(args.seed)

            # Select the atoms with fixed charges
            fixed_atom_indices = []
            for wanted_name in args.fix_charge:

                if wanted_name not in mol.name:
                    raise ValueError('Atom %s is not found. Check --fix-charge arguments' % wanted_name)

                # Every atom carrying that name gets its charge fixed
                for atom_index in range(mol.numAtoms):
                    if mol.name[atom_index] != wanted_name:
                        continue
                    fixed_atom_indices.append(atom_index)
                    logger.info('Charge of atom %s is fixed to %f' % (wanted_name, mol.charge[atom_index]))

            # Fit ESP charges
            _, qm_dipole = mol.fitCharges(fixed=fixed_atom_indices)

            # Copy the new charges to the original molecule
            mol_orig.charge[:] = mol.charge

            # Print dipoles
            logger.info('QM dipole: %f %f %f; %f' % tuple(qm_dipole))
            mm_dipole = mol.getDipole()
            if not np.all(np.isfinite(mm_dipole)):
                logger.warning('MM dipole cannot be computed. Check if elements are detected correctly.')
            else:
                logger.info('MM dipole: %f %f %f; %f' % tuple(mm_dipole))

        # Fit dihedral angle parameters
        if args.fit_dihedral:
            print('\n == Fitting dihedral angle parameters ==\n')

            # Set random number generator seed
            if args.seed:
                np.random.seed(args.seed)

            # Get all rotatable dihedrals
            all_dihedrals = mol.getRotatableDihedrals()

            # Choose which dihedrals to fit
            known_names = ['-'.join(mol.name[atoms]) for atoms in all_dihedrals]
            selected = []
            for requested in args.dihedral:
                if requested not in known_names:
                    raise ValueError('%s is not recognized as a rotatable dihedral angle' % requested)
                selected.append(all_dihedrals[known_names.index(requested)])

            # Set default to all dihedral angles
            if len(selected) == 0:
                selected = all_dihedrals

            # Fit the parameters
            mol.fitDihedrals(selected, args.optimize_dihedral)

        # Output the FF parameters
        print('\n == Writing results ==\n')
        mol.writeParameters(mol_orig)

        # Write energy file
        energyFile = os.path.join(mol.outdir, 'parameters', method.name,
                                  mol.output_directory_name(), 'energies.txt')
        printEnergies(mol, energyFile)
        logger.info('Write energy file: %s' % energyFile)
def main_parameterize(arguments=None):
    """Command-line entry point of the small-molecule parameterisation tool.

    Arguments are parsed with the parser returned by ``cli_parser()``. For
    each selected force-field typing method an ``FFMolecule`` is built and
    then (unless disabled by the corresponding flags) minimised, fitted for
    charges and iteratively fitted for its soft torsions until the Chi^2
    score of every torsion changes by at most 1% between two consecutive
    iterations. The resulting parameters, the fitted molecule, a copy of the
    molecule taken before fitting (``mol-orig.mol2``) and simulation input
    files are written under
    ``<outdir>/parameters/<method>/<mol.output_directory_name()>``.

    When ``args.list`` is set only the soft torsions are printed and written
    to ``torsions.txt``.

    The function always terminates the process through ``sys.exit`` unless
    an exception propagates: status 0 on completion, status 1 when the input
    file is missing or an option value is not recognised.

    Parameters
    ----------
    arguments : list of str, optional
        Forwarded as ``args=`` to ``parse_args``.

    Raises
    ------
    ValueError
        If an atom named for charge freezing is not in the molecule.
    OSError
        If the output directory cannot be created.
    Exception
        Any error raised while fitting a torsion is reported and re-raised.
    """
    args = cli_parser().parse_args(args=arguments)

    from htmd.parameterization.ffmolecule import FFMolecule, FFEvaluate
    from htmd.parameterization.fftype import FFTypeMethod
    from htmd.qm.qmcalculation import Theory, BasisSet, Execution, Code
    import numpy as np
    import math

    def printEnergies(m):
        # Print the energy terms evaluated on the first frame of m
        print("\n == Diagnostic Energies == ")
        ffe = FFEvaluate(m)
        energies = ffe.evaluate(m.coords[:, :, 0])
        print("")
        print(" Bond : %f" % (energies['bond']))
        print(" Angle : %f" % (energies['angle']))
        print(" Dihedral : %f" % (energies['dihedral']))
        print(" Improper : %f" % (energies['improper']))
        print(" Electro : %f" % (energies['elec']))
        print(" VdW : %f" % (energies['vdw']))
        print("")

    def torsion_name(molecule, atoms):
        # Dash-separated names of the four atoms of a torsion
        return "%s-%s-%s-%s" % (molecule.name[atoms[0]], molecule.name[atoms[1]],
                                molecule.name[atoms[2]], molecule.name[atoms[3]])

    # Communicate the # of CPUs to use to the QM engine via environment variable
    os.environ['NCPUS'] = str(args.ncpus)

    filename = args.mol
    if not os.path.exists(filename):
        print("File {} not found. Please check that the file exists and that the path is correct.".format(filename))
        # A missing input is an error, so report a non-zero status like the
        # other invalid-input exits below
        sys.exit(1)

    if args.qmcode == "Gaussian":
        code = Code.Gaussian
    elif args.qmcode == "PSI4":
        code = Code.PSI4
    elif args.qmcode == "TeraChem":
        code = Code.TeraChem
    else:
        print("Unknown QM code: {}".format(args.qmcode))
        sys.exit(1)

    if args.exec == "inline":
        execution = Execution.Inline
    elif args.exec == "LSF":
        execution = Execution.LSF
    elif args.exec == "Slurm":
        execution = Execution.Slurm
    else:
        print("Unknown execution mode: {}".format(args.exec))
        sys.exit(1)

    if args.forcefield == "CGENFF":
        methods = [FFTypeMethod.CGenFF_2b6]
    elif args.forcefield == "GAFF":
        methods = [FFTypeMethod.GAFF]
    elif args.forcefield == "GAFF2":
        methods = [FFTypeMethod.GAFF2]
    elif args.forcefield == "all":
        methods = [FFTypeMethod.CGenFF_2b6, FFTypeMethod.GAFF2]
    else:
        print("Unknown initial guess force-field: {}".format(args.forcefield))
        sys.exit(1)

    if args.basis == "6-31g-star":
        basis = BasisSet._6_31G_star
    elif args.basis == "cc-pVDZ":
        basis = BasisSet._cc_pVDZ
    else:
        print("Unknown basis {}".format(args.basis))
        sys.exit(1)

    if args.theory == "RHF":
        theory = Theory.RHF
    elif args.theory == "B3LYP":
        theory = Theory.B3LYP
    else:
        # "{}" (not "%s") is the placeholder str.format understands
        print("Unknown theory {}".format(args.theory))
        sys.exit(1)

    solvent = not args.vacuum

    # Just list torsions?
    if args.list:
        print(" === Listing soft torsions of {} ===\n".format(filename))
        mol = FFMolecule(filename=filename, method=methods[0], netcharge=args.charge, rtf=args.rtf, prm=args.prm,
                         basis=basis, theory=theory, solvent=solvent, execution=execution, qmcode=code,
                         outdir=args.outdir)
        dihedrals = mol.getSoftTorsions()
        print("Detected soft torsions:")
        with open("torsions.txt", "w") as fh:
            for d in dihedrals:
                print("\t{}".format(torsion_name(mol, d)))
                print("{}".format(torsion_name(mol, d)), file=fh)
        sys.exit(0)

    # Small report
    print(" === List of arguments used ===\n")
    for key, value in vars(args).items():
        print('{:>10s}: {:<10s}'.format(key, str(value)))

    print("\n === Parameterizing {} ===\n".format(filename))
    for method in methods:
        sys.stdout.flush()
        print(" === Fitting for FF %s ===\n" % method.name)

        mol = FFMolecule(filename=filename, method=method, netcharge=args.charge, rtf=args.rtf, prm=args.prm,
                         basis=basis, theory=theory, solvent=solvent, execution=execution, qmcode=code,
                         outdir=args.outdir)
        dihedrals = mol.getSoftTorsions()
        # Copy taken before any minimisation or fitting
        mol_orig = mol.copy()

        if not args.nomin:
            print("\n == Minimizing ==\n")
            mol.minimize()

        sys.stdout.flush()
        if not args.noesp:
            print("\n == Charge fitting ==\n")

            # Select the atoms that are to have frozen charges in the fit
            fixq = []
            if args.freezeq:
                for atom_name in args.freezeq:
                    found = False
                    for atom_index in range(len(mol.name)):
                        if mol.name[atom_index] == atom_name:
                            print("Fixing charge for atom %s to %f" % (atom_name, mol.charge[atom_index]))
                            fixq.append(atom_index)
                            found = True
                    if not found:
                        raise ValueError(" No atom named %s (--freeze-charge)" % atom_name)

            (score, qm_dipole, mm_dipole) = mol.fitCharges(fixed=fixq)

            rating = "GOOD"
            if score > 1:
                rating = "CHECK"
            if score > 10:
                rating = "BAD"

            print("Charge Chi^2 score : %f : %s" % (score, rating))
            print("QM Dipole : %f %f %f ; %f" % (qm_dipole[0], qm_dipole[1], qm_dipole[2], qm_dipole[3]))
            print("MM Dipole : %f %f %f ; %f" % (mm_dipole[0], mm_dipole[1], mm_dipole[2], mm_dipole[3]))

            # Squared distance between the first three dipole components
            dipole_chisq = 0.
            for axis in range(3):
                delta = qm_dipole[axis] - mm_dipole[axis]
                dipole_chisq = dipole_chisq + delta * delta

            # Rate the dipole on its own score, not on the charge score
            rating = "GOOD"
            if dipole_chisq > 1:
                rating = "CHECK"
            print("Dipole Chi^2 score : %f : %s" % (dipole_chisq, rating))
            print("")

        sys.stdout.flush()

        # Iterative dihedral fitting
        if not args.notorsion:
            print("\n == Torsion fitting ==\n")

            scores = np.ones(len(dihedrals))
            converged = False
            iteration = 1
            # First-iteration fit result of each torsion, keyed by name
            ref_mm = dict()

            while not converged:
                rets = []

                print("\nIteration %d" % iteration)

                last_scores = scores
                scores = np.zeros(len(dihedrals))
                for idx, d in enumerate(dihedrals):
                    name = torsion_name(mol, d)
                    if args.torsion != 'all' and name not in args.torsion.split(','):
                        continue

                    print("\n == Fitting torsion {} ==\n".format(name))
                    try:
                        ret = mol.fitSoftTorsion(d, geomopt=args.geomopt)
                        rets.append(ret)
                        if iteration == 1:
                            ref_mm[name] = ret

                        rating = "GOOD"
                        if ret.chisq > 10:
                            rating = "CHECK"
                        if ret.chisq > 100:
                            rating = "BAD"
                        print("Torsion %s Chi^2 score : %f : %s" % (name, ret.chisq, rating))
                        sys.stdout.flush()
                        scores[idx] = ret.chisq

                        # Always use the mm_orig from first iteration (unmodified)
                        ret.mm_original = ref_mm[name].mm_original
                        phi_original = ref_mm[name].phi

                        mol.plotTorsionFit(ret, phi_original, show=False)
                    except Exception as e:
                        # Report the failure, then let it propagate
                        print("Error in fitting")
                        print(str(e))
                        raise

                if iteration > 1:
                    converged = True
                    for j in range(len(scores)):
                        # Check convergence
                        try:
                            relerr = (scores[j] - last_scores[j]) / last_scores[j]
                        except (ZeroDivisionError, FloatingPointError):
                            relerr = 0.
                        # An undefined ratio (0/0) counts as no change
                        if math.isnan(relerr):
                            relerr = 0.
                        convstr = "- converged"
                        if math.fabs(relerr) > 1.e-2:
                            convstr = ""
                            converged = False
                        print(" Dihedral %d relative error : %f %s" % (j, relerr, convstr))

                iteration += 1

            print(" Fitting converged at iteration %d" % (iteration - 1))

            if len(rets):
                fit = mol.plotConformerEnergies(rets, show=False)
                print("\n Fit of conformer energies: RMS %f Variance %f" % (fit[0], fit[1]))

        printEnergies(mol)

        # Output the ff parameters
        paramdir = os.path.join(args.outdir, "parameters", method.name, mol.output_directory_name())
        print("\n == Output to {} ==\n".format(paramdir))
        try:
            os.makedirs(paramdir, exist_ok=True)
        except OSError as err:
            raise OSError('Directory {} could not be created. Check if you have permissions.'.format(paramdir)) from err

        if method.name == "CGenFF_2b6":
            try:
                mol._rtf.write(os.path.join(paramdir, "mol.rtf"))
                mol._prm.write(os.path.join(paramdir, "mol.prm"))
                for ext in ['psf', 'xyz', 'coor', 'mol2', 'pdb']:
                    mol.write(os.path.join(paramdir, "mol." + ext))
                mol_orig.write(os.path.join(paramdir, "mol-orig.mol2"))
                # NAMD configuration, one directive per line
                namd_input = "\n".join((
                    "parameters mol.prm",
                    "paraTypeCharmm on",
                    "coordinates mol.pdb",
                    "bincoordinates mol.coor",
                    "temperature 0",
                    "timestep 0",
                    "1-4scaling 1.0",
                    "exclude scaled1-4",
                    "outputname .out",
                    "outputenergies 1",
                    "structure mol.psf",
                    "cutoff 20.",
                    "switching off",
                    "stepsPerCycle 1",
                    "rigidbonds none",
                    "cellBasisVector1 50. 0. 0.",
                    "cellBasisVector2 0. 50. 0.",
                    "cellBasisVector3 0. 0. 50.",
                    "run 0",
                ))
                with open(os.path.join(paramdir, "input.namd"), "w") as f:
                    print(namd_input, file=f)
            except ValueError as e:
                print("Not writing CHARMM PRM: {}".format(str(e)))

        elif method.name == "GAFF" or method.name == "GAFF2":
            try:
                # types need to be remapped because Amber FRCMOD format limits the type to characters
                # writeFrcmod does this on the fly and returns a mapping that needs to be applied to the mol
                typemap = mol._prm.writeFrcmod(mol._rtf, os.path.join(paramdir, "mol.frcmod"))
                for ext in ['coor', 'mol2', 'pdb']:
                    mol.write(os.path.join(paramdir, "mol." + ext), typemap=typemap)
                mol_orig.write(os.path.join(paramdir, "mol-orig.mol2"), typemap=typemap)

                # tleap script, one command per line
                tleap_input = "\n".join((
                    "loadAmberParams mol.frcmod",
                    "A = loadMol2 mol.mol2",
                    "saveAmberParm A structure.prmtop mol.crd",
                    "quit",
                ))
                with open(os.path.join(paramdir, "tleap.in"), "w") as f:
                    print(tleap_input, file=f)

                # NAMD configuration, one directive per line
                namd_input = "\n".join((
                    "parmfile structure.prmtop",
                    "amber on",
                    "coordinates mol.pdb",
                    "bincoordinates mol.coor",
                    "temperature 0",
                    "timestep 0",
                    "1-4scaling 0.83333333",
                    "exclude scaled1-4",
                    "outputname .out",
                    "outputenergies 1",
                    "cutoff 20.",
                    "switching off",
                    "stepsPerCycle 1",
                    "rigidbonds none",
                    "cellBasisVector1 50. 0. 0.",
                    "cellBasisVector2 0. 50. 0.",
                    "cellBasisVector3 0. 0. 50.",
                    "run 0",
                ))
                with open(os.path.join(paramdir, "input.namd"), "w") as f:
                    print(namd_input, file=f)
            except ValueError as e:
                print("Not writing Amber FRCMOD: {}".format(str(e)))

    sys.exit(0)