def write_fb_target_abinitio(self, records):
    """Write ab initio records into a new 'abinitio_bond_angles' target folder.

    The folder is created under ``self.out_folder`` and becomes the current
    working directory; the working directory is NOT changed back afterwards.
    Two files are written there through the ForceBalance Molecule writer:
    ``traj.xyz`` and ``qdata.txt``.

    Parameters
    ----------
    records : sized iterable of dict
        Each record provides 'molecule' (passed to
        ``self.qc_molecule_to_fb_molecule``) and 'energy'.  'name' is
        optional and defaults to 'created by FBTargetBuilder'; it is stored
        as the comment of the frame.

    Raises
    ------
    AssertionError
        If the element list of a converted record differs from ``self.m.elem``.
    """
    # prepare the folder and move into it
    folder = os.path.join(self.out_folder, 'abinitio_bond_angles')
    os.mkdir(folder)
    os.chdir(folder)

    # collect one frame (geometry, energy, comment) per record
    combined = Molecule()
    combined.elem = self.m.elem.copy()
    geometries, energies, comments = [], [], []
    for rec in records:
        qc_mol = rec['molecule']
        qm_energy = rec['energy']
        label = rec.get('name', 'created by FBTargetBuilder')
        frame = self.qc_molecule_to_fb_molecule(qc_mol)
        assert frame.elem == combined.elem, 'Elements list of resulting qcmol is not consistent with self.m'
        geometries.append(frame.xyzs[0])
        energies.append(qm_energy)
        comments.append(label)
    combined.xyzs = geometries
    combined.qm_energies = energies
    combined.comms = comments

    # write both output files, announcing each one first
    n_frames = len(records)
    for out_name in ('traj.xyz', 'qdata.txt'):
        print(f"Writing {n_frames} frames into targets/abinitio_bond_angles/{out_name}")
        combined.write(out_name)
def qc_molecule_to_fb_molecule(self, qc_molecule):
    """Build a single-frame forcebalance.molecule.Molecule from a qcportal Molecule.

    Element symbols are looked up in ``Elements`` by atomic number, the
    geometry is scaled by ``bohr2ang`` (presumably Bohr -> Angstrom, going by
    the constant's name), and the molecular charge and multiplicity are
    copied over.
    """
    fb_mol = Molecule()
    atomic_numbers = qc_molecule.atomic_numbers
    fb_mol.elem = [Elements[number] for number in atomic_numbers]
    scaled_geometry = qc_molecule.geometry * bohr2ang
    fb_mol.xyzs = [scaled_geometry]
    fb_mol.molecular_charge = qc_molecule.molecular_charge
    fb_mol.mult = qc_molecule.molecular_multiplicity
    return fb_mol
def __init__(self, molecule=None):
    """Initialise the Psi4 engine, substituting a placeholder molecule if none is given.

    molecule.py can not parse psi4 input yet, so a one-atom hydrogen
    placeholder at the origin is handed to the parent constructor when
    ``molecule`` is None; ``self.load_psi4_input()`` is the intended
    workaround for loading the real input afterwards.
    """
    if molecule is None:
        # stand-in molecule: a single H atom at the origin
        placeholder = Molecule()
        placeholder.elem = ['H']
        placeholder.xyzs = [[[0, 0, 0]]]
        molecule = placeholder
    super(Psi4, self).__init__(molecule)
    self.threads = None
def write_molecule_files(molecule_data_list):
    """Write input.mol2, conf.pdb, coords.xyz and qdata.txt into the working directory.

    Parameters
    ----------
    molecule_data_list : sequence of (molecule, energy) pairs
        The first molecule provides the topology (mol2 / pdb files) and the
        element list; every pair contributes one frame (geometry scaled by
        ``bohr2ang``) and one energy to coords.xyz and qdata.txt.

    NOTE(review): ``ofs`` is not defined in this function - presumably a
    module-level OpenEye output stream; confirm against the rest of the file.
    """
    reference_mol, _ = molecule_data_list[0]

    # mol2 file, written through oechem
    oemol = cmiles.utils.load_molecule(reference_mol.dict(encoding='json'))
    ofs.open('input.mol2')
    oechem.OEWriteMolecule(ofs, oemol)
    ofs.close()

    # pdb file, converted from the mol2 by ForceBalance Molecule
    Molecule('input.mol2').write('conf.pdb')

    # multi-frame molecule holding every geometry and energy
    frames = Molecule()
    frames.elem = [Elements[number] for number in reference_mol.atomic_numbers]
    frames.xyzs = [qc_mol.geometry * bohr2ang for qc_mol, _energy in molecule_data_list]
    frames.qm_energies = [energy for _qc_mol, energy in molecule_data_list]
    for out_name in ('coords.xyz', 'qdata.txt'):
        frames.write(out_name)
def read_sdf_to_fb_mol(filename):
    """Read a single-molecule sdf file into a forcebalance.molecule.Molecule.

    The returned molecule carries elements, one coordinate frame, bonds
    (each stored as an index pair with the smaller index first), bond
    orders, and in ``Data`` the keys 'molecular_charge',
    'atomic_formal_charges' and 'cmiles_id'.  The source molecule object is
    attached as ``fb_mol.oe_mol``.

    Raises
    ------
    AssertionError
        If ``read_split_mols`` does not return exactly one molecule (the
        message says "multiple", but an empty result trips it as well).
    """
    from forcebalance.molecule import Molecule, Elements
    import numpy as np

    molecules = read_split_mols(filename)
    assert len(molecules) == 1, 'file contains multiple molecules'
    oe_mol = molecules[0]

    fb_mol = Molecule()

    # elements and coordinates
    fb_mol.elem = [Elements[atom.GetAtomicNum()] for atom in oe_mol.GetAtoms()]
    n_atoms = len(fb_mol.elem)
    coords_by_index = oe_mol.GetCoords()
    fb_mol.xyzs = [np.array([coords_by_index[idx] for idx in range(n_atoms)])]

    # bonds, normalised so that the lower atom index comes first
    bond_pairs = []
    orders = []
    for oe_bond in oe_mol.GetBonds():
        begin = oe_bond.GetBgnIdx()
        end = oe_bond.GetEndIdx()
        bond_pairs.append((min(begin, end), max(begin, end)))
        orders.append(oe_bond.GetOrder())
    fb_mol.bonds = bond_pairs
    fb_mol.bond_orders = orders

    # formal charges per atom and their sum
    formal_charges = [atom.GetFormalCharge() for atom in oe_mol.GetAtoms()]
    fb_mol.Data['molecular_charge'] = sum(formal_charges)
    fb_mol.Data['atomic_formal_charges'] = formal_charges

    # keep the source molecule around
    fb_mol.oe_mol = oe_mol

    # cmiles identifiers derived from the mapped smiles
    smiles = cmiles.utils.mol_to_smiles(oe_mol)
    fb_mol.Data['cmiles_id'] = cmiles.generator.get_molecule_ids(smiles)
    return fb_mol
from forcebalance.molecule import Molecule
from forcebalance.readfrq import read_frq_gen

# Script: animate one vibrational mode from a frequency output file.
# Usage: <script> <frequency output file> <mode number, starting from 1>
# Writes <output file stem>.modeNNN.xyz with 400 displaced frames.
# NOTE(review): relies on os, sys and np being imported above this point - confirm.

# Frequency output file.
fout = sys.argv[1]
# Mode number, starting from 1.
modenum = int(sys.argv[2])
if modenum == 0:
    raise RuntimeError("Start mode number from one, please")

frqs, modes, intens, elem, xyz = read_frq_gen(fout)

M = Molecule()
M.elem = elem[:]
M.xyzs = []

# Rescale the selected mode in place: first divide by norm / sqrt(number of atoms),
# then apply the amplitude factor.
xmode = modes[modenum - 1]
xmode /= (np.linalg.norm(xmode) / np.sqrt(M.na))
xmode *= 0.3  # Reasonable vibrational amplitude

# Displacement factors for one full oscillation: 0 -> 1 -> 0 -> -1 -> (just before) 0.
spac = np.linspace(0, 1, 101)
backward = spac[::-1]
disp = np.concatenate((spac, backward[1:], -1 * spac[1:], -1 * backward[1:-1]))

# One frame per displacement factor.
mode_step = xmode.reshape(-1, 3)
M.xyzs.extend(xyz + factor * mode_step for factor in disp)

# Frame comments report the displacement scaled by the (constant) mode amplitude.
amplitude = np.linalg.norm(xmode) / np.sqrt(M.na)
mode_frequency = frqs[modenum - 1]
M.comms = [
    'Vibrational Mode %i Frequency %.3f Displacement %.3f' % (modenum, mode_frequency, disp[frame] * amplitude)
    for frame in range(len(M))
]
M.write(os.path.splitext(fout)[0] + '.mode%03i.xyz' % modenum)
def gen_tid_calculated_molecules_list(torsiondrive_data, forcefield, verbose=False):
    """Group the available torsion scans by the torsion parameter id they exercise.

    Every entry of ``torsiondrive_data`` is written to a scratch ``tmp``
    folder (created in the current working directory), tested against
    ``forcefield`` with ``test_ff_mol2`` and screened with ``check_Hbond``.
    Entries that fail either check are skipped.  The scratch folder is
    removed and the working directory restored even if an entry raises.

    Parameters
    ----------
    torsiondrive_data : dict
        Maps an entry key to a dict with 'initial_molecules',
        'final_molecules', 'final_energies', 'final_gradients',
        'attributes' (with "canonical_isomeric_smiles") and 'keywords'
        (with 'dihedrals').
    forcefield
        Force field object providing ``get_parameter_handler('ProperTorsions')``.
    verbose : bool
        When true, print why an entry was excluded.

    Returns
    -------
    tid_calculated_molecules_list : dict
        Torsion parameter id -> list of {'mol_index', 'indices'} dicts; ids
        without any scan map to an empty list.
    molecules_list_dict_from_td : dict
        mol_index -> list of reduced QCSchema molecule dicts, one per initial
        molecule of the entry.

    NOTE(review): ``ofs`` is not defined in this function - presumably a
    module-level OpenEye output stream; confirm against the rest of the file.
    """
    ff_torsion_param_list = forcefield.get_parameter_handler('ProperTorsions').parameters
    # one (initially empty) bucket per torsion id in the input force field
    tid_calculated_molecules_list = {param.id: [] for param in ff_torsion_param_list}
    molecules_list_dict_from_td = {}
    schema_keys = ('symbols', 'geometry', 'connectivity', 'molecular_charge', 'molecular_multiplicity')

    start_dir = os.getcwd()
    scratch_dir = os.path.join(start_dir, 'tmp')
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)
    os.mkdir(scratch_dir)
    os.chdir(scratch_dir)
    try:
        for td_data in torsiondrive_data.values():
            # pick a single initial molecule and write it as input.mol2
            qcmol = td_data['initial_molecules'][0]
            oemol = cmiles.utils.load_molecule(qcmol.dict(encoding='json'))
            ofs.open('input.mol2')
            try:
                oechem.OEWriteMolecule(ofs, oemol)
            finally:
                ofs.close()

            # test the mol2 file against the force field
            success, msg, molecule_labels = test_ff_mol2(forcefield, 'input.mol2')
            if not success:
                if verbose:
                    print('Error occured while testing input.mol2. Excluded in tid_calculated_molecules_list. ')
                continue

            # write scan.xyz so the scan can be screened for internal H bonds
            fbmol = FBMolecule('input.mol2')
            target_mol = FBMolecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted(td_data['final_molecules'].keys()):
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                target_mol.xyzs.append(grid_qc_mol.geometry * 0.529177)
                target_mol.qm_energies.append(td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')

            no_hbonds = check_Hbond(scan_fnm='scan.xyz', top_fnm='input.mol2')
            if not no_hbonds:
                if verbose:
                    print('Internal hydrogen bond detacted. Excluded in tid_calculated_molecules_list. \n')
                continue

            # record the scan under the id of the torsion parameter it drives
            mol_index = td_data['attributes']["canonical_isomeric_smiles"]
            indices = td_data['keywords']['dihedrals'][0]
            tid = molecule_labels['ProperTorsions'][tuple(indices)].id
            tid_calculated_molecules_list[tid].append({
                'mol_index': mol_index,
                'indices': indices,
            })

            # keep a reduced QCSchema dict for every initial molecule of the entry
            qcschema_molecules = []
            for initial_mol in td_data['initial_molecules']:
                j_dict = initial_mol.dict(encoding='json')
                qcschema_molecules.append({key: j_dict[key] for key in schema_keys})
            molecules_list_dict_from_td[mol_index] = qcschema_molecules
    finally:
        # always leave the scratch folder and clean it up, even on failure
        os.chdir(start_dir)
        shutil.rmtree(scratch_dir, ignore_errors=True)

    print("\n## Available torsion scans from QCArchive ##\n" + '-' * 90)
    print(f"{'idx':<7} {'tid':7s} {'Number of torsion scans'}")
    for idx, (tid, molecules_list) in enumerate(tid_calculated_molecules_list.items()):
        if molecules_list:
            print(f'{idx:<7} {tid:7s} {len(molecules_list)}')
    print('-' * 90)
    return tid_calculated_molecules_list, molecules_list_dict_from_td
def make_fb_targets():
    """Turn every result folder under ``results_folder`` into a ForceBalance target.

    For each sub-folder (processed in sorted order) the matching
    ``<name>.mol2`` from ``molecules_folder`` is loaded, all ``*.xyz`` scans
    in the folder are merged with their ``.gradxyz`` gradients, and a
    ``td_<name>`` folder is written under ``out_folder`` containing
    qdata.txt, scan.xyz, conf.pdb, input.mol2 and notes.txt.  Folders without
    any ``.xyz`` file are reported and skipped.  Finally ``targets.in`` is
    written into ``out_folder`` with one ``target_str`` block per target.

    ``out_folder`` is deleted first if it already exists.  All folder names,
    ``target_str`` and ``read_gradxyz`` are taken from module scope.
    """
    # collect the result sub-folders, sorted by path
    result_mol_folders = []
    for entry in os.listdir(results_folder):
        candidate = os.path.join(results_folder, entry)
        if os.path.isdir(candidate):
            result_mol_folders.append(candidate)
    result_mol_folders = sorted(result_mol_folders)
    print(f"\nLoading data from {len(result_mol_folders)} result folders under {results_folder}")

    # start from a fresh output folder
    if os.path.exists(out_folder):
        shutil.rmtree(out_folder)
    os.mkdir(out_folder)

    target_names = []
    for mol_folder in result_mol_folders:
        # the molecule file name must match the result folder name
        mol_name = os.path.basename(mol_folder)
        mol_file = os.path.join(molecules_folder, mol_name + '.mol2')
        molecule = Molecule(mol_file)

        # every .xyz file in the folder counts as one finished scan
        finished_scans = [
            stem for stem, ext in map(os.path.splitext, os.listdir(mol_folder))
            if ext == '.xyz'
        ]
        if not finished_scans:
            print(f'No finished scans found in {mol_folder}')
            continue
        scan_files = [
            (os.path.join(mol_folder, stem + '.xyz'), os.path.join(mol_folder, stem + '.gradxyz'))
            for stem in finished_scans
        ]

        target_name = 'td_' + mol_name
        target_names.append(target_name)
        this_target_folder = os.path.join(out_folder, target_name)
        os.mkdir(this_target_folder)

        # merge geometries, energies and gradients of all scans
        target_mol = Molecule()
        target_mol.elem = molecule.elem
        target_mol.xyzs = []
        target_mol.qm_energies = []
        target_mol.qm_grads = []
        for xyz_file, grad_file in scan_files:
            scan = Molecule(xyz_file)
            target_mol.xyzs.extend(scan.xyzs)
            # the energy is the last token of each frame's comment line
            target_mol.qm_energies.extend(float(comment.split()[-1]) for comment in scan.comms)
            target_mol.qm_grads.extend(read_gradxyz(grad_file))

        # write the target files
        for out_name in ('qdata.txt', 'scan.xyz'):
            target_mol.write(os.path.join(this_target_folder, out_name))
        molecule.write(os.path.join(this_target_folder, 'conf.pdb'))
        shutil.copyfile(mol_file, os.path.join(this_target_folder, 'input.mol2'))

        # leave a note listing the source files of this target
        note_lines = ["Notes: This target is made by make_fb_targets.py, using data from\n", mol_file + '\n']
        for xyz_file, grad_file in scan_files:
            note_lines.append(xyz_file + '\n')
            note_lines.append(grad_file + '\n')
        with open(os.path.join(this_target_folder, 'notes.txt'), 'w') as fnote:
            fnote.writelines(note_lines)

    # write a targets.in file for use in the ForceBalance input
    targets_in_path = os.path.join(out_folder, 'targets.in')
    with open(targets_in_path, 'w') as fout:
        for tname in target_names:
            fout.write(target_str.format(name=tname) + '\n')
    print("Targets generation finished!")
    print(f"You can copy contents in {targets_in_path} to your ForceBalance input file.")
def _reject_torsiondrive_target(target_name, dataset_name, entry_index, td_data, msg):
    """Archive a failed target's mol2 and error note in ../error_mol2s, then delete its folder.

    Must be called with the target folder as the working directory; on return
    the working directory is the parent folder.
    """
    os.makedirs('../error_mol2s', exist_ok=True)
    shutil.move('input.mol2', f'../error_mol2s/{target_name}.mol2')
    with open(f'../error_mol2s/{target_name}_error.txt', 'w') as notefile:
        notefile.write(f'{dataset_name}\ntarget_name {target_name}\n')
        notefile.write(f'entry {entry_index}\ntd_keywords {td_data["keywords"]}\n')
        notefile.write(f'error message:\n{msg}')
    # remove this folder
    os.chdir('..')
    shutil.rmtree(target_name)


def make_torsiondrive_target(dataset_name, torsiondrive_data, test_ff=None):
    """Make a folder of ForceBalance targets from the torsiondrive data.

    A fresh ``targets`` folder is created in the current working directory
    (any existing one is deleted).  Each entry gets its own sub-folder with
    note.txt, input.mol2, conf.pdb, scan.xyz, qdata.txt and metadata.json.
    Entries whose mol2 fails ``test_ff_mol2`` (only checked when ``test_ff``
    is given) or whose scan is flagged by ``screening_Hbond`` are moved to
    ``targets/error_mol2s`` together with an error note and their folder is
    removed.  A ``targets.<dataset_name>.in`` file listing the accepted
    targets is written into ``targets``.  The original working directory is
    restored on exit, including when an error is raised.

    Parameters
    ----------
    dataset_name : str
        Dataset label; spaces are replaced by underscores in file names.
    torsiondrive_data : dict
        Maps an entry label to a dict with 'initial_molecules',
        'final_molecules', 'final_energies', 'final_gradients', 'keywords'
        and 'attributes'.
    test_ff : optional
        Force field used to validate each mol2 file and to look up the
        SMIRKS of the scanned torsions; skipped when None.

    NOTE(review): ``ofs`` is not defined in this function - presumably a
    module-level OpenEye output stream; confirm against the rest of the file.
    """
    target_name_prefix = 'td_' + dataset_name.replace(' ', '_')
    start_dir = os.getcwd()
    # create new targets folder
    if os.path.exists('targets'):
        shutil.rmtree('targets')
    os.mkdir('targets')
    os.chdir('targets')
    try:
        # write each entry as an individual target
        n_targets = len(torsiondrive_data)
        idx_fmt_string = get_int_fmt_string(n_targets)
        target_names = []
        print(f"Generating {n_targets} targets")
        # target_idx counts entries, so rejected entries leave gaps in the numbering
        for target_idx, (entry_index, td_data) in enumerate(torsiondrive_data.items()):
            # pick a single initial molecule
            qcmol = td_data['initial_molecules'][0]
            mol_formula = qcmol.get_molecular_formula()
            # create target folder
            target_idx_str = idx_fmt_string.format(target_idx)
            target_name = f"{target_name_prefix}_{target_idx_str}_{mol_formula}"
            print(f"{target_idx}: {target_name}")
            os.mkdir(target_name)
            os.chdir(target_name)
            # write a note
            with open('note.txt', 'w') as notefile:
                notefile.write(f'Target generated from dataset {dataset_name}, entry {entry_index}')
            # write input.mol2 file
            oemol = cmiles.utils.load_molecule(qcmol.dict(encoding='json'))
            ofs.open('input.mol2')
            try:
                oechem.OEWriteMolecule(ofs, oemol)
            finally:
                ofs.close()

            # test mol2 file
            molecule_labels = None
            if test_ff is not None:
                success, msg, molecule_labels = test_ff_mol2(test_ff, 'input.mol2')
                if not success:
                    _reject_torsiondrive_target(target_name, dataset_name, entry_index, td_data, msg)
                    continue

            # write conf.pdb file
            fbmol = Molecule('input.mol2')
            fbmol.write('conf.pdb')
            # list of grid ids sorted
            sorted_grid_ids = sorted(td_data['final_molecules'].keys())
            # write scan.xyz and qdata.txt files
            target_mol = Molecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted_grid_ids:
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                target_mol.xyzs.append(grid_qc_mol.geometry * 0.529177)
                # add energy and gradient
                target_mol.qm_energies.append(td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')
            target_mol.write('qdata.txt')

            # check if the torsion scan contains one or more conformers forming strong internal H bonds
            no_hbonds, _ = screening_Hbond(mol2_fnm='input.mol2', scan_fnm='scan.xyz')
            if not no_hbonds:
                _reject_torsiondrive_target(
                    target_name, dataset_name, entry_index, td_data,
                    'One or more internal H bonds exist.')
                continue

            # pick metadata to write into the metadata.json file
            metadata = copy.deepcopy(td_data['keywords'])
            metadata['dataset_name'] = dataset_name
            metadata['entry_label'] = entry_index
            metadata['canonical_smiles'] = td_data['attributes'].get('canonical_smiles', 'unknown')
            metadata['torsion_grid_ids'] = sorted_grid_ids
            # find SMIRKs for torsion being scanned if test_ff is provided
            if test_ff is not None:
                metadata['smirks'] = []
                metadata['smirks_ids'] = []
                for torsion_indices in td_data['keywords']['dihedrals']:
                    param = molecule_labels['ProperTorsions'][tuple(torsion_indices)]
                    metadata['smirks'].append(param.smirks)
                    metadata['smirks_ids'].append(param.id)
            with open('metadata.json', 'w') as jsonfile:
                json.dump(metadata, jsonfile, indent=2)
            # finish this target
            target_names.append(target_name)
            os.chdir('..')

        # write targets.{dataset_name}.in file
        target_in_fnm = f"targets.{dataset_name.replace(' ', '_')}.in"
        with open(target_in_fnm, 'w') as outfile:
            for target_name in target_names:
                outfile.write(target_in_str.format(name=target_name))
        print(f"Successfull generated {len(target_names)} targets.")
        print(f"You can copy contents in {target_in_fnm} to your ForceBalance input file.")
    finally:
        # go back to where we started, whether or not target generation succeeded
        os.chdir(start_dir)
espval.append(float(s[3])) elif len(espxyz) > 0: # After reading in a block of ESPs, don't read any more. ESPMode = -1 if line.strip().startswith("Geometry (in Angstrom)"): XMode = 1 EMode = len(elem) == 0 if 'Electrostatic Potential' in line.strip() and ESPMode == 0: ESPMode = 1 if len(xyzs) == 0: raise Exception('%s has length zero' % psiout) return xyzs, elem, espxyz, espval xyzs, elem, espxyz, espval = read_psi_xyzesp(sys.argv[1]) M = Molecule() M.xyzs = xyzs M.elem = elem M.write('%s.xyz' % os.path.splitext(sys.argv[1])[0]) EM = Molecule() EM.xyzs = [np.array(espxyz) * 0.52917721092] EM.elem = ['H' for i in range(len(espxyz))] EM.write('%s.espx' % os.path.splitext(sys.argv[1])[0], ftype="xyz") M.qm_espxyzs = EM.xyzs M.qm_espvals = [np.array(espval)] M.write("qdata.txt") np.savetxt('%s.esp' % os.path.splitext(sys.argv[1])[0], espval)
elif len(espxyz) > 0: # After reading in a block of ESPs, don't read any more. ESPMode = -1 if line.strip().startswith("Geometry (in Angstrom)"): XMode = 1 EMode = len(elem) == 0 if 'Electrostatic Potential' in line.strip() and ESPMode == 0: ESPMode = 1 if len(xyzs) == 0: raise Exception('%s has length zero' % psiout) return xyzs, elem, espxyz, espval xyzs, elem, espxyz, espval = read_psi_xyzesp(sys.argv[1]) M = Molecule() M.xyzs = xyzs M.elem = elem M.write('%s.xyz' % os.path.splitext(sys.argv[1])[0]) EM = Molecule() EM.xyzs = [np.array(espxyz) * 0.52917721092] EM.elem = ['H' for i in range(len(espxyz))] EM.write('%s.espx' % os.path.splitext(sys.argv[1])[0], ftype="xyz") M.qm_espxyzs = EM.xyzs M.qm_espvals = [np.array(espval)] M.write("qdata.txt") np.savetxt('%s.esp' % os.path.splitext(sys.argv[1])[0], espval)
import os
import sys
import re
import numpy as np
from forcebalance.molecule import Molecule
from forcebalance.readfrq import read_frq_psi

# Script: animate one vibrational mode from a Psi4 frequency output.
# Usage: <script> <psi4 output file> <mode number, starting from 1>
# Writes <output file stem>.modeNNN.xyz with 400 displaced frames.

# Psi4 output file.
psiout = sys.argv[1]
# Mode number, starting from 1.
modenum = int(sys.argv[2])
# Without this guard, 0 or a negative number would silently select a mode
# counted from the end of the list (Python negative indexing).
if modenum < 1:
    raise RuntimeError("Start mode number from one, please")

frqs, modes, elem, xyz = read_frq_psi(psiout)

M = Molecule()
M.elem = elem[:]
M.xyzs = []

# Rescale the selected mode in place: first divide by norm / sqrt(number of atoms),
# then apply the amplitude factor.
xmode = modes[modenum - 1]
xmode /= (np.linalg.norm(xmode) / np.sqrt(M.na))
xmode *= 0.3  # Reasonable vibrational amplitude

# Displacement factors for one full oscillation: 0 -> 1 -> 0 -> -1 -> (just before) 0.
spac = np.linspace(0, 1, 101)
disp = np.concatenate((spac, spac[::-1][1:], -1 * spac[1:], -1 * spac[::-1][1:-1]))

# One frame per displacement factor.
for factor in disp:
    M.xyzs.append(xyz + factor * xmode)

# Frame comments report the displacement scaled by the (constant) mode amplitude.
amplitude = np.linalg.norm(xmode) / np.sqrt(M.na)
M.comms = [
    'Vibrational Mode %i Frequency %.3f Displacement %.3f' % (modenum, frqs[modenum - 1], disp[i] * amplitude)
    for i in range(len(M))
]
M.write(os.path.splitext(psiout)[0] + '.mode%03i.xyz' % modenum)