def write_fb_target_abinitio(self, records):
    """Write ab initio records into a new 'abinitio_bond_angles' target folder.

    Each record is a dict holding a 'molecule' and an 'energy', plus an
    optional 'name' that is used as the frame comment. The folder is created
    under ``self.out_folder``, the working directory is changed into it (and
    is not changed back), and ``traj.xyz`` and ``qdata.txt`` are written there.
    """
    # create the target folder and move into it
    folder = os.path.join(self.out_folder, 'abinitio_bond_angles')
    os.mkdir(folder)
    os.chdir(folder)
    # collect every record as one frame of a single fb Molecule
    combined = Molecule()
    combined.elem = self.m.elem.copy()
    combined.xyzs = []
    combined.qm_energies = []
    combined.comms = []
    for rec in records:
        frame = self.qc_molecule_to_fb_molecule(rec['molecule'])
        assert frame.elem == combined.elem, 'Elements list of resulting qcmol is not consistent with self.m'
        combined.xyzs.append(frame.xyzs[0])
        combined.qm_energies.append(rec['energy'])
        combined.comms.append(rec.get('name', 'created by FBTargetBuilder'))
    # announce and write both output files, geometry first
    for out_fnm in ('traj.xyz', 'qdata.txt'):
        print(
            f"Writing {len(records)} frames into targets/abinitio_bond_angles/{out_fnm}"
        )
        combined.write(out_fnm)
示例#2
0
 def qc_molecule_to_fb_molecule(self, qc_molecule):
     """Build a forcebalance.molecule.Molecule from a qcportal.Molecule.

     Copies the element symbols (looked up from the atomic numbers), the
     geometry scaled by ``bohr2ang`` as a single frame, the molecular charge
     and the multiplicity.
     """
     fb_molecule = Molecule()
     # atomic numbers index directly into the Elements table
     fb_molecule.elem = [Elements[z] for z in qc_molecule.atomic_numbers]
     # one frame only, scaled by the bohr2ang factor
     scaled_geometry = qc_molecule.geometry * bohr2ang
     fb_molecule.xyzs = [scaled_geometry]
     fb_molecule.molecular_charge = qc_molecule.molecular_charge
     fb_molecule.mult = qc_molecule.molecular_multiplicity
     return fb_molecule
示例#3
0
 def __init__(self, molecule=None):
     """Initialize the Psi4 object, using a one-atom placeholder when no molecule is given."""
     # molecule.py cannot parse psi4 input yet, so self.load_psi4_input() is
     # used as a workaround; the parent constructor still needs a molecule.
     if molecule is None:
         placeholder = Molecule()
         placeholder.elem = ['H']
         placeholder.xyzs = [[[0, 0, 0]]]
         molecule = placeholder
     super(Psi4, self).__init__(molecule)
     self.threads = None
def write_molecule_files(molecule_data_list):
    """Write input.mol2, conf.pdb, coords.xyz and qdata.txt into the working directory.

    ``molecule_data_list`` is a sequence of (molecule, energy) pairs. The
    first molecule provides the topology for input.mol2 / conf.pdb and the
    element list; every pair contributes one frame (geometry scaled by
    ``bohr2ang``) and one energy.
    """
    first_molecule, _ = molecule_data_list[0]
    # mol2 file is written through oechem, via the module-level `ofs` stream
    oemol = cmiles.utils.load_molecule(first_molecule.dict(encoding='json'))
    ofs.open('input.mol2')
    oechem.OEWriteMolecule(ofs, oemol)
    ofs.close()
    # pdb file is produced by re-reading the mol2 with a ForceBalance Molecule
    Molecule('input.mol2').write('conf.pdb')
    # a fresh ForceBalance Molecule carries all frames and energies
    frames = Molecule()
    frames.elem = [Elements[z] for z in first_molecule.atomic_numbers]
    frames.xyzs = [mol.geometry * bohr2ang for mol, _ in molecule_data_list]
    frames.qm_energies = [energy for _, energy in molecule_data_list]
    frames.write("coords.xyz")
    # qdata.txt holds both the coordinates and the energies
    frames.write('qdata.txt')
def read_sdf_to_fb_mol(filename):
    """Read an sdf file holding exactly one molecule and return it as a ForceBalance Molecule.

    The returned object carries elements, one coordinate frame, bonds (each
    stored with the smaller atom index first), bond orders, formal charges,
    the total molecular charge, the source OpenEye molecule as ``oe_mol`` and
    the cmiles identifiers under ``Data['cmiles_id']``.
    """
    import numpy as np
    from forcebalance.molecule import Molecule, Elements
    oe_mols = read_split_mols(filename)
    assert len(oe_mols) == 1, 'file contains multiple molecules'
    oe_mol = oe_mols[0]
    fb_mol = Molecule()
    # elements, in atom iteration order
    fb_mol.elem = [Elements[atom.GetAtomicNum()] for atom in oe_mol.GetAtoms()]
    n_atoms = len(fb_mol.elem)
    # coordinates come back keyed by atom index; stack them into one frame
    coords_by_index = oe_mol.GetCoords()
    frame = np.array([coords_by_index[idx] for idx in range(n_atoms)])
    fb_mol.xyzs = [frame]
    # bonds as (low index, high index) pairs with their orders
    bond_pairs = []
    orders = []
    for oe_bond in oe_mol.GetBonds():
        begin = oe_bond.GetBgnIdx()
        end = oe_bond.GetEndIdx()
        bond_pairs.append((min(begin, end), max(begin, end)))
        orders.append(oe_bond.GetOrder())
    fb_mol.bonds = bond_pairs
    fb_mol.bond_orders = orders
    # formal charges per atom and their sum
    formal_charges = [atom.GetFormalCharge() for atom in oe_mol.GetAtoms()]
    fb_mol.Data['molecular_charge'] = sum(formal_charges)
    fb_mol.Data['atomic_formal_charges'] = formal_charges
    # keep a handle on the original OpenEye molecule
    fb_mol.oe_mol = oe_mol
    # cmiles identifiers derived from the mapped SMILES
    fb_mol.Data['cmiles_id'] = cmiles.generator.get_molecule_ids(
        cmiles.utils.mol_to_smiles(oe_mol))
    return fb_mol
示例#6
0
from forcebalance.molecule import Molecule
from forcebalance.readfrq import read_frq_gen

# Frequency output file.
fout = sys.argv[1]

# Mode number, starting from 1.
modenum = int(sys.argv[2])

# Zero and negative numbers must be rejected: `modes[modenum - 1]` would
# otherwise be a negative index and silently pick a mode counted from the end.
if modenum < 1:
    raise RuntimeError("Start mode number from one, please")

frqs, modes, intens, elem, xyz = read_frq_gen(fout)

M = Molecule()
M.elem = elem[:]
M.xyzs = []

# Normalize the selected mode, then scale it to the maximum displacement.
xmode = modes[modenum - 1]
xmode /= (np.linalg.norm(xmode)/np.sqrt(M.na))
xmode *= 0.3 # Reasonable vibrational amplitude

# One full oscillation: 0 -> +1 -> 0 -> -1 -> (just before) 0.
spac = np.linspace(0, 1, 101)
disp = np.concatenate((spac, spac[::-1][1:], -1*spac[1:], -1*spac[::-1][1:-1]))

# One frame per displacement step; the reshape is loop-invariant.
xmode_xyz = xmode.reshape(-1,3)
for step in disp:
    M.xyzs.append(xyz+step*xmode_xyz)

# The amplitude factor does not change between frames, so compute it once.
amplitude = np.linalg.norm(xmode)/np.sqrt(M.na)
M.comms = ['Vibrational Mode %i Frequency %.3f Displacement %.3f' % (modenum, frqs[modenum-1], step*amplitude) for step in disp]

M.write(os.path.splitext(fout)[0]+'.mode%03i.xyz' % modenum)
def gen_tid_calculated_molecules_list(torsiondrive_data,
                                      forcefield,
                                      verbose=False):
    """Group the usable torsion scans in ``torsiondrive_data`` by torsion parameter id.

    For every entry the first initial molecule is written to ``input.mol2``
    inside a scratch ``tmp`` folder, checked with ``test_ff_mol2`` against
    ``forcefield``, and its scan is written to ``scan.xyz`` and checked with
    ``check_Hbond``. Entries failing either check are skipped. A summary table
    of the non-empty torsion ids is printed at the end.

    Parameters
    ----------
    torsiondrive_data : dict
        Maps an entry index to a dict with the keys read here:
        'initial_molecules', 'final_molecules', 'final_energies',
        'final_gradients', 'attributes' and 'keywords'.
    forcefield
        Object providing ``get_parameter_handler('ProperTorsions')``; also
        passed to ``test_ff_mol2``.
    verbose : bool
        When true, print a line for every excluded entry.

    Returns
    -------
    tuple
        ``(tid_calculated_molecules_list, molecules_list_dict_from_td)``: a
        dict from torsion id to a list of ``{'mol_index', 'indices'}`` dicts,
        and a dict from mol_index to the list of trimmed qcschema molecules.

    Notes
    -----
    An existing ``tmp`` folder in the working directory is deleted. The
    working directory is restored even if processing an entry raises.
    """
    # one (initially empty) list per torsion id known to the force field
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    tid_calculated_molecules_list = {
        torsion_param.id: [] for torsion_param in ff_torsion_param_list
    }
    molecules_list_dict_from_td = {}

    # keys copied from each qcschema molecule into the returned dict
    qcschema_keys = ('symbols', 'geometry', 'connectivity',
                     'molecular_charge', 'molecular_multiplicity')

    if os.path.exists('tmp'):
        shutil.rmtree('tmp')
    os.mkdir('tmp')
    start_dir = os.getcwd()
    os.chdir('tmp')
    try:
        for td_data in torsiondrive_data.values():
            # pick a single initial molecule
            qcmol = td_data['initial_molecules'][0]

            # write input.mol2 file
            oemol = cmiles.utils.load_molecule(qcmol.dict(encoding='json'))
            ofs.open('input.mol2')
            oechem.OEWriteMolecule(ofs, oemol)
            ofs.close()

            # test mol2 file
            success, msg, molecule_labels = test_ff_mol2(
                forcefield, 'input.mol2')
            if not success:
                if verbose:
                    print(
                        'Error occured while testing input.mol2. Excluded in tid_calculated_molecules_list. '
                    )
                continue

            # write scan.xyz so the scan can be screened for internal H bonds
            fbmol = FBMolecule('input.mol2')
            final_molecules = td_data['final_molecules']
            target_mol = FBMolecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted(final_molecules.keys()):
                # convert geometry unit Bohr -> Angstrom
                target_mol.xyzs.append(
                    final_molecules[grid_id].geometry * 0.529177)
                # add energy and gradient
                target_mol.qm_energies.append(
                    td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(
                    td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')

            no_hbonds = check_Hbond(scan_fnm='scan.xyz', top_fnm='input.mol2')
            if not no_hbonds:
                if verbose:
                    print(
                        'Internal hydrogen bond detacted. Excluded in tid_calculated_molecules_list. '
                    )
                continue

            # the entry passed both checks: record it under its torsion id
            mol_index = td_data['attributes']["canonical_isomeric_smiles"]
            indices = td_data['keywords']['dihedrals'][0]
            tid = molecule_labels['ProperTorsions'][tuple(indices)].id
            tid_calculated_molecules_list[tid].append({
                'mol_index': mol_index,
                'indices': indices
            })

            # keep only the qcschema fields listed in qcschema_keys
            qcschema_molecules = []
            for initial_mol in td_data['initial_molecules']:
                j_dict = initial_mol.dict(encoding='json')
                qcschema_molecules.append(
                    {key: j_dict[key] for key in qcschema_keys})
            molecules_list_dict_from_td[mol_index] = qcschema_molecules
    finally:
        # never leave the process stranded inside the scratch folder
        os.chdir(start_dir)

    print("\n## Available torsion scans from QCArchive ##\n" + '-' * 90)
    print(f"{'idx':<7} {'tid':7s}  {'Number of torsion scans'}")
    for idx, (tid, molecules_list) in enumerate(
            tid_calculated_molecules_list.items()):
        if len(molecules_list) > 0:
            print(f'{idx:<7} {tid:7s}  {len(molecules_list)}')
    print('-' * 90)
    shutil.rmtree('tmp')
    return tid_calculated_molecules_list, molecules_list_dict_from_td
示例#8
0
def make_fb_targets():
    """Build one ForceBalance target folder per result folder under ``results_folder``.

    Reads the module-level ``results_folder``, ``molecules_folder``,
    ``out_folder`` and ``target_str``. ``out_folder`` is deleted and recreated.
    For each result folder with at least one ``.xyz`` scan, a ``td_<name>``
    target is written containing qdata.txt, scan.xyz, conf.pdb, input.mol2 and
    notes.txt; finally ``targets.in`` is written into ``out_folder``.
    """
    candidate_paths = (os.path.join(results_folder, entry)
                       for entry in os.listdir(results_folder))
    result_mol_folders = sorted(
        path for path in candidate_paths if os.path.isdir(path))
    print(
        f"\nLoading data from {len(result_mol_folders)} result folders under {results_folder}"
    )
    # start from an empty output folder
    if os.path.exists(out_folder):
        shutil.rmtree(out_folder)
    os.mkdir(out_folder)
    target_names = []
    for mol_folder in result_mol_folders:
        # the molecule file shares its name with the result folder
        mol_name = os.path.basename(mol_folder)
        mol_file = os.path.join(molecules_folder, mol_name + '.mol2')
        molecule = Molecule(mol_file)
        # every .xyz file in the folder counts as a finished scan
        finished_scans = [
            stem
            for stem, ext in map(os.path.splitext, os.listdir(mol_folder))
            if ext == '.xyz'
        ]
        if not finished_scans:
            print(f'No finished scans found in {mol_folder}')
            continue
        # (xyz, gradxyz) file pair of every finished scan
        scan_files = [(os.path.join(mol_folder, stem + '.xyz'),
                       os.path.join(mol_folder, stem + '.gradxyz'))
                      for stem in finished_scans]
        # register and create the target folder
        target_name = 'td_' + mol_name
        target_names.append(target_name)
        this_target_folder = os.path.join(out_folder, target_name)
        os.mkdir(this_target_folder)
        # merge the frames, energies and gradients of all scans
        target_mol = Molecule()
        target_mol.elem = molecule.elem
        target_mol.xyzs = []
        target_mol.qm_energies = []
        target_mol.qm_grads = []
        for xyz_file, grad_file in scan_files:
            scan = Molecule(xyz_file)
            target_mol.xyzs += scan.xyzs
            # the energy is the last token of each comment line
            target_mol.qm_energies += [
                float(comment.split()[-1]) for comment in scan.comms
            ]
            target_mol.qm_grads += read_gradxyz(grad_file)
        # data files of the target
        target_mol.write(os.path.join(this_target_folder, 'qdata.txt'))
        target_mol.write(os.path.join(this_target_folder, 'scan.xyz'))
        molecule.write(os.path.join(this_target_folder, 'conf.pdb'))
        shutil.copyfile(mol_file, os.path.join(this_target_folder,
                                               'input.mol2'))
        # provenance note listing every input file that was used
        note_lines = [
            "Notes: This target is made by make_fb_targets.py, using data from\n",
            mol_file + '\n',
        ]
        for xyz_file, grad_file in scan_files:
            note_lines.append(xyz_file + '\n')
            note_lines.append(grad_file + '\n')
        with open(os.path.join(this_target_folder, 'notes.txt'), 'w') as fnote:
            fnote.writelines(note_lines)
    # targets.in collects one formatted section per target
    targets_in_path = os.path.join(out_folder, 'targets.in')
    with open(targets_in_path, 'w') as fout:
        fout.writelines(
            target_str.format(name=tname) + '\n' for tname in target_names)
    print("Targets generation finished!")
    print(
        f"You can copy contents in {os.path.join(out_folder, 'targets.in')} to your ForceBalance input file."
    )
示例#9
0
def make_torsiondrive_target(dataset_name, torsiondrive_data, test_ff=None):
    """
    Make a folder of ForceBalance targets from the torsiondrive data

    A fresh ``targets`` folder is created in the working directory (an
    existing one is deleted). Every entry becomes its own target folder with
    note.txt, input.mol2, conf.pdb, scan.xyz, qdata.txt and metadata.json.
    Entries that fail the optional force field test, or for which
    ``screening_Hbond`` does not report ``no_hbonds``, are removed again and
    their mol2 file plus an error note are stored under ``targets/error_mol2s``.
    A ``targets.<dataset_name>.in`` file listing the accepted targets is
    written into ``targets``.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset; spaces are replaced by underscores in file and
        folder names.
    torsiondrive_data : dict
        Maps an entry index to a dict with the keys read here:
        'initial_molecules', 'final_molecules', 'final_energies',
        'final_gradients', 'keywords' and 'attributes'.
    test_ff : optional
        When given, each input.mol2 is checked with ``test_ff_mol2`` and the
        matched torsion SMIRKS are added to the metadata.

    Notes
    -----
    The working directory is restored on exit, also when an entry raises.
    """
    target_name_prefix = 'td_' + dataset_name.replace(' ', '_')

    def reject_target(target_name, entry_index, td_data, msg):
        """Archive input.mol2 with an error note, then delete the target folder.

        Must be called with the target folder as working directory; leaves
        the working directory in the parent ``targets`` folder.
        """
        os.makedirs('../error_mol2s', exist_ok=True)
        shutil.move('input.mol2', f'../error_mol2s/{target_name}.mol2')
        with open(f'../error_mol2s/{target_name}_error.txt',
                  'w') as notefile:
            notefile.write(f'{dataset_name}\ntarget_name {target_name}\n')
            notefile.write(
                f'entry {entry_index}\ntd_keywords {td_data["keywords"]}\n')
            notefile.write(f'error message:\n{msg}')
        # remove this folder
        os.chdir('..')
        shutil.rmtree(target_name)

    # create new targets folder
    if os.path.exists('targets'):
        shutil.rmtree('targets')
    os.mkdir('targets')
    start_dir = os.getcwd()
    os.chdir('targets')
    try:
        # write each entry as an individual target
        n_targets = len(torsiondrive_data)
        idx_fmt_string = get_int_fmt_string(n_targets)
        target_names = []
        print(f"Generating {n_targets} targets")
        for target_idx, (entry_index, td_data) in enumerate(
                torsiondrive_data.items()):
            # pick a single initial molecule
            qcmol = td_data['initial_molecules'][0]
            mol_formula = qcmol.get_molecular_formula()
            # create target folder
            target_idx_str = idx_fmt_string.format(target_idx)
            target_name = f"{target_name_prefix}_{target_idx_str}_{mol_formula}"
            print(f"{target_idx}: {target_name}")
            os.mkdir(target_name)
            os.chdir(target_name)
            # write a note
            with open('note.txt', 'w') as notefile:
                notefile.write(
                    f'Target generated from dataset {dataset_name}, entry {entry_index}'
                )
            # write input.mol2 file
            oemol = cmiles.utils.load_molecule(qcmol.dict(encoding='json'))
            ofs.open('input.mol2')
            oechem.OEWriteMolecule(ofs, oemol)
            ofs.close()
            # test mol2 file (only when a force field was supplied)
            molecule_labels = None
            if test_ff is not None:
                success, msg, molecule_labels = test_ff_mol2(
                    test_ff, 'input.mol2')
                if not success:
                    reject_target(target_name, entry_index, td_data, msg)
                    continue
            # write conf.pdb file
            fbmol = Molecule('input.mol2')
            fbmol.write('conf.pdb')
            # list of grid ids sorted
            sorted_grid_ids = sorted(td_data['final_molecules'].keys())
            # write scan.xyz and qdata.txt files
            target_mol = Molecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted_grid_ids:
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                target_mol.xyzs.append(grid_qc_mol.geometry * 0.529177)
                # add energy and gradient
                target_mol.qm_energies.append(
                    td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(
                    td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')
            target_mol.write('qdata.txt')
            # check if the torsion scan contains one or more conformers
            # forming strong internal H bonds
            no_hbonds, _ = screening_Hbond(mol2_fnm='input.mol2',
                                           scan_fnm='scan.xyz')
            # Explicit comparison kept on purpose: the return type of
            # screening_Hbond is not visible here.
            if no_hbonds != True:
                reject_target(target_name, entry_index, td_data,
                              'One or more internal H bonds exist.')
                continue
            # pick metadata to write into the metadata.json file
            metadata = copy.deepcopy(td_data['keywords'])
            metadata['dataset_name'] = dataset_name
            metadata['entry_label'] = entry_index
            metadata['canonical_smiles'] = td_data['attributes'].get(
                'canonical_smiles', 'unknown')
            metadata['torsion_grid_ids'] = sorted_grid_ids
            # find SMIRKs for torsion being scanned if test_ff is provided
            if test_ff is not None:
                metadata['smirks'] = []
                metadata['smirks_ids'] = []
                for torsion_indices in td_data['keywords']['dihedrals']:
                    param = molecule_labels['ProperTorsions'][tuple(
                        torsion_indices)]
                    metadata['smirks'].append(param.smirks)
                    metadata['smirks_ids'].append(param.id)
            with open('metadata.json', 'w') as jsonfile:
                json.dump(metadata, jsonfile, indent=2)
            # finish this target
            target_names.append(target_name)
            os.chdir('..')

        # write targets.{dataset_name}.in file
        target_in_fnm = f"targets.{dataset_name.replace(' ', '_')}.in"
        with open(target_in_fnm, 'w') as outfile:
            for target_name in target_names:
                outfile.write(target_in_str.format(name=target_name))
        print(f"Successfull generated {len(target_names)} targets.")
        print(
            f"You can copy contents in {target_in_fnm} to your ForceBalance input file."
        )
    finally:
        # return to where we started, whatever folder an error left us in
        os.chdir(start_dir)
示例#10
0
                espval.append(float(s[3]))
            elif len(espxyz) > 0:
                # After reading in a block of ESPs, don't read any more.
                ESPMode = -1 
        if line.strip().startswith("Geometry (in Angstrom)"):
            XMode = 1
            EMode = len(elem) == 0
        if 'Electrostatic Potential' in line.strip() and ESPMode == 0:
            ESPMode = 1
    if len(xyzs) == 0:
        raise Exception('%s has length zero' % psiout)
    return xyzs, elem, espxyz, espval

# Parse the Psi4 output named on the command line; all output files except
# qdata.txt reuse its name without the extension.
xyzs, elem, espxyz, espval = read_psi_xyzesp(sys.argv[1])
out_stem = os.path.splitext(sys.argv[1])[0]

# Molecular geometry -> <stem>.xyz
M = Molecule()
M.xyzs = xyzs
M.elem = elem
M.write('%s.xyz' % out_stem)

# ESP grid points (scaled by 0.52917721092) written as hydrogen atoms in
# xyz format -> <stem>.espx
EM = Molecule()
EM.xyzs = [np.array(espxyz) * 0.52917721092]
EM.elem = ['H'] * len(espxyz)
EM.write('%s.espx' % out_stem, ftype="xyz")

# Attach the grid and the potential values to the geometry -> qdata.txt
M.qm_espxyzs = EM.xyzs
M.qm_espvals = [np.array(espval)]
M.write("qdata.txt")

# Raw potential values -> <stem>.esp
np.savetxt('%s.esp' % out_stem, espval)
示例#11
0
            elif len(espxyz) > 0:
                # After reading in a block of ESPs, don't read any more.
                ESPMode = -1
        if line.strip().startswith("Geometry (in Angstrom)"):
            XMode = 1
            EMode = len(elem) == 0
        if 'Electrostatic Potential' in line.strip() and ESPMode == 0:
            ESPMode = 1
    if len(xyzs) == 0:
        raise Exception('%s has length zero' % psiout)
    return xyzs, elem, espxyz, espval


# Read geometry and electrostatic potential data from the Psi4 output given
# as first command-line argument.
psi_output = sys.argv[1]
xyzs, elem, espxyz, espval = read_psi_xyzesp(psi_output)
# Output files (other than qdata.txt) are named after the input, minus extension.
prefix = os.path.splitext(psi_output)[0]

# Geometry file.
M = Molecule()
M.xyzs = xyzs
M.elem = elem
M.write('%s.xyz' % prefix)

# Grid point file: every point is stored as an 'H' atom, coordinates
# multiplied by 0.52917721092.
EM = Molecule()
EM.xyzs = [np.array(espxyz) * 0.52917721092]
EM.elem = ['H'] * len(espxyz)
EM.write('%s.espx' % prefix, ftype="xyz")

# qdata.txt combines the geometry with the grid and the potential values.
M.qm_espxyzs = EM.xyzs
M.qm_espvals = [np.array(espval)]
M.write("qdata.txt")

# Plain-text dump of the potential values.
np.savetxt('%s.esp' % prefix, espval)
示例#12
0
import os, sys, re
import numpy as np
from forcebalance.molecule import Molecule
from forcebalance.readfrq import read_frq_psi

# Psi4 output file.
psiout = sys.argv[1]

# Mode number, starting from 1.
modenum = int(sys.argv[2])

# Zero and negative numbers must be rejected: `modes[modenum - 1]` would
# otherwise be a negative index and silently pick a mode counted from the end.
if modenum < 1:
    raise RuntimeError("Start mode number from one, please")

frqs, modes, elem, xyz = read_frq_psi(psiout)

M = Molecule()
M.elem = elem[:]
M.xyzs = []

# Normalize the selected mode, then scale it to the maximum displacement.
xmode = modes[modenum - 1]
xmode /= (np.linalg.norm(xmode)/np.sqrt(M.na))
xmode *= 0.3 # Reasonable vibrational amplitude

# One full oscillation: 0 -> +1 -> 0 -> -1 -> (just before) 0.
spac = np.linspace(0, 1, 101)
disp = np.concatenate((spac, spac[::-1][1:], -1*spac[1:], -1*spac[::-1][1:-1]))

# One frame per displacement step.
for step in disp:
    M.xyzs.append(xyz+step*xmode)

# The amplitude factor does not change between frames, so compute it once.
amplitude = np.linalg.norm(xmode)/np.sqrt(M.na)
M.comms = ['Vibrational Mode %i Frequency %.3f Displacement %.3f' % (modenum, frqs[modenum-1], step*amplitude) for step in disp]

M.write(os.path.splitext(psiout)[0]+'.mode%03i.xyz' % modenum)