def test_get_bonds(self):
    """
    Compare the output of ``_get_bonds`` with stored reference bond lists.

    Exercises ``t1.xyz`` and ``molecule_with_halogen_bonds_1.xyz`` with
    default comparators, and ``MgBH42.xyz`` both with a default comparator
    and with ``ignore_ionic_bond=False``.
    """
    default_msc = MoleculeStructureComparator()

    # --- t1.xyz, default settings ---
    t1_mol = Molecule.from_file(os.path.join(test_dir, "t1.xyz"))
    expected_t1 = [
        (0, 1), (0, 2), (0, 3), (0, 23), (3, 4), (3, 5), (5, 6), (5, 7),
        (7, 8), (7, 9), (7, 21), (9, 10), (9, 11), (9, 12), (12, 13),
        (12, 14), (12, 15), (15, 16), (15, 17), (15, 18), (18, 19),
        (18, 20), (18, 21), (21, 22), (21, 23), (23, 24), (23, 25),
    ]
    # noinspection PyProtectedMember
    self.assertEqual(default_msc._get_bonds(t1_mol), expected_t1)

    # --- MgBH42.xyz, default settings ---
    mgbh_mol = Molecule.from_file(os.path.join(test_dir, "MgBH42.xyz"))
    expected_mgbh_default = [
        (1, 3), (2, 3), (3, 4), (3, 5), (6, 8), (7, 8), (8, 9), (8, 10),
    ]
    # noinspection PyProtectedMember
    self.assertEqual(default_msc._get_bonds(mgbh_mol), expected_mgbh_default)

    # --- MgBH42.xyz again, this time with ignore_ionic_bond switched off;
    # the reference list then also contains pairs involving atom 0 ---
    ionic_msc = MoleculeStructureComparator(ignore_ionic_bond=False)
    expected_mgbh_ionic = [
        (0, 1), (0, 2), (0, 3), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9),
        (1, 3), (2, 3), (3, 4), (3, 5), (6, 8), (7, 8), (8, 9), (8, 10),
    ]
    # noinspection PyProtectedMember
    self.assertEqual(ionic_msc._get_bonds(mgbh_mol), expected_mgbh_ionic)

    # --- halogen-bond molecule, fresh default comparator ---
    halogen_mol = Molecule.from_file(
        os.path.join(test_dir, "molecule_with_halogen_bonds_1.xyz"))
    halogen_msc = MoleculeStructureComparator()
    expected_halogen = [
        (0, 12), (0, 13), (0, 14), (0, 15), (1, 12), (1, 16), (1, 17),
        (1, 18), (2, 4), (2, 11), (2, 19), (3, 5), (3, 10), (3, 20),
        (4, 6), (4, 10), (5, 11), (5, 12), (6, 7), (6, 8), (6, 9),
    ]
    # noinspection PyProtectedMember
    self.assertEqual(halogen_msc._get_bonds(halogen_mol), expected_halogen)
def test_structural_change_in_geom_opt(self):
    """
    The first and last geometries stored in ``mol_1_3_bond.qcout`` must be
    reported as equal when the comparator is given the priority bonds.
    """
    output = QcOutput(os.path.join(test_dir, "mol_1_3_bond.qcout"))
    first_geom = output.data[0]["molecules"][0]
    last_geom = output.data[0]["molecules"][-1]

    known_bonds = [
        [0, 1], [0, 2], [1, 3], [1, 4], [1, 7], [2, 5],
        [2, 6], [2, 8], [4, 6], [4, 10], [6, 9],
    ]
    comparator = MoleculeStructureComparator(priority_bonds=known_bonds)

    self.assertTrue(comparator.are_equal(first_geom, last_geom))
def test_get_bonds(self):
    """
    ``_get_bonds`` on ``t1.xyz`` (loaded via ``read_mol``) must reproduce
    the stored reference bond list.
    """
    expected = [
        (0, 1), (0, 2), (0, 3), (0, 23), (3, 4), (3, 5), (5, 6), (5, 7),
        (7, 8), (7, 9), (7, 21), (9, 10), (9, 11), (9, 12), (12, 13),
        (12, 14), (12, 15), (15, 16), (15, 17), (15, 18), (18, 19),
        (18, 20), (18, 21), (21, 22), (21, 23), (23, 24), (23, 25),
    ]
    t1_mol = read_mol(os.path.join(test_dir, "t1.xyz"))
    comparator = MoleculeStructureComparator()
    # noinspection PyProtectedMember
    found = comparator._get_bonds(t1_mol)
    self.assertEqual(found, expected)
def test_to_and_from_dict(self):
    """
    Round-trip a comparator through ``to_dict`` / ``from_dict`` and check
    that the dict representation is unchanged, first for a default
    comparator and then for one built with explicit caps and priority bonds.
    """
    default_msc = MoleculeStructureComparator()
    original = default_msc.to_dict
    restored = MoleculeStructureComparator.from_dict(original).to_dict
    self.assertEqual(original, restored)

    thiophene = read_mol(os.path.join(test_dir, "thiophene1.xyz"))
    # noinspection PyProtectedMember
    thiophene_bonds = default_msc._get_bonds(thiophene)
    custom_msc = MoleculeStructureComparator(
        bond_length_cap=0.2,
        priority_bonds=thiophene_bonds,
        priority_cap=0.5)
    original = custom_msc.to_dict
    restored = MoleculeStructureComparator.from_dict(original).to_dict
    self.assertEqual(original, restored)
def test_to_and_from_dict(self):
    """
    Round-trip a comparator through ``as_dict()`` / ``from_dict`` and check
    that the dict representation is unchanged, first for a default
    comparator and then for one built with explicit caps and priority bonds.
    """
    default_msc = MoleculeStructureComparator()
    before = default_msc.as_dict()
    after = MoleculeStructureComparator.from_dict(before).as_dict()
    self.assertEqual(before, after)

    thiophene = Molecule.from_file(os.path.join(test_dir, "thiophene1.xyz"))
    # noinspection PyProtectedMember
    thiophene_bonds = default_msc._get_bonds(thiophene)
    custom_msc = MoleculeStructureComparator(
        bond_length_cap=0.2,
        priority_bonds=thiophene_bonds,
        priority_cap=0.5,
    )
    before = custom_msc.as_dict()
    after = MoleculeStructureComparator.from_dict(before).as_dict()
    self.assertEqual(before, after)
def test_get_13_bonds(self):
    """
    ``get_13_bonds`` applied to the priority bond list must return the
    stored reference tuple of index pairs.
    """
    known_bonds = [
        [0, 1], [0, 2], [1, 3], [1, 4], [1, 7], [2, 5],
        [2, 6], [2, 8], [4, 6], [4, 10], [6, 9],
    ]
    expected = (
        (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (1, 2), (1, 6),
        (1, 10), (2, 4), (2, 9), (3, 4), (3, 7), (4, 7), (4, 9), (5, 6),
        (5, 8), (6, 8), (6, 10),
    )
    result = MoleculeStructureComparator.get_13_bonds(known_bonds)
    self.assertEqual(result, expected)
def test_get_bonds(self):
    """
    Check ``_get_bonds`` against reference bond lists for ``t1.xyz``,
    ``MgBH42.xyz`` (default and ``ignore_ionic_bond=False``) and
    ``molecule_with_halogen_bonds_1.xyz``.
    """

    def load(filename):
        # All fixtures live in the shared test directory.
        return Molecule.from_file(os.path.join(test_dir, filename))

    t1 = load("t1.xyz")
    comparator = MoleculeStructureComparator()
    # noinspection PyProtectedMember
    t1_bonds = comparator._get_bonds(t1)
    self.assertEqual(t1_bonds, [
        (0, 1), (0, 2), (0, 3), (0, 23), (3, 4), (3, 5), (5, 6), (5, 7),
        (7, 8), (7, 9), (7, 21), (9, 10), (9, 11), (9, 12), (12, 13),
        (12, 14), (12, 15), (15, 16), (15, 17), (15, 18), (18, 19),
        (18, 20), (18, 21), (21, 22), (21, 23), (23, 24), (23, 25),
    ])

    mgbh = load("MgBH42.xyz")
    # noinspection PyProtectedMember
    mgbh_bonds = comparator._get_bonds(mgbh)
    self.assertEqual(mgbh_bonds, [
        (1, 3), (2, 3), (3, 4), (3, 5), (6, 8), (7, 8), (8, 9), (8, 10),
    ])

    # Same molecule, but with ignore_ionic_bond disabled on the comparator.
    comparator = MoleculeStructureComparator(ignore_ionic_bond=False)
    # noinspection PyProtectedMember
    mgbh_bonds = comparator._get_bonds(mgbh)
    self.assertEqual(mgbh_bonds, [
        (0, 1), (0, 2), (0, 3), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9),
        (1, 3), (2, 3), (3, 4), (3, 5), (6, 8), (7, 8), (8, 9), (8, 10),
    ])

    halogen = load("molecule_with_halogen_bonds_1.xyz")
    comparator = MoleculeStructureComparator()
    # noinspection PyProtectedMember
    halogen_bonds = comparator._get_bonds(halogen)
    self.assertEqual(halogen_bonds, [
        (0, 12), (0, 13), (0, 14), (0, 15), (1, 12), (1, 16), (1, 17),
        (1, 18), (2, 4), (2, 11), (2, 19), (3, 5), (3, 10), (3, 20),
        (4, 6), (4, 10), (5, 11), (5, 12), (6, 7), (6, 8), (6, 9),
    ])
def _check_structure_change(mol1, mol2, fw_spec): """ Check whether structure is changed: Return: True: structure changed, False: unchanged """ if 'egsnl' not in fw_spec: raise ValueError("Can't find initial SNL") if isinstance(fw_spec['egsnl'], EGStructureNL): egsnl_dict = fw_spec['egsnl'].as_dict() else: egsnl_dict = fw_spec['egsnl'] if 'known_bonds' not in egsnl_dict: raise ValueError("Can't find known bonds information") bonds = egsnl_dict['known_bonds'] msc = MoleculeStructureComparator(priority_bonds=bonds) return not msc.are_equal(mol1, mol2)
def _check_structure_change(mol1, mol2, qcout_path):
    """
    Decide whether the structure changed between two molecules.

    The known bonds are read from the ``FW.json`` file located in the same
    directory as ``qcout_path`` (under ``spec -> egsnl -> known_bonds``) and
    passed to ``MoleculeStructureComparator`` as priority bonds.

    Args:
        mol1: First molecule handed to ``are_equal``.
        mol2: Second molecule handed to ``are_equal``.
        qcout_path: Path whose directory contains ``FW.json``.

    Returns:
        True: structure changed, False: unchanged

    Raises:
        ValueError: if ``'egsnl'`` is missing from the spec or
            ``'known_bonds'`` is missing from the SNL entry.
    """
    fw_json = os.path.join(os.path.dirname(qcout_path), "FW.json")
    with zopen(zpath(fw_json), 'rt') as handle:
        spec = json.load(handle)['spec']

    if 'egsnl' not in spec:
        raise ValueError("Can't find initial SNL")
    snl = spec['egsnl']
    if 'known_bonds' not in snl:
        raise ValueError("Can't find known bonds information")

    comparator = MoleculeStructureComparator(priority_bonds=snl['known_bonds'])
    unchanged = comparator.are_equal(mol1, mol2)
    return not unchanged
def test_are_equal(self):
    """
    ``are_equal`` must report t1/t2 as different and t2/t3 as equal with a
    default comparator, and the two thiophene files as equal when the
    bonds of the first one are used as priority bonds.
    """

    def load(filename):
        return Molecule.from_file(os.path.join(test_dir, filename))

    default_msc = MoleculeStructureComparator()
    t1, t2, t3 = load("t1.xyz"), load("t2.xyz"), load("t3.xyz")
    self.assertFalse(default_msc.are_equal(t1, t2))
    self.assertTrue(default_msc.are_equal(t2, t3))

    thiophene_a = load("thiophene1.xyz")
    thiophene_b = load("thiophene2.xyz")
    # noinspection PyProtectedMember
    reference_bonds = default_msc._get_bonds(thiophene_a)
    priority_msc = MoleculeStructureComparator(priority_bonds=reference_bonds)
    self.assertTrue(priority_msc.are_equal(thiophene_a, thiophene_b))
def test_are_equal(self):
    """
    ``are_equal`` must report t1/t2 as different and t2/t3 as equal with a
    default comparator; the thiophene pair and the halogen-bond pair must
    each compare equal when the first molecule's bonds are used as
    priority bonds.
    """

    def load(filename):
        return Molecule.from_file(os.path.join(test_dir, filename))

    default_msc = MoleculeStructureComparator()
    t1, t2, t3 = load("t1.xyz"), load("t2.xyz"), load("t3.xyz")
    self.assertFalse(default_msc.are_equal(t1, t2))
    self.assertTrue(default_msc.are_equal(t2, t3))

    thiophene_a = load("thiophene1.xyz")
    thiophene_b = load("thiophene2.xyz")
    # noinspection PyProtectedMember
    thiophene_bonds = default_msc._get_bonds(thiophene_a)
    thiophene_msc = MoleculeStructureComparator(priority_bonds=thiophene_bonds)
    self.assertTrue(thiophene_msc.are_equal(thiophene_a, thiophene_b))

    halogen_a = load("molecule_with_halogen_bonds_1.xyz")
    halogen_b = load("molecule_with_halogen_bonds_2.xyz")
    # noinspection PyProtectedMember
    halogen_bonds = default_msc._get_bonds(halogen_a)
    halogen_msc = MoleculeStructureComparator(priority_bonds=halogen_bonds)
    self.assertTrue(halogen_msc.are_equal(halogen_a, halogen_b))
def test_are_equal(self):
    """
    Using molecules loaded with ``read_mol``: t1/t2 must compare as
    different and t2/t3 as equal with a default comparator, and the two
    thiophene files as equal when the first one's bonds are priority bonds.
    """

    def load(filename):
        return read_mol(os.path.join(test_dir, filename))

    default_msc = MoleculeStructureComparator()
    t1, t2, t3 = load("t1.xyz"), load("t2.xyz"), load("t3.xyz")
    self.assertFalse(default_msc.are_equal(t1, t2))
    self.assertTrue(default_msc.are_equal(t2, t3))

    thiophene_a = load("thiophene1.xyz")
    thiophene_b = load("thiophene2.xyz")
    # noinspection PyProtectedMember
    reference_bonds = default_msc._get_bonds(thiophene_a)
    priority_msc = MoleculeStructureComparator(priority_bonds=reference_bonds)
    self.assertTrue(priority_msc.are_equal(thiophene_a, thiophene_b))
def get_redo_workflow(self, qchem_input_params, sp_params, max_iterations=3):
    """
    Identifies molecules which need to be re-run (for now, based only on
    presence of negative frequencies) and then performs a frequency
    flattening workflow on those molecules.

    This is a hack. In the future, a frequency flattening workflow should
    be used from the beginning.

    For every document in the "molecules" collection that has at least one
    negative frequency, the first frequency calculation found in
    ``calcs_reversed`` supplies the mode vector and starting geometry. The
    geometry is perturbed along that mode with decreasing scale until the
    comparator reports the perturbed molecule as equal to the starting one,
    and an ``OptFreqSPFW`` firework is queued for the perturbed molecule.

    :param qchem_input_params: dict
    :param sp_params: For OptFreqSPFW, single-point calculations can be
        treated differently from Opt and Freq. In this case, another dict
        for sp must be used.
    :param max_iterations: Maximum number of iterations for frequency
        flattening. Default is 3.
    :return: Workflow, or None if no molecule needs to be re-run.
    :raises RuntimeError: if no database is attached, or if a molecule with
        negative frequencies has no frequency calculation to perturb from.
    :raises Exception: if no perturbation scale yields a molecule that the
        comparator considers equal to the starting one.
    """
    if self.db is None:
        # The trailing space matters: the two literals are concatenated.
        raise RuntimeError("Cannot access database to determine what "
                           "molecules need to be re-calculated.")

    # Perturbation settings do not depend on the molecule, so set them up
    # once instead of once per document.
    min_molecule_perturb_scale = 0.1
    max_molecule_perturb_scale = 0.3
    scale_grid = 10
    perturb_scale_grid = (max_molecule_perturb_scale -
                          min_molecule_perturb_scale) / scale_grid
    msc = MoleculeStructureComparator()

    fws = []
    collection = self.db.db["molecules"]

    for mol in collection.find({}):
        frequencies = mol["output"]["frequencies"]
        if not any(x < 0 for x in frequencies):
            continue

        # Use the first frequency calculation listed in calcs_reversed.
        freq_calc = None
        for calc in mol["calcs_reversed"]:
            if calc["task"]["type"] in ("freq", "frequency"):
                freq_calc = calc
                break
        if freq_calc is None:
            # Without this guard the code below would fail on unset (or
            # stale, from a previous molecule) locals with an obscure error.
            raise RuntimeError(
                "No frequency calculation found for molecule {}; cannot "
                "perturb its geometry.".format(mol.get("mol_id")))

        negative_freq_vecs = freq_calc.get("frequency_mode_vectors")[0]
        old_coords = freq_calc.get("initial_geometry")
        old_molecule = Molecule.from_dict(freq_calc.get("initial_molecule"))

        # Try the largest perturbation first and step down; stop at the
        # first scale for which the comparator reports no structure change.
        for molecule_perturb_scale in np.arange(max_molecule_perturb_scale,
                                                min_molecule_perturb_scale,
                                                -perturb_scale_grid):
            new_coords = perturb_coordinates(
                old_coords=old_coords,
                negative_freq_vecs=negative_freq_vecs,
                molecule_perturb_scale=molecule_perturb_scale,
                reversed_direction=False)
            new_molecule = Molecule(
                species=old_molecule.species,
                coords=new_coords,
                charge=old_molecule.charge,
                spin_multiplicity=old_molecule.spin_multiplicity)
            if msc.are_equal(old_molecule, new_molecule):
                break
        else:
            raise Exception(
                "Unable to perturb coordinates to remove negative frequency without changing the bonding structure"
            )

        mol_id = mol["mol_id"]
        dir_name = mol["dir_name"].split("/")[-1]

        if dir_name not in listdir(self.base_dir):
            os.mkdir(join(self.base_dir, dir_name))

        fws.append(
            OptFreqSPFW(molecule=new_molecule,
                        name="Flattening: {}/{}".format(mol_id, dir_name),
                        qchem_cmd="qchem -slurm",
                        input_file=join(self.base_dir, dir_name,
                                        mol_id + ".in"),
                        output_file=join(self.base_dir, dir_name,
                                         mol_id + ".out"),
                        qclog_file=join(self.base_dir, dir_name,
                                        mol_id + ".qclog"),
                        max_cores=32,
                        max_iterations=max_iterations,
                        qchem_input_params=qchem_input_params,
                        sp_params=sp_params,
                        db_file=self.db_file))

    return Workflow(fws) if fws else None
def opt_with_frequency_flattener(cls,
                                 qchem_command,
                                 multimode="openmp",
                                 input_file="mol.qin",
                                 output_file="mol.qout",
                                 qclog_file="mol.qclog",
                                 max_iterations=10,
                                 max_molecule_perturb_scale=0.3,
                                 reversed_direction=False,
                                 ignore_connectivity=False,
                                 **QCJob_kwargs):
    """
    Generator of QCJobs that alternates optimization and frequency runs
    until the first reported frequency is positive.

    Each iteration yields an optimization job, then (unless the optimized
    structure broke into unconnected fragments) writes a frequency input for
    the optimized geometry and yields a frequency job. If the first
    frequency is not positive, the geometry is perturbed along the first
    frequency mode vector and a new optimization input is written for the
    next iteration.

    Args:
        qchem_command (str): Command to run QChem.
        multimode (str): Parallelization scheme, either openmp or mpi.
        input_file (str): Name of the QChem input file.
        output_file (str): Name of the QChem output file.
        qclog_file (str): Name of the QChem log file passed to each QCJob.
        max_iterations (int): Number of perturbation -> optimization ->
            frequency iterations to perform. Defaults to 10.
        max_molecule_perturb_scale (float): The maximum scaled perturbation
            that can be applied to the molecule. Defaults to 0.3.
        reversed_direction (bool): Whether to reverse the direction of the
            vibrational frequency vectors. Defaults to False.
        ignore_connectivity (bool): Whether to ignore differences in
            connectivity introduced by structural perturbation.
            Defaults to False.
        **QCJob_kwargs: Passthrough kwargs to QCJob. See
            :class:`custodian.qchem.jobs.QCJob`.

    Raises:
        AssertionError: if the input file does not exist, or if the
            frequency output reports any errors.
        Exception: if no perturbation scale is accepted.
    """
    min_molecule_perturb_scale = 0.1
    scale_grid = 10
    perturb_scale_grid = (max_molecule_perturb_scale -
                          min_molecule_perturb_scale) / scale_grid
    msc = MoleculeStructureComparator()

    if not os.path.exists(input_file):
        raise AssertionError('Input file must be present!')

    template = QCInput.from_file(input_file)
    opt_rem = copy.deepcopy(template.rem)
    freq_rem = copy.deepcopy(template.rem)
    freq_rem["job_type"] = "freq"

    def make_job(suffix, backup):
        # Every job shares the same command/files; only suffix and backup vary.
        return QCJob(qchem_command=qchem_command,
                     multimode=multimode,
                     input_file=input_file,
                     output_file=output_file,
                     qclog_file=qclog_file,
                     suffix=suffix,
                     backup=backup,
                     **QCJob_kwargs)

    first = True
    for ii in range(max_iterations):
        opt_suffix = ".opt_" + str(ii)
        freq_suffix = ".freq_" + str(ii)

        # --- optimization step ---
        yield make_job(opt_suffix, first)
        first = False
        opt_outdata = QCOutput(output_file + opt_suffix).data
        if opt_outdata["structure_change"] == "unconnected_fragments":
            print(
                "Unstable molecule broke into unconnected fragments! Exiting..."
            )
            break

        # --- frequency step on the optimized geometry ---
        freq_input = QCInput(
            molecule=opt_outdata.get("molecule_from_optimized_geometry"),
            rem=freq_rem,
            opt=template.opt,
            pcm=template.pcm,
            solvent=template.solvent)
        freq_input.write_file(input_file)
        yield make_job(freq_suffix, first)

        outdata = QCOutput(output_file + freq_suffix).data
        errors = outdata.get("errors")
        if len(errors) != 0:
            raise AssertionError(
                'No errors should be encountered while flattening frequencies!'
            )
        if outdata.get('frequencies')[0] > 0.0:
            print("All frequencies positive!")
            break

        # --- perturb along the first mode, largest scale first ---
        negative_freq_vecs = outdata.get("frequency_mode_vectors")[0]
        old_coords = outdata.get("initial_geometry")
        old_molecule = outdata.get("initial_molecule")
        for molecule_perturb_scale in np.arange(max_molecule_perturb_scale,
                                                min_molecule_perturb_scale,
                                                -perturb_scale_grid):
            new_coords = perturb_coordinates(
                old_coords=old_coords,
                negative_freq_vecs=negative_freq_vecs,
                molecule_perturb_scale=molecule_perturb_scale,
                reversed_direction=reversed_direction)
            new_molecule = Molecule(
                species=outdata.get('species'),
                coords=new_coords,
                charge=outdata.get('charge'),
                spin_multiplicity=outdata.get('multiplicity'))
            if msc.are_equal(old_molecule,
                             new_molecule) or ignore_connectivity:
                break
        else:
            raise Exception(
                "Unable to perturb coordinates to remove negative frequency without changing the bonding structure"
            )

        # Queue the perturbed geometry for the next optimization round.
        next_opt_input = QCInput(molecule=new_molecule,
                                 rem=opt_rem,
                                 opt=template.opt,
                                 pcm=template.pcm,
                                 solvent=template.solvent)
        next_opt_input.write_file(input_file)