def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
    """
    Build the task document for one Q-Chem calculation directory.

    Args:
        dir_name: directory that holds the input and output files.
        qcinput_files: mapping of task name -> input file name. When it has
            more entries than ``qcoutput_files`` its "orig" entry is popped
            (the caller's mapping is modified) and parsed into ``d["orig"]``.
        qcoutput_files: mapping of task name -> output file name.
        multirun: when truthy the files are parsed with
            ``process_qchem_multirun``; otherwise every output file is parsed
            with ``process_qchemrun``.

    Returns:
        dict: the assembled document, with the calculations stored newest
        first under "calcs_reversed".

    Raises:
        Exception: any failure is logged together with the directory and
            then re-raised unchanged.
    """
    opt_job_types = ("opt", "optimization")
    freq_job_types = ("freq", "frequency")
    try:
        fullpath = os.path.abspath(dir_name)
        d = jsanitize(self.additional_fields, strict=True)
        d["schema"] = {
            "code": "atomate",
            "version": QChemDrone.__version__
        }
        d["dir_name"] = fullpath

        # If a saved "orig" input file is present, parse it in case the error
        # handler made changes to the initial input molecule or rem params,
        # which we might want to filter for later.
        if len(qcinput_files) > len(qcoutput_files):
            orig_input = QCInput.from_file(
                os.path.join(dir_name, qcinput_files.pop("orig")))
            orig_molecule = orig_input.molecule.as_dict()
            orig_molecule["charge"] = int(orig_molecule["charge"])
            d["orig"] = {
                "molecule": orig_molecule,
                "rem": orig_input.rem,
                "opt": orig_input.opt,
                "pcm": orig_input.pcm,
                "solvent": orig_input.solvent,
                "smx": orig_input.smx,
            }

        if multirun:
            d["calcs_reversed"] = self.process_qchem_multirun(
                dir_name, qcinput_files, qcoutput_files)
        else:
            d["calcs_reversed"] = [
                self.process_qchemrun(dir_name, taskname,
                                      qcinput_files.get(taskname),
                                      output_filename)
                for taskname, output_filename in qcoutput_files.items()
            ]

        # reverse the calculations data order so newest calc is first
        d["calcs_reversed"].reverse()

        # Collect per-calculation structure changes and merge all warnings.
        d["structure_change"] = []
        d["warnings"] = {}
        for entry in d["calcs_reversed"]:
            if "structure_change" in entry:
                if ("structure_change" not in d["warnings"]
                        and entry["structure_change"] != "no_change"):
                    d["warnings"]["structure_change"] = True
                d["structure_change"].append(entry["structure_change"])
            for key in entry["warnings"]:
                d["warnings"].setdefault(key, True)

        d_calc_init = d["calcs_reversed"][-1]
        d_calc_final = d["calcs_reversed"][0]

        d["input"] = {
            "initial_molecule": d_calc_init["initial_molecule"],
            "job_type": d_calc_init["input"]["rem"]["job_type"],
        }
        d["output"] = {
            "initial_molecule": d_calc_final["initial_molecule"],
            "job_type": d_calc_final["input"]["rem"]["job_type"],
            "mulliken": d_calc_final["Mulliken"][-1],
        }
        if "RESP" in d_calc_final:
            d["output"]["resp"] = d_calc_final["RESP"][-1]
        elif "ESP" in d_calc_final:
            d["output"]["esp"] = d_calc_final["ESP"][-1]

        if d["output"]["job_type"] in opt_job_types:
            if "molecule_from_optimized_geometry" in d_calc_final:
                d["output"]["optimized_molecule"] = d_calc_final[
                    "molecule_from_optimized_geometry"]
                d["output"]["final_energy"] = d_calc_final["final_energy"]
            else:
                d["output"]["final_energy"] = "unstable"
            if d_calc_final["opt_constraint"]:
                d["output"]["constraint"] = [
                    d_calc_final["opt_constraint"][0],
                    float(d_calc_final["opt_constraint"][6]),
                ]
        if d["output"]["job_type"] in freq_job_types:
            d["output"]["frequencies"] = d_calc_final["frequencies"]
            d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
            d["output"]["entropy"] = d_calc_final["total_entropy"]
            if d["input"]["job_type"] in opt_job_types:
                # The run started with an optimization and ended with a
                # frequency job: the energy is taken from the calculation
                # just before the final one.
                d["output"]["optimized_molecule"] = d_calc_final[
                    "initial_molecule"]
                d["output"]["final_energy"] = d["calcs_reversed"][1][
                    "final_energy"]

        # Walk the calculations oldest first and record every geometry of
        # every optimization job.
        opt_trajectory = []
        calcs = copy.deepcopy(d["calcs_reversed"])
        calcs.reverse()
        for calc in calcs:
            if calc["input"]["rem"]["job_type"] not in opt_job_types:
                continue
            for ii, geom in enumerate(calc["geometries"]):
                site_properties = {"Mulliken": calc["Mulliken"][ii]}
                if "RESP" in calc:
                    site_properties["RESP"] = calc["RESP"][ii]
                mol = Molecule(
                    species=calc["species"],
                    coords=geom,
                    charge=calc["charge"],
                    spin_multiplicity=calc["multiplicity"],
                    site_properties=site_properties,
                )
                opt_trajectory.append({
                    "molecule": mol,
                    "energy": calc["energy_trajectory"][ii],
                })
        if opt_trajectory:
            d["opt_trajectory"] = opt_trajectory

        if "final_energy" not in d["output"]:
            if d_calc_final["final_energy"] is not None:
                d["output"]["final_energy"] = d_calc_final["final_energy"]
            else:
                # Fall back to the last value of the last SCF block.
                d["output"]["final_energy"] = d_calc_final["SCF"][-1][-1][0]

        if d_calc_final["completion"]:
            total_cputime = 0.0
            total_walltime = 0.0
            for calc in d["calcs_reversed"]:
                if calc["walltime"] is not None:
                    total_walltime += calc["walltime"]
                if calc["cputime"] is not None:
                    total_cputime += calc["cputime"]
            d["walltime"] = total_walltime
            d["cputime"] = total_cputime
        else:
            d["walltime"] = None
            d["cputime"] = None

        comp = d["output"]["initial_molecule"].composition
        d["formula_pretty"] = comp.reduced_formula
        d["formula_anonymous"] = comp.anonymized_formula
        d["formula_alphabetical"] = comp.alphabetical_formula
        d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
        if d_calc_final["point_group"] is not None:
            d["pointgroup"] = d_calc_final["point_group"]
        else:
            try:
                d["pointgroup"] = PointGroupAnalyzer(
                    d["output"]["initial_molecule"]).sch_symbol
            except ValueError:
                d["pointgroup"] = "PGA_error"

        bb = BabelMolAdaptor(d["output"]["initial_molecule"])
        pbmol = bb.pybel_mol
        smiles = pbmol.write("smi").split()[0]
        d["smiles"] = smiles

        d["state"] = "successful" if d_calc_final[
            "completion"] else "unsuccessful"

        if ("special_run_type" in d
                and d["special_run_type"] == "frequency_flattener"
                and d["state"] == "successful"):
            orig_num_neg_freq = sum(
                1 for freq in d["calcs_reversed"][-2]["frequencies"]
                if freq < 0)
            orig_energy = d_calc_init["final_energy"]
            final_num_neg_freq = sum(
                1 for freq in d_calc_final["frequencies"] if freq < 0)
            final_energy = d["calcs_reversed"][1]["final_energy"]
            d["num_frequencies_flattened"] = (orig_num_neg_freq -
                                              final_num_neg_freq)
            # If a negative frequency remains, and it's too large to ignore,
            # then the flattening was unsuccessful.
            if final_num_neg_freq > 0 and (
                    final_num_neg_freq > 1
                    or abs(d["output"]["frequencies"][0]) >= 15.0):
                d["state"] = "unsuccessful"
            if final_energy > orig_energy:
                d["warnings"]["energy_increased"] = True

        d["last_updated"] = datetime.datetime.utcnow()
        return d

    except Exception:
        # One record carrying both the directory and the traceback.
        logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                     traceback.format_exc())
        raise
def fragment_and_process(self, bonds):
    """
    Break ``bonds`` in the principle molecule and record matching entries.

    If the molecule graph splits into fragments, entries for both fragments
    are looked up and every pair whose charges add up to the charge of the
    principle is appended to ``self.bond_dissociation_energies``. If the
    split fails with ``MolGraphSplitError`` and a single bond was given, the
    bond is recorded in ``self.ring_bonds`` and the ring-opened molecule is
    looked up instead.

    Args:
        bonds: list of bonds to break; each bond is indexed as a pair of
            site indices.

    Raises:
        RuntimeError: if more than one ring-opened entry matches, or if two
            bonds are given while multibreak is not enabled.
        ValueError: if more than two bonds are given and the split fails.
    """
    # Try to split the principle:
    try:
        frags = self.mol_graph.split_molecule_subgraphs(bonds, allow_reverse=True)
        frag_success = True
    except MolGraphSplitError:
        # If split is unsuccessful, then we have encountered a ring bond
        if len(bonds) == 1:
            self.ring_bonds += bonds
            # So we open the ring and make sure we haven't already
            # encountered an identically opened fragment:
            RO_frag = open_ring(self.mol_graph, bonds, 1000)
            frag_done = any(RO_frag.isomorphic_to(done_RO_frag)
                            for done_RO_frag in self.done_RO_frags)
            if not frag_done:
                # If this is a new fragment, save the record and then search
                # for relevant fragment entries:
                self.done_RO_frags.append(RO_frag)
                opened_entries = self.search_fragment_entries(RO_frag)
                # Since a ring opening still yields a single molecule, it
                # should have the same charge as the principle.
                principle_charge = self.molecule_entry["final_molecule"]["charge"]
                # We will start by looking at entries with no structure
                # change (index 0) ...
                good_entries = [
                    frag for frag in opened_entries[0]
                    if frag["initial_molecule"]["charge"] == principle_charge
                ]
                # ... and only if none is found also at those that exhibit
                # structural changes (index 1):
                if not good_entries:
                    good_entries = [
                        frag for frag in opened_entries[1]
                        if frag["initial_molecule"]["charge"] == principle_charge
                    ]
                if not good_entries:
                    # If we still have no good entries, something must have
                    # gone wrong with the calculations:
                    bb = BabelMolAdaptor.from_molecule_graph(RO_frag)
                    pbmol = bb.pybel_mol
                    smiles = pbmol.write(str("smi")).split()[0]
                    specie = nx.get_node_attributes(self.mol_graph.graph, "specie")
                    print("Missing ring opening fragment resulting from the breakage of "
                          + specie[bonds[0][0]] + " " + specie[bonds[0][1]]
                          + " bond " + str(bonds[0][0]) + " " + str(bonds[0][1])
                          + " which would yield a molecule with this SMILES string: "
                          + smiles)
                elif len(good_entries) == 1:
                    # If we have only one good entry, format it and add it to
                    # the list that will eventually return:
                    self.bond_dissociation_energies += [self.build_new_entry(good_entries, bonds)]
                else:
                    # We shouldn't ever encounter more than one good entry.
                    raise RuntimeError("There should only be one valid ring opening fragment! Exiting...")
        elif len(bonds) == 2:
            # `multibreak` is not a name in this method's scope, so the flag
            # is read from the instance.
            # NOTE(review): assumes the constructor stores it as
            # `self.multibreak`; a missing attribute is treated as False.
            if not getattr(self, "multibreak", False):
                raise RuntimeError("Should only be trying to break two bonds if multibreak is true! Exiting...")
        else:
            message = 'No reason to try and break more than two bonds at once! Exiting...'
            print(message)
            raise ValueError(message)
        frag_success = False

    if not frag_success:
        return

    # If the principle did successfully split, then we aren't dealing with a
    # ring bond. As above, we begin by making sure we haven't already
    # encountered an identical pair of fragments:
    frags_done = False
    for frag_pair in self.done_frag_pairs:
        if frag_pair[0].isomorphic_to(frags[0]):
            if frag_pair[1].isomorphic_to(frags[1]):
                frags_done = True
                break
        elif frag_pair[1].isomorphic_to(frags[0]):
            if frag_pair[0].isomorphic_to(frags[1]):
                frags_done = True
                break
    if frags_done:
        return

    # If we haven't, we save this pair and search for the relevant fragment
    # entries:
    self.done_frag_pairs += [frags]
    frag1_entries = self.search_fragment_entries(frags[0])
    frag2_entries = self.search_fragment_entries(frags[1])

    # We then check for our expected charges of each fragment:
    charges_found = []
    for entries in (frag1_entries, frag2_entries):
        found = []
        for entry in entries[0] + entries[1]:
            charge = entry["initial_molecule"]["charge"]
            if charge not in found:
                found.append(charge)
        charges_found.append(found)

    # If we're missing some of either, tell the user:
    for frag, found in zip((frags[0], frags[1]), charges_found):
        if len(found) < len(self.expected_charges):
            bb = BabelMolAdaptor(frag.molecule)
            pbmol = bb.pybel_mol
            smiles = pbmol.write(str("smi")).split()[0]
            for charge in self.expected_charges:
                if charge not in found:
                    print("Missing charge " + str(charge) + " for fragment " + smiles)

    principle_charge = self.molecule_entry["final_molecule"]["charge"]

    def add_pairs(entries1, entries2):
        # Record every pair with the right total charge; return how many.
        added = 0
        for frag1 in entries1:
            for frag2 in entries2:
                if (frag1["initial_molecule"]["charge"]
                        + frag2["initial_molecule"]["charge"] == principle_charge):
                    self.bond_dissociation_energies += [self.build_new_entry([frag1, frag2], bonds)]
                    added += 1
        return added

    # Now we attempt to pair fragments with the right total charge, starting
    # with only fragments with no structural change (index 0):
    num_entries_for_this_frag_pair = add_pairs(frag1_entries[0], frag2_entries[0])
    # If we haven't found the number of fragment pairs that we expect, we
    # expand our search to include fragments that do exhibit structural
    # change (index 1):
    if num_entries_for_this_frag_pair < len(self.expected_charges):
        num_entries_for_this_frag_pair += add_pairs(frag1_entries[0], frag2_entries[1])
        num_entries_for_this_frag_pair += add_pairs(frag1_entries[1], frag2_entries[0])
def add_hydrogen(self):
    """
    Check that BabelMolAdaptor.add_hydrogen adds the missing hydrogens.

    Butane is read from the SMILES "CCCC"; after ``add_hydrogen`` the
    pymatgen molecule is expected to have 14 sites (C4H10).

    NOTE(review): the name lacks the ``test_`` prefix, so a default unittest
    loader presumably never collects this method; confirm whether that is
    intended before relying on it.
    """
    mol_0d = pb.readstring("smi", "CCCC").OBMol
    # Hydrogens are implicit in the SMILES, so only the four carbon atoms
    # are present before add_hydrogen is called.
    self.assertEqual(len(pb.Molecule(mol_0d).atoms), 4)
    adaptor = BabelMolAdaptor(mol_0d)
    adaptor.add_hydrogen()
    self.assertEqual(len(adaptor.pymatgen_mol.sites), 14)
def calc_energy(self, fragments_coords):
    """
    Return the energy assigned to one arrangement of the fragments.

    The raw position is first checked against the taboo list, then against
    the memorized positions, then for contacts; only if none of these
    applies is the super molecule built and evaluated with ``run_xtb``.

    Args:
        fragments_coords: coordinates of the fragments; stored as
            ``self.current_raw_position`` and flattened with
            ``itertools.chain`` when a new best position is recorded.

    Returns:
        The memorized energy if the position is already known, the xtb
        energy if it becomes the new best, and the taboo energy (1.0E5) in
        every other case.
    """
    # Energy orders:
    # Tabooed(1.0E5) > Umbrella(7.0E4) > Layout (6.0E4) > HardSphere (5.0E4) >
    # gravity (4.0E4) > PM7 (-1.0E1)
    tabooed_energy = 1.0E5
    self.current_raw_position = fragments_coords
    self.current_optimized_position = None
    if self.is_current_position_tabooed(position_type="raw"):
        # 100000
        return tabooed_energy
    # NOTE: the gap-umbrella, sqm-umbrella, layout-order, lower-sphere and
    # gravitation pre-screens that used to run here are disabled.
    memorized_energy = self.query_memory_positions(fragments_coords)
    if memorized_energy is not None:
        # Already evaluated: reuse the stored value.
        energy = memorized_energy
    elif self.contact_detector.is_contact(fragments_coords):
        print('Contact!')
        self.taboo_current_position(raw_position_only=True)
        return 1e5
    else:
        mol = self._get_super_molecule(fragments_coords)
        try:
            energy, final_mol = self.run_xtb(mol)
            final_mol = BabelMolAdaptor(final_mol)._obmol
        except ValueError as e:
            # A failed evaluation taboos the raw position.
            print(e)
            self.taboo_current_position(raw_position_only=True)
            return 1e5
        if energy < self.global_best_energy:
            print(f'New global best energy {energy} found')
            self.global_best_energy = energy
        # Imported here rather than at module level
        # (presumably to avoid a circular import - TODO confirm).
        from rubicon.utils.ion_arranger.ion_arranger import IonPlacer
        final_coords = IonPlacer.normalize_molecule(final_mol)
        self.current_optimized_position = final_coords
        if self.is_current_position_tabooed(position_type="optimized"):
            self.taboo_current_position(raw_position_only=True)
            print("Position is tabooed. Aborting.")
            return tabooed_energy
        if self.best_energy is None or energy < self.best_energy or self.current_best_optimized_position is None:
            # New best: remember energy, positions, molecule and run number.
            self.best_energy = energy
            self.current_best_optimized_position = final_coords
            self.current_best_raw_position = list(
                itertools.chain(*fragments_coords))
            self.best_mol = final_mol
            self.best_run_number = self.run_number
        else:
            # Anything that is not a new best gets the taboo energy.
            energy = tabooed_energy
        self.append_position_to_memory(fragments_coords, energy)
        # coarse grained energy,
        # make potential energy surface simpler
    return energy
coords_list = [] for line in f: line_splitted = line.split() species_list.append(line_splitted[0]) line_splitted.pop(0) line_splitted_float = [] for coord in line_splitted: coord = float(coord) line_splitted_float.append(coord) coords_list.append(line_splitted_float) mol = Molecule(species_list, coords_list) """ Find equivalent non-H sites then store a list of non-H atom without equivalent sites (label_list). """ mapper = InchiMolAtomMapper() labelInfo = mapper._inchi_labels(BabelMolAdaptor(mol)._obmol) # print(labelInfo,len(labelInfo)) label_list = list(labelInfo[0]) for equivalent_group in labelInfo[1]: for atomLabel in range(0, len(equivalent_group)): if atomLabel > 0: label_list.remove(labelInfo[0][equivalent_group[atomLabel] - 1]) print("Atomic numbers of non-H non-equivalent sites are: ", label_list) """ Find a list of sites that can be substituted using functional groups. These sites are hydrogen that directly bound to either C or N atoms. """ substitute_sitelist = [] for i in label_list: if mol[i - 1].species_string in ["C", "N"]: """
def test_make3d(self):
    """make3d must leave the wrapped OBMol with a dimension of 3."""
    butane = pb.readstring("smi", "CCCC").OBMol
    BabelMolAdaptor(butane).make3d()
    dimension = butane.GetDimension()
    self.assertEqual(dimension, 3)
def submit_reaction(self, reactant_snls, product_snls, reactant_fragments,
                    product_fragments, submitter_email, parameters=None):
    """
    Submit a reaction. This task will be separated to several single point
    energy calculations, and submitted as individual molecule.

    Args:
        reactant_snls: List of tuple(snl, count, nickname).
        product_snls: List of tuple(snl, count, nickname).
        reactant_fragments: BSSE fragments definition. (BSSEFragment)
        product_fragments: BSSE fragments definition. (BSSEFragment)
        submitter_email: Email.
        parameters: dict of parameter. Expected parameters are
            1) method: QChem theoretical method. e.g. B3LYP-XDM/6-31+G*;
            2) solvent: implicit solvent in energy calculation. e.g. THF;
            ...
            A "reaction_id" key is added to this dict before it is passed
            on to ``submit_snl``. ``None`` is treated as an empty dict.

    Returns:
        The id of the newly inserted reaction.

    Raises:
        ValueError: if reactants and products do not contain the same
            number of atoms of every element.
    """
    # The default is None, but the dict is written to further down.
    if parameters is None:
        parameters = {}

    def count_elements(snl_tuples):
        # Element symbol -> number of atoms, weighted by species count.
        counts = defaultdict(int)
        for snl, n, _ in snl_tuples:
            for site in snl.structure.sites:
                counts[site.specie.symbol] += n
        return counts

    def describe(snl_tuples):
        # Parallel lists: InChI, count, nickname, charge, spin multiplicity.
        inchis, counts, nicknames, charges, spins = [], [], [], [], []
        for snl, n, nick_name in snl_tuples:
            mol = snl.structure
            pbmol = BabelMolAdaptor(mol).pybel_mol
            inchis.append(pbmol.write("inchi").strip())
            nicknames.append(nick_name)
            counts.append(n)
            charges.append(mol.charge)
            spins.append(mol.spin_multiplicity)
        return inchis, counts, nicknames, charges, spins

    if count_elements(reactant_snls) != count_elements(product_snls):
        raise ValueError(
            "Number of atoms is inconsistant in reactant and product")

    (reactant_inchis, num_reactants, reactant_nicknames, reactant_charges,
     reactant_spin_multiplicities) = describe(reactant_snls)
    (product_inchis, num_products, product_nicknames, product_charges,
     product_spin_multiplicities) = describe(product_snls)

    d = {
        'submitter_email': submitter_email,
        'parameters': parameters,
        'state': 'SUBMITTED',
        'reaction_id': self._get_next_reaction_id(),
        'submitted_at': datetime.datetime.utcnow().isoformat(),
        "reactant_snls": [s[0].as_dict() for s in reactant_snls],
        "product_snls": [s[0].as_dict() for s in product_snls],
        'all_inchis': reactant_inchis + product_inchis,
        'reactant_inchis': reactant_inchis,
        'product_inchis': product_inchis,
        'num_reactants': num_reactants,
        'num_products': num_products,
        'reactant_nicknames': reactant_nicknames,
        'product_nicknames': product_nicknames,
        'reactant_charges': reactant_charges,
        'product_charges': product_charges,
        'reactant_spin_multiplicities': reactant_spin_multiplicities,
        'product_spin_multiplicities': product_spin_multiplicities,
        'reactant_fragments': [[frag.to_dict() for frag in specie]
                               for specie in reactant_fragments],
        'product_fragments': [[frag.to_dict() for frag in specie]
                              for specie in product_fragments],
    }
    self.reactions.insert(d)

    # The first reactant is submitted as a placeholder SNL that carries the
    # reaction id in its parameters.
    dummy_snl = StructureNL.from_dict(d["reactant_snls"][0])
    parameters['reaction_id'] = d['reaction_id']
    self.submit_snl(dummy_snl, submitter_email, parameters)
    return d['reaction_id']
def main():
    """
    Command-line entry point: place salt fragments around a molecule.

    Parses the command line, builds the energy evaluator selected with
    ``-e/--evaluator`` and runs ``IonPlacer.place``.

    With the "hardsphere" evaluator the molecule and the fragments are read
    as QChem outputs and exactly two fragment files are required, the cation
    followed by the anion. With the "sqm" evaluator every file is read with
    the format given by its extension.

    Raises:
        ValueError: if the number of fragments differs from the number of
            counts given with ``-n/--nums_fragments``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Place salt around a molecule")
    parser.add_argument("-m", "--molecule", dest="molecule", type=str,
                        required=True,
                        help="the file name of molecule")
    parser.add_argument("-l", "--ligand", dest="fragments", type=str,
                        nargs='+', required=True,
                        help="the list of fragment file names to to be placed around the molecule")
    parser.add_argument("-n", "--nums_fragments", dest="nums_fragments",
                        type=int, nargs='+', required=True,
                        help="the number of each fragment, the order must be the same with FRAGMENTS")
    parser.add_argument("-c", "--charge", dest="charge", type=int,
                        required=True,
                        help="total charge of the system")
    parser.add_argument("-t", "--taboo_tolerance", dest="taboo_tolerance",
                        type=float, default=1.0,
                        help="The radius to taboo a solution (in Angstrom)")
    parser.add_argument("-r", "--ratio_taboo_particles",
                        dest="ratio_taboo_particles", type=float, default=0.5,
                        help="ratio of particle within the tolerance to consider taboo current solution")
    parser.add_argument("-o", "--outputfile", dest="outputfile", type=str,
                        required=True,
                        help="the file name of the aligned conformer")
    parser.add_argument("-i", "--iterations", dest="iterations", type=int,
                        default=600,
                        help="maximum number of evaluations")
    parser.add_argument("-s", "--size", dest="size", type=int, default=15,
                        help="population size")
    parser.add_argument("-k", "--num_neighbours", dest="num_neighbours",
                        type=int, default=2,
                        help="number of neighbours")
    parser.add_argument("--force_ordered_fragment",
                        dest="force_ordered_fragment", action="store_true",
                        help="set this option to keep the fragment of the same in the order of input along the X-axis")
    parser.add_argument("--topology", dest="topology",
                        choices=["ring", "star"], type=str, default="ring",
                        help="the topology of the PSO information network")
    parser.add_argument("--initial_guess", dest="initial_guess",
                        choices=["breadth", "center", "volume"],
                        default="breadth",
                        help="where should particles should be initially put")
    parser.add_argument("--bound_setter", dest="bound_setter",
                        choices=["chain", "volume"], default="chain",
                        help="method to set the bound conditions of PSO")
    parser.add_argument("--always_write_best", dest="always_write_best",
                        action="store_true",
                        help="enable this option to output the best structure at every iteration")
    parser.add_argument("--random_seed", dest="random_seed", default=None,
                        type=int,
                        help="random seed for PSO, an integer is expected")
    parser.add_argument("--max_generations_each_conformer",
                        dest="max_generations_each_conformer", default=100,
                        type=int,
                        help="maximum generations for each conformer")
    parser.add_argument("-e", "--evaluator", dest="evaluator", type=str,
                        default="hardsphere", choices=["hardsphere", "sqm"],
                        help="Energy Evaluator")
    options = parser.parse_args()

    if options.evaluator == 'hardsphere':
        # The parser defines no --cation/--anion options. The fragment list
        # built below is [cation, anion] and is matched positionally with
        # nums_fragments, so the two -l/--ligand files are read in that
        # order. NOTE(review): confirm the cation-first convention.
        if len(options.fragments) != 2:
            parser.error("the hardsphere evaluator needs exactly two "
                         "fragment files: the cation followed by the anion")
        cation_file, anion_file = options.fragments
        qcout_molecule = QcOutput(options.molecule)
        qcout_cation = QcOutput(cation_file)
        qcout_anion = QcOutput(anion_file)
        pymatgen_mol_molecule = qcout_molecule.data[0]["molecules"][-1]
        pymatgen_mol_cation = qcout_cation.data[0]["molecules"][-1]
        pymatgen_mol_anion = qcout_anion.data[0]["molecules"][-1]
        # noinspection PyProtectedMember
        molecule = BabelMolAdaptor(pymatgen_mol_molecule)._obmol
        # noinspection PyProtectedMember
        obmol_cation = BabelMolAdaptor(pymatgen_mol_cation)._obmol
        # noinspection PyProtectedMember
        obmol_anion = BabelMolAdaptor(pymatgen_mol_anion)._obmol
        energy_evaluator = HardSphereElectrostaticEnergyEvaluator.from_qchem_output(
            qcout_molecule, qcout_cation, qcout_anion)
        fragments = [obmol_cation, obmol_anion]
    else:
        # The file extension (without the dot) selects the file format.
        molecule_format = os.path.splitext(options.molecule)[1][1:]
        # noinspection PyProtectedMember
        molecule = BabelMolAdaptor.from_file(options.molecule,
                                             molecule_format)._obmol
        fragments = []
        for frag_file in options.fragments:
            file_format = os.path.splitext(frag_file)[1][1:]
            # noinspection PyProtectedMember
            fragments.append(
                BabelMolAdaptor.from_file(frag_file, file_format)._obmol)
        energy_evaluator = SemiEmpricalQuatumMechanicalEnergyEvaluator(
            molecule, fragments, options.nums_fragments,
            total_charge=options.charge,
            taboo_tolerance_ang=options.taboo_tolerance,
            force_order_fragment=options.force_ordered_fragment,
            bound_setter=options.bound_setter)

    if len(fragments) != len(options.nums_fragments):
        raise ValueError(
            "you must specify the duplicated count for every fragment")

    placer = IonPlacer(
        molecule=molecule,
        fragments=fragments,
        nums_fragments=options.nums_fragments,
        energy_evaluator=energy_evaluator,
        taboo_tolerance_ang=options.taboo_tolerance,
        taboo_tolerance_particle_ratio=options.ratio_taboo_particles,
        topology=options.topology,
        initial_guess=options.initial_guess,
        bound_setter=options.bound_setter,
        always_write_best=options.always_write_best,
        random_seed=options.random_seed,
        max_generations_each_conformer=options.max_generations_each_conformer)
    # The evaluator needs a back reference to the placer.
    energy_evaluator.arranger = placer
    placer.place(max_evaluations=options.iterations,
                 pop_size=options.size,
                 neighborhood_size=options.num_neighbours)
    print('It took {:.1f} seconds to place the salt'.format(
        placer.playing_time))
def get_task_doc(cls, path, fw_spec=None):
    """
    Assemble the complete task document for the QChem output at ``path``,
    post-processing included.

    Args:
        path: location of the output file (resolved through ``zpath``).
        fw_spec: optional spec; when truthy its 'inchi' entry is compared
            with the final InChI to fill 'inchi_changed'.

    Returns:
        The document after ``jsanitize``. Its 'state' is 'rejected' on a
        structural change, "error" when any stored calculation reports an
        error, and "successful" otherwise.
    """
    logger.info("Getting task doc for file:{}".format(path))
    jobs = QcOutput(zpath(path)).data
    first_job = jobs[0]
    initial_mol = first_job["molecules"][0]
    if first_job["jobtype"] == "freq":
        # A leading frequency job is described by its initial geometry.
        mol = Molecule.from_dict(initial_mol.as_dict())
    else:
        mol = first_job["molecules"][-1]

    pbmol = BabelMolAdaptor(mol).pybel_mol
    xyz = XYZ(mol)
    smiles = pbmol.write(str("smi")).split()[0]
    can = pbmol.write(str("can")).split()[0]
    inchi_final = pbmol.write(str("inchi")).strip()
    svg = cls.modify_svg(cls.xyz2svg(xyz))
    comp = mol.composition
    charge = mol.charge
    spin_mult = mol.spin_multiplicity
    sch_symbol = PointGroupAnalyzer(mol).sch_symbol

    # File every job under a key derived from its job type.
    calculations = {}
    stationary_type = None
    has_structure_changing_job = False
    for job in jobs:
        jobtype = job["jobtype"]
        if jobtype == "opt":
            calculations["geom_opt"] = job
            has_structure_changing_job = True
        elif jobtype == "freq":
            calculations["freq"] = job
            has_structure_changing_job = True
            if job["has_error"]:
                stationary_type = "unknown"
            elif job['frequencies'][0]["frequency"] < -0.00:
                # only the first frequency decides
                stationary_type = "non-minimum"
            else:
                stationary_type = "minimum"
        elif jobtype == "sp":
            solvent_method = job["solvent_method"]
            if solvent_method == "NA":
                scf_key = "scf"
            else:
                scf_key = "scf" + "_" + solvent_method
            calculations[scf_key] = job
        elif jobtype == "aimd":
            # stored under the key "amid" (sic)
            calculations["amid"] = job
            has_structure_changing_job = True

    absolute_path = os.path.abspath(path)
    doc = {
        "path": absolute_path,
        "folder": os.path.basename(os.path.dirname(os.path.abspath(path))),
        "calculations": calculations,
        "molecule_initial": initial_mol.as_dict(),
        "molecule_final": mol.as_dict(),
        "pointgroup": sch_symbol,
        "pretty_formula": comp.reduced_formula,
        "reduced_cell_formula_abc": comp.alphabetical_formula,
        "formula": comp.formula,
        "charge": charge,
        "spin_multiplicity": spin_mult,
        "composition": comp.as_dict(),
        "elements": list(comp.as_dict().keys()),
        "nelements": len(comp),
        "smiles": smiles,
        "can": can,
        "inchi_final": inchi_final,
        "svg": svg,
        "xyz": str(xyz),
        "names": get_nih_names(smiles)
    }
    if stationary_type:
        doc['stationary_type'] = stationary_type
    if fw_spec:
        doc['inchi_changed'] = bool(fw_spec['inchi'] != doc['inchi_final'])

    if has_structure_changing_job:
        doc['structure_changed'] = cls._check_structure_change(
            initial_mol, mol, path)
    else:
        doc['structure_changed'] = False

    if doc['structure_changed']:
        doc['state'] = 'rejected'
        doc['reject_reason'] = 'structural change'
    else:
        # Gather the errors of every failed calculation.
        for calc in calculations.values():
            if not calc['has_error']:
                continue
            doc['state'] = "error"
            collected = doc.get("errors", [])
            collected += calc["errors"]
            doc["errors"] = collected
        if "state" not in doc:
            doc["state"] = "successful"
    return jsanitize(doc)
def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
    """
    Build the task document for one Q-Chem calculation directory.

    Args:
        dir_name: directory that holds the input and output files.
        qcinput_files: mapping of task name -> input file name. When it has
            more entries than ``qcoutput_files`` its "orig" entry is popped
            (the caller's mapping is modified) and parsed into ``d["orig"]``.
        qcoutput_files: mapping of task name -> output file name.
        multirun: when truthy the files are parsed with
            ``process_qchem_multirun``; otherwise every output file is parsed
            with ``process_qchemrun``.

    Returns:
        dict: the assembled document, with the calculations stored newest
        first under "calcs_reversed".

    Raises:
        Exception: any failure is logged together with the directory and
            then re-raised unchanged.
    """
    opt_job_types = ("opt", "optimization")
    freq_job_types = ("freq", "frequency")
    try:
        fullpath = os.path.abspath(dir_name)
        d = jsanitize(self.additional_fields, strict=True)
        d["schema"] = {
            "code": "atomate",
            "version": QChemDrone.__version__
        }
        d["dir_name"] = fullpath

        # If a saved "orig" input file is present, parse it in case the error
        # handler made changes to the initial input molecule or rem params,
        # which we might want to filter for later.
        if len(qcinput_files) > len(qcoutput_files):
            orig_input = QCInput.from_file(
                os.path.join(dir_name, qcinput_files.pop("orig")))
            orig_molecule = orig_input.molecule.as_dict()
            orig_molecule["charge"] = int(orig_molecule["charge"])
            d["orig"] = {
                "molecule": orig_molecule,
                "rem": orig_input.rem,
                "opt": orig_input.opt,
                "pcm": orig_input.pcm,
                "solvent": orig_input.solvent,
                "smx": orig_input.smx,
            }

        if multirun:
            d["calcs_reversed"] = self.process_qchem_multirun(
                dir_name, qcinput_files, qcoutput_files)
        else:
            d["calcs_reversed"] = [
                self.process_qchemrun(dir_name, taskname,
                                      qcinput_files.get(taskname),
                                      output_filename)
                for taskname, output_filename in qcoutput_files.items()
            ]

        # reverse the calculations data order so newest calc is first
        d["calcs_reversed"].reverse()

        d_calc_init = d["calcs_reversed"][-1]
        d_calc_final = d["calcs_reversed"][0]

        d["input"] = {
            "initial_molecule": d_calc_init["initial_molecule"],
            "job_type": d_calc_init["input"]["rem"]["job_type"]
        }
        d["output"] = {
            "initial_molecule": d_calc_final["initial_molecule"],
            "job_type": d_calc_final["input"]["rem"]["job_type"]
        }

        if d["output"]["job_type"] in opt_job_types:
            if "molecule_from_optimized_geometry" in d_calc_final:
                d["output"]["optimized_molecule"] = d_calc_final[
                    "molecule_from_optimized_geometry"]
                d["output"]["final_energy"] = d_calc_final["final_energy"]
            else:
                d["output"]["final_energy"] = "unstable"
            if d_calc_final["opt_constraint"]:
                d["output"]["constraint"] = [
                    d_calc_final["opt_constraint"][0],
                    float(d_calc_final["opt_constraint"][6])
                ]
        if d["output"]["job_type"] in freq_job_types:
            d["output"]["frequencies"] = d_calc_final["frequencies"]
            d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
            d["output"]["entropy"] = d_calc_final["total_entropy"]
            if d["input"]["job_type"] in opt_job_types:
                # The run started with an optimization and ended with a
                # frequency job: the energy is taken from the calculation
                # just before the final one.
                d["output"]["optimized_molecule"] = d_calc_final[
                    "initial_molecule"]
                d["output"]["final_energy"] = d["calcs_reversed"][1][
                    "final_energy"]
        if d["output"]["job_type"] == "sp":
            d["output"]["final_energy"] = d_calc_final["final_energy"]

        if d_calc_final["completion"]:
            total_cputime = 0.0
            total_walltime = 0.0
            for calc in d["calcs_reversed"]:
                if calc["walltime"] is not None:
                    total_walltime += calc["walltime"]
                if calc["cputime"] is not None:
                    total_cputime += calc["cputime"]
            d["walltime"] = total_walltime
            d["cputime"] = total_cputime
        else:
            d["walltime"] = None
            d["cputime"] = None

        comp = d["output"]["initial_molecule"].composition
        d["formula_pretty"] = comp.reduced_formula
        d["formula_anonymous"] = comp.anonymized_formula
        d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
        if d_calc_final["point_group"] is not None:
            d["pointgroup"] = d_calc_final["point_group"]
        else:
            try:
                d["pointgroup"] = PointGroupAnalyzer(
                    d["output"]["initial_molecule"]).sch_symbol
            except ValueError:
                d["pointgroup"] = "PGA_error"

        bb = BabelMolAdaptor(d["output"]["initial_molecule"])
        pbmol = bb.pybel_mol
        smiles = pbmol.write(str("smi")).split()[0]
        d["smiles"] = smiles

        d["state"] = "successful" if d_calc_final[
            "completion"] else "unsuccessful"

        if ("special_run_type" in d
                and d["special_run_type"] == "frequency_flattener"):
            d["num_frequencies_flattened"] = int(
                (len(qcinput_files) / 2) - 1)
            # If a negative frequency remains,
            # then the flattening was unsuccessful
            if (d["state"] == "successful"
                    and d_calc_final["frequencies"][0] < 0):
                d["state"] = "unsuccessful"

        d["last_updated"] = datetime.datetime.utcnow()
        return d

    except Exception:
        # One record carrying both the directory and the traceback.
        logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                     traceback.format_exc())
        raise
def get_inchi(mol):
    """Return the InChI string for ``mol``.

    The molecule is wrapped in a ``BabelMolAdaptor`` and its pybel
    representation is written out in the "inchi" format; leading and
    trailing whitespace is stripped from the result.
    """
    inchi_text = BabelMolAdaptor(mol).pybel_mol.write(str("inchi"))
    return inchi_text.strip()
dirname = filename[:-4] if os.path.exists(dirname): print("directory " + dirname + " already exists") print("please delete it before use this script") exit(0) for filename in filenames: dirname = filename[:-4] os.mkdir(dirname) print("reading", filename) text = None with open(filename) as f: text = f.read() mol_tokens = str_to_obmols(text) for (i, (mol, cas)) in enumerate(mol_tokens): print("processing molecule", i + 1, cas, "of", len(mol_tokens), "molecules") try: build3d(mol) except: os.system("echo " + cas + " >> failed_mols.txt") pmg_mols = [(BabelMolAdaptor(obmol).pymatgen_mol, cas) for (obmol, cas) in mol_tokens] snl_texts = [ StructureNL(mol, "Xiaohui Qu <*****@*****.**>", remarks=cas) for (mol, cas) in pmg_mols ] for snl in snl_texts: with open(dirname + "/" + snl.remarks[0] + ".snl", 'w') as f: json.dump(snl.as_dict(), f, indent=4) print("Done")
def generate_doc(self, path, molecule_file, template_file, output_file,
                 isomers_file, internal_coordinate_file,
                 optimized_geom_file):
    """
    Assemble the task document for one GSM calculation.

    Args:
        path: Directory of the calculation; its absolute path is stored
            under "dir_name".
        molecule_file: File parsed with ``parse_multi_xyz``. The first
            molecule is recorded as the initial reactants and, when a
            second one is present, it is recorded as the initial products.
        template_file: Template file parsed with ``QCTemplate.from_file``;
            its rem/pcm/solvent/smx sections are recorded.
        output_file: GSM output file parsed with ``GSMOutput``.
        isomers_file: Isomers file parsed with ``GSMIsomerInput``, or
            None. Required when the GSM type reported by the output
            contains "SE" (single-ended calculation).
        internal_coordinate_file: File parsed with
            ``GSMInternalCoordinateDataParser``, or None.
        optimized_geom_file: File parsed with
            ``GSMOptimizedStringParser``.

    Returns:
        dict: The task document.

    Raises:
        ValueError: If ``molecule_file`` does not hold one or two
            molecules, or if a single-ended calculation has no isomers
            file.
        Exception: Any other failure is logged (with the directory and
            the traceback) and re-raised unchanged.
    """

    def _point_group(mol):
        # Schoenflies symbol of ``mol``: None when there is no molecule,
        # "PGA_error" when PointGroupAnalyzer raises ValueError.
        if mol is None:
            return None
        try:
            return PointGroupAnalyzer(mol).sch_symbol
        except ValueError:
            return "PGA_error"

    try:
        fullpath = os.path.abspath(path)
        d = jsanitize(self.additional_fields, strict=True)
        d["schema"] = {"code": "atomate", "version": GSMDrone.__version__}
        d["dir_name"] = fullpath

        # TODO: Consider error handlers
        # Include an "orig" section to the doc

        # Parse all relevant files
        initial_mol = parse_multi_xyz(molecule_file)
        temp_file = QCTemplate.from_file(template_file)
        iso_file = None
        if isomers_file is not None:
            iso_file = GSMIsomerInput.from_file(isomers_file)
        out_file = GSMOutput(output_file)
        ic_file = None
        if internal_coordinate_file is not None:
            ic_file = GSMInternalCoordinateDataParser(
                internal_coordinate_file)
        opt_file = GSMOptimizedStringParser(optimized_geom_file)

        results = out_file.data
        gsm_inputs = results["inputs"]

        # Placeholder so "warnings" keeps its position in the document;
        # the parsed warnings are filled in further down.
        d["warnings"] = dict()

        # INPUTS
        # The reactant composition is needed for the formula fields below,
        # so a molecule file without a usable reactant is rejected here
        # instead of failing later with an AttributeError on None.
        if len(initial_mol) not in (1, 2):
            raise ValueError(
                "Expected one or two molecules in the molecule file, "
                "found {}.".format(len(initial_mol)))

        d["input"] = dict()
        d["input"]["initial_reactants"] = initial_mol[0]
        d["input"]["initial_products"] = (
            initial_mol[1] if len(initial_mol) == 2 else None)

        d["input"]["mode"] = gsm_inputs["gsm_type"]
        single_ended = "SE" in d["input"]["mode"]

        num_nodes = gsm_inputs.get("num_nodes")
        if num_nodes is None:
            # Fallback node counts when the output does not report one.
            d["input"]["num_nodes"] = 30 if single_ended else 9
        else:
            d["input"]["num_nodes"] = int(num_nodes)

        d["input"]["reactants_fixed"] = gsm_inputs.get(
            "reactant_geom_fixed", False)
        d["input"]["products_fixed"] = gsm_inputs.get(
            "product_geom_fixed", False)

        d["input"]["template"] = {
            "rem": temp_file.rem,
            "pcm": temp_file.pcm,
            "solvent": temp_file.solvent,
            "smx": temp_file.smx,
        }

        if single_ended:
            if iso_file is None:
                raise ValueError(
                    "No isomers file provided for single-ended calculation."
                )
            d["input"]["isomers"] = {
                "bonds_formed": iso_file.bonds_formed,
                "bonds_broken": iso_file.bonds_broken,
                "angles": iso_file.angles,
                "torsions": iso_file.torsions,
                "out_of_planes": iso_file.out_of_planes,
            }

        d["input"]["parameters"] = gsm_inputs

        # OUTPUTS
        output = dict()
        d["output"] = output
        output["completion"] = results["completion"]

        if single_ended:
            output["initial_energy"] = results.get("initial_energy", None)
            d["driving_coord_trajectories"] = results.get(
                "driving_coord_trajectories", None)
        else:
            output["initial_energy_rct"] = results.get(
                "initial_energy_rct", None)
            output["initial_energy_pro"] = results.get(
                "initial_energy_pro", None)

        output["energy_profile"] = results.get("final_energy_profile", None)
        # NOTE(review): "path_uphill" reads the same key as
        # "energy_profile"; this looks like a copy-paste slip (compare
        # "final_path_dissociative" below). Confirm the intended key
        # against GSMOutput before changing it.
        output["path_uphill"] = results.get("final_energy_profile", None)
        output["path_dissociative"] = results.get(
            "final_path_dissociative", None)
        output["minima_nodes"] = results.get("final_min_nodes", None)
        output["maxima_nodes"] = results.get("final_max_nodes", None)
        output["maximum_node"] = results.get("final_max_node", None)
        output["maximum_energy"] = results.get("final_max_energy", None)

        if output["completion"]:
            output["reactant_node"] = results["min_rct_node"]
            output["product_node"] = results["min_pro_node"]
            output["ts_node"] = results["ts_node"]
            output["absolute_ts_energy"] = results["absolute_ts_energy"]
            output["ts_energy"] = results["ts_energy"]
            output["delta_e"] = results["delta_e"]

            output["internal_coords"] = (
                ic_file.data if ic_file is not None else None)

            output["species"] = opt_file.data["species"]
            output["optimized_node_geometries"] = opt_file.data[
                "geometries"]
            output["optimized_node_molecules"] = opt_file.data["molecules"]
            output["optimized_node_energies"] = opt_file.data["energies"]
            output["optimized_node_forces"] = opt_file.data["forces"]

            # Pick the molecule at each special node of the string, when
            # that node is known.
            node_molecules = output["optimized_node_molecules"]
            for mol_key, node_key in (
                    ("ts_molecule", "ts_node"),
                    ("reactant_molecule", "reactant_node"),
                    ("product_molecule", "product_node")):
                node = output[node_key]
                output[mol_key] = (
                    node_molecules[node] if node is not None else None)
        else:
            # Nothing below is available for an incomplete run.
            for key in ("reactant_node", "product_node", "ts_node",
                        "ts_energy", "absolute_ts_energy", "delta_e",
                        "internal_coords", "species",
                        "optimized_node_geometries",
                        "optimized_node_molecules",
                        "optimized_node_energies",
                        "optimized_node_forces", "ts_molecule",
                        "reactant_molecule", "product_molecule"):
                output[key] = None

        d["calc"] = results

        d["warnings"] = results["warnings"]
        d["errors"] = results["errors"]

        # TODO: total walltime/cputime are not recorded for GSM runs yet.

        comp = d["input"]["initial_reactants"].composition
        d["formula_pretty"] = comp.reduced_formula
        d["formula_anonymous"] = comp.anonymized_formula
        d["formula_alphabetical"] = comp.alphabetical_formula

        # Strip the digits from every space-separated component of the
        # alphabetical formula to obtain the element symbols.
        elements = {
            "".join(ch for ch in component if not ch.isdigit())
            for component in d["formula_alphabetical"].split(" ")
        }
        d["chemsys"] = "-".join(sorted(elements))

        d["pointgroup_ts"] = _point_group(output["ts_molecule"])
        d["pointgroup_reactant"] = _point_group(
            output["reactant_molecule"])
        d["pointgroup_product"] = _point_group(output["product_molecule"])

        if output["ts_molecule"] is not None:
            bb = BabelMolAdaptor(output["ts_molecule"])
            pbmol = bb.pybel_mol
            d["smiles"] = pbmol.write(str("smi")).split()[0]
        else:
            d["smiles"] = None

        d["state"] = ("successful"
                      if output["completion"] else "unsuccessful")

        d["last_updated"] = datetime.datetime.utcnow()
        return d

    except Exception:
        # One log record with the location and the traceback.
        logger.error("Error in " + os.path.abspath(path) + ".\n" +
                     traceback.format_exc())
        raise
def __init__(self, molecule, optimize=False):
    """
    Instantiation method for FunctionalGroupExtractor.

    :param molecule: Either a filename, a pymatgen.core.structure.Molecule
        object, or a pymatgen.analysis.graphs.MoleculeGraph object.
    :param optimize: Default False. If True, then the input molecule will be
        modified, adding Hydrogens, performing a simple conformer search,
        etc.
    :raises ValueError: If ``molecule`` is a string and reading it raises
        OSError, or if ``molecule`` is not a str, Molecule, or
        MoleculeGraph.
    """

    def _babel_optimized(adaptor):
        # Add hydrogens, build 3D coordinates and run a local optimisation
        # on the BabelMolAdaptor, then return the resulting pymatgen
        # Molecule.
        adaptor.add_hydrogen()
        adaptor.make3d()
        adaptor.localopt()
        return adaptor.pymatgen_mol

    self.molgraph = None

    if isinstance(molecule, str):
        try:
            if optimize:
                # The file is read straight into a BabelMolAdaptor so the
                # optimisation helpers are available on it.
                self.molecule = _babel_optimized(
                    BabelMolAdaptor.from_file(molecule, file_format="mol"))
            else:
                self.molecule = Molecule.from_file(molecule)
        except OSError as err:
            raise ValueError("Input must be a valid molecule file, a "
                             "Molecule object, or a MoleculeGraph "
                             "object.") from err
    elif isinstance(molecule, Molecule):
        if optimize:
            self.molecule = _babel_optimized(BabelMolAdaptor(molecule))
        else:
            self.molecule = molecule
    elif isinstance(molecule, MoleculeGraph):
        if optimize:
            # The supplied graph is not reused for an optimised molecule;
            # a new graph is built below.
            self.molecule = _babel_optimized(
                BabelMolAdaptor(molecule.molecule))
        else:
            self.molecule = molecule.molecule
            self.molgraph = molecule
    else:
        # The trailing space after "be" matters: the two literals are
        # concatenated into a single message.
        raise ValueError("Input to FunctionalGroupExtractor must be "
                         "str, Molecule, or MoleculeGraph.")

    if self.molgraph is None:
        self.molgraph = MoleculeGraph.with_local_env_strategy(
            self.molecule, OpenBabelNN())

    # Assign a specie and coordinates to each node in the graph,
    # corresponding to the Site in the Molecule object
    self.molgraph.set_node_attributes()

    self.species = nx.get_node_attributes(self.molgraph.graph, "specie")
def create_rdkit_mol_from_mol_graph(mol_graph,
                                    name=None,
                                    force_sanitize=False,
                                    metals=None):
    """
    Create a rdkit molecule from molecule graph, with bond type perceived by babel.
    Done in the below steps:

    1. create a babel mol without metal atoms.
    2. perceive bond order (conducted by BabelMolAdaptor)
    3. adjust formal charge of metal atoms so as not to violate valence rule
    4. create rdkit mol based on species, coords, bonds, and formal charge

    Args:
        mol_graph (pymatgen MoleculeGraph): molecule graph
        name (str): name of the molecule
        force_sanitize (bool): whether to force sanitization of the rdkit mol
        metals (dict): with metal atom (str) as key and the number of valence
            electrons as value. Defaults to ``{"Li": 1, "Mg": 2}``; a new
            dict is built on every call so the default is never shared
            between calls.

    Returns:
        m: rdkit Chem.Mol
        bond_types (dict): bond types assigned to the created rdkit mol

    Raises:
        RuntimeError: if babel reports a bond order other than 0, 1, 2, 3
            or 5, or if a bond connects two metal atoms.
    """
    if metals is None:
        metals = {"Li": 1, "Mg": 2}

    # Bond orders reported by babel and the bond type assigned to each.
    order_to_bond_type = {
        0: BondType.UNSPECIFIED,
        1: BondType.SINGLE,
        2: BondType.DOUBLE,
        3: BondType.TRIPLE,
        5: BondType.AROMATIC,
    }

    pymatgen_mol = mol_graph.molecule
    species = [str(s) for s in pymatgen_mol.species]
    coords = pymatgen_mol.cart_coords
    bonds = [
        tuple(sorted([i, j])) for i, j, attr in mol_graph.graph.edges.data()
    ]

    # create babel mol without metals
    pmg_mol_no_metals = remove_metals(pymatgen_mol)
    adaptor = BabelMolAdaptor(pmg_mol_no_metals)
    ob_mol = adaptor.openbabel_mol

    # get babel bond order of mol without metals
    ob_bond_order = {
        tuple(sorted([bd.GetBeginAtomIdx(), bd.GetEndAtomIdx()])):
        bd.GetBondOrder()
        for bd in ob.OBMolBondIter(ob_mol)
    }

    # create bond type
    atom_idx_mapping = pymatgen_2_babel_atom_idx_map(pymatgen_mol, ob_mol)
    bond_types = {}

    for bd in bonds:
        try:
            ob_bond = [atom_idx_mapping[a] for a in bd]

            # atom not in ob mol
            if None in ob_bond:
                raise KeyError

            # atom in ob mol; a bond babel did not find raises KeyError on
            # the lookup and is handled together with the case above
            v = ob_bond_order[tuple(sorted(ob_bond))]
            if v not in order_to_bond_type:
                raise RuntimeError(f"Got unexpected babel bond order: {v}")
            tp = order_to_bond_type[v]

        except KeyError:
            atom1_spec, atom2_spec = [species[a] for a in bd]

            if atom1_spec in metals and atom2_spec in metals:
                raise RuntimeError("Got a bond between two metal atoms")

            # bond involves one and only one metal atom (atom not in ob mol case above)
            elif atom1_spec in metals or atom2_spec in metals:
                tp = Chem.rdchem.BondType.DATIVE

                # Dative bonds have the special characteristic that they do not affect
                # the valence on the start atom, but do affect the end atom.
                # Here we adjust the atom ordering in the bond for dative bond to make
                # metal the end atom.
                if atom1_spec in metals:
                    bd = tuple(reversed(bd))

            # bond not found by babel (atom in ob mol)
            else:
                tp = Chem.rdchem.BondType.UNSPECIFIED

        bond_types[bd] = tp

    # a metal atom can form multiple dative bond (e.g. bidentate LiEC), for such cases
    # we need to adjust the their formal charge so as not to violate valence rule
    formal_charge = adjust_formal_charge(species, bonds, metals)

    m = create_rdkit_mol(species, coords, bond_types, formal_charge, name,
                         force_sanitize)

    return m, bond_types
def uniform_labels(self, mol1, mol2):
    """
    Find a common atom ordering for two molecules.

    Every isomorphism mapping between the two molecules is tried; among
    the mappings whose element sequence agrees with that of mol1, the one
    giving the smallest RMSD after alignment is kept.

    Args:
        mol1: First molecule. OpenBabel OBMol or pymatgen Molecule object.
        mol2: Second molecule. OpenBabel OBMol or pymatgen Molecule object.

    Returns:
        (list1, list2): atom orders for mol1 and mol2 respectively. Each
        value is the original atom index (counted from 1) in mol1 or mol2
        of the atom at that position in the uniform order. list2 is None
        when no mapping had a matching element sequence.
        (None, None) if the molecule hashes of mol1 and mol2 differ.
    """
    ref_obmol = BabelMolAdaptor(mol1).openbabel_mol
    target_obmol = BabelMolAdaptor(mol2).openbabel_mol

    ref_hash = self.get_molecule_hash(ref_obmol)
    target_hash = self.get_molecule_hash(target_obmol)
    if ref_hash != target_hash:
        return None, None

    # Collect all isomorphism mappings from mol1 onto mol2.
    query = ob.CompileMoleculeQuery(ref_obmol)
    mapper = ob.OBIsomorphismMapper.GetInstance(query)
    mappings = ob.vvpairUIntUInt()
    mapper.MapAll(target_obmol, mappings)

    # Order each mapping by its first member and keep the second member,
    # shifted up by one, as the candidate labelling of mol2.
    candidate_labels = tuple(
        tuple(pair[1] + 1
              for pair in sorted(mapping, key=lambda pair: pair[0]))
        for mapping in mappings)

    aligner = ob.OBAlign(True, False)
    aligner.SetRefMol(ref_obmol)

    ref_label = list(range(1, ref_obmol.NumAtoms() + 1))
    # noinspection PyProtectedMember
    ref_elements = InchiMolAtomMapper._get_elements(ref_obmol, ref_label)

    smallest_rmsd = float("Inf")
    best_label = None
    for label in candidate_labels:
        # noinspection PyProtectedMember
        candidate_elements = InchiMolAtomMapper._get_elements(
            target_obmol, label)
        if ref_elements != candidate_elements:
            continue

        # Rebuild mol2 with its atoms in the candidate order and align it
        # to mol1.
        reordered = ob.OBMol()
        for atom_idx in label:
            reordered.AddAtom(target_obmol.GetAtom(atom_idx))
        aligner.SetTargetMol(reordered)
        aligner.Align()

        rmsd = aligner.GetRMSD()
        if rmsd < smallest_rmsd:
            smallest_rmsd, best_label = rmsd, copy.copy(label)

    return ref_label, best_label
def edges_from_babel(molecule):
    """Return the bonds OpenBabel finds in ``molecule`` as index pairs.

    Each bond becomes a two-element list ``[begin, end]`` holding the
    begin/end atom indices reported by OpenBabel, each reduced by one
    (presumably to convert OpenBabel's numbering to 0-based indices).
    """
    ob_mol = BabelMolAdaptor(molecule).openbabel_mol
    return [
        [bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1]
        for bond in ob.OBMolBondIter(ob_mol)
    ]
def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
    """
    Assemble the task document for the Q-Chem calculation(s) in a directory.

    Args:
        dir_name: Directory of the calculation(s); its absolute path is
            stored under "dir_name".
        qcinput_files: Mapping from task name to input file name.
        qcoutput_files: Mapping from task name to output file name.
        multirun: If True, all files are handed to
            ``process_qchem_multirun``; otherwise every output file is
            processed on its own with ``process_qchemrun``.

    Returns:
        dict: The task document. "calcs_reversed" holds the individual
        calculations, newest first.

    Raises:
        Exception: Any failure is logged (with the directory and the
            traceback) and re-raised unchanged.
    """
    opt_job_types = ("opt", "optimization")
    freq_job_types = ("freq", "frequency")

    try:
        fullpath = os.path.abspath(dir_name)
        d = jsanitize(self.additional_fields, strict=True)
        d["schema"] = {
            "code": "atomate",
            "version": QChemDrone.__version__
        }
        d["dir_name"] = fullpath

        if multirun:
            d["calcs_reversed"] = self.process_qchem_multirun(
                dir_name, qcinput_files, qcoutput_files)
        else:
            d["calcs_reversed"] = [
                self.process_qchemrun(dir_name, taskname,
                                      qcinput_files.get(taskname),
                                      output_filename)
                for taskname, output_filename in qcoutput_files.items()
            ]

        # reverse the calculations data order so newest calc is first
        d["calcs_reversed"].reverse()

        d_calc_init = d["calcs_reversed"][-1]
        d_calc_final = d["calcs_reversed"][0]

        d["input"] = {
            "initial_molecule": d_calc_init["initial_molecule"],
            "job_type": d_calc_init["input"]["rem"]["job_type"]
        }
        d["output"] = {
            "initial_molecule": d_calc_final["initial_molecule"],
            "job_type": d_calc_final["input"]["rem"]["job_type"]
        }

        if d["output"]["job_type"] in opt_job_types:
            d["output"]["optimized_molecule"] = d_calc_final[
                "molecule_from_optimized_geometry"]
            d["output"]["final_energy"] = d_calc_final["final_energy"]
            if d_calc_final["opt_constraint"]:
                d["output"]["constraint"] = [
                    d_calc_final["opt_constraint"][0],
                    float(d_calc_final["opt_constraint"][6])
                ]

        if d["output"]["job_type"] in freq_job_types:
            d["output"]["frequencies"] = d_calc_final["frequencies"]
            d["output"]["enthalpy"] = d_calc_final["enthalpy"]
            d["output"]["entropy"] = d_calc_final["entropy"]
            if d["input"]["job_type"] in opt_job_types:
                # The run started with an optimization and ended with a
                # frequency job: the geometry the final job started from
                # is recorded as the optimized molecule, with the energy
                # of the calculation just before it.
                d["output"]["optimized_molecule"] = d_calc_final[
                    "initial_molecule"]
                d["output"]["final_energy"] = d["calcs_reversed"][1][
                    "final_energy"]

        if "special_run_type" in d:
            if d["special_run_type"] == "frequency_flattener":
                # int(): "/" is true division on Python 3, and this is a
                # count, not a float.
                d["num_frequencies_flattened"] = int(
                    (len(qcinput_files) / 2) - 1)

        # Sum the timings over all calculations; a single "nan" entry
        # makes both totals "nan".
        total_cputime = 0.0
        total_walltime = 0.0
        nan_found = False
        for calc in d["calcs_reversed"]:
            if calc["walltime"] != "nan":
                total_walltime += calc["walltime"]
            else:
                nan_found = True
            if calc["cputime"] != "nan":
                total_cputime += calc["cputime"]
            else:
                nan_found = True
        if nan_found:
            d["walltime"] = "nan"
            d["cputime"] = "nan"
        else:
            d["walltime"] = total_walltime
            d["cputime"] = total_cputime

        comp = d["output"]["initial_molecule"].composition
        d["formula_pretty"] = comp.reduced_formula
        d["formula_anonymous"] = comp.anonymized_formula
        d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
        d["pointgroup"] = PointGroupAnalyzer(
            d["output"]["initial_molecule"]).sch_symbol

        bb = BabelMolAdaptor(d["output"]["initial_molecule"])
        pbmol = bb.pybel_mol
        smiles = pbmol.write(str("smi")).split()[0]
        d["smiles"] = smiles

        d["state"] = "successful" if d_calc_final[
            "completion"] else "unsuccessful"
        d["last_updated"] = datetime.datetime.utcnow()
        return d

    except Exception:
        # One log record with the location and the traceback.
        logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                     traceback.format_exc())
        raise
def get_molecule_data(self, mol_id):
    """
    Compile all useful molecular data for analysis, including molecule size
    (number of atoms), molecular weight, enthalpy, entropy, and functional
    groups.

    NOTE: This function automatically converts energy, enthalpy, and entropy
    into SI units (J/mol and J/mol*K)

    :param mol_id: Unique ID associated with the molecule.
    :return: dict of relevant molecule data.
    :raises RuntimeError: if there is no database connection.
    :raises ValueError: if no entry with this ``mol_id`` exists in the
        "molecules" collection.
    :raises KeyError: if the entry holds no "opt"/"optimization"
        calculation, so no molecule is available.
    """

    mol_data = {"mol_id": mol_id}

    if self.db is None:
        raise RuntimeError("Cannot query database; connection is invalid."
                           " Try to connect again.")

    collection = self.db.db["molecules"]

    mol_entry = collection.find_one({"mol_id": mol_id})
    if mol_entry is None:
        # find_one returns None when nothing matches; fail with a clear
        # message instead of a TypeError on the subscript below.
        raise ValueError(
            "No molecule with mol_id {!r} found in the database.".format(
                mol_id))

    for calc in mol_entry["calcs_reversed"]:
        task_name = calc["task"]["name"]
        if task_name in ["freq", "frequency"]:
            mol_data["enthalpy"] = calc["enthalpy"] * 4.184 * 1000
            mol_data["entropy"] = calc["entropy"] * 4.184
        if task_name == "sp":
            mol_data["energy"] = calc[
                "final_energy_sp"] * 627.509 * 4.184 * 1000
        if task_name in ["opt", "optimization"]:
            mol_dict = calc["molecule_from_optimized_geometry"]
            mol_data["molecule"] = Molecule.from_dict(mol_dict)

    molecule = mol_data["molecule"]
    adaptor = BabelMolAdaptor(molecule)
    pbmol = adaptor.pybel_mol

    mol_data["number_atoms"] = len(molecule)
    mol_data["molecular_weight"] = pbmol.molwt
    mol_data["tpsa"] = pbmol.calcdesc()["TPSA"]

    extractor = FunctionalGroupExtractor(molecule)
    molgraph = extractor.molgraph
    func_grps = extractor.get_all_functional_groups()

    mol_data["functional_groups"] = extractor.categorize_functional_groups(
        func_grps)

    # Count double and triple bonds from the edge weights, skipping any
    # atom pair that has already been seen.
    weights = nx.get_edge_attributes(molgraph.graph, "weight")

    bonds_checked = set()
    double_bonds = 0
    triple_bonds = 0
    for bond, weight in weights.items():
        # Remove index from multidigraph
        bond = (bond[0], bond[1])
        if int(weight) == 2 and bond not in bonds_checked:
            double_bonds += 1
        elif int(weight) == 3 and bond not in bonds_checked:
            triple_bonds += 1
        bonds_checked.add(bond)

    mol_data["double_bonds"] = double_bonds
    mol_data["triple_bonds"] = triple_bonds

    species = [str(s.specie) for s in molecule.sites]
    mol_data["species"] = dict(Counter(species))

    return mol_data