def setUp(self):
    """
    Build the fixtures shared by the tests: the path to the test .mol file,
    the Molecule read from it, the OpenBabelNN bonding strategy, the
    MoleculeGraph produced by that strategy, and a FunctionalGroupExtractor
    wrapping the graph.
    """
    # Silence warnings for the duration of each test.
    warnings.simplefilter("ignore")

    mol_path = os.path.join(test_dir, "func_group_test.mol")
    self.file = mol_path

    molecule = Molecule.from_file(mol_path)
    self.mol = molecule

    strategy = OpenBabelNN()
    self.strat = strategy

    graph = MoleculeGraph.with_local_env_strategy(molecule, strategy)
    self.mg = graph

    self.extractor = FunctionalGroupExtractor(graph)
def test_init(self):
    """
    Check that a FunctionalGroupExtractor built from a file path, from a
    Molecule, and from a MoleculeGraph agree with one another, and that the
    ``optimize=True`` path on the hydrogen-free file yields the same number
    of atoms and the same species.
    """
    from_path = FunctionalGroupExtractor(self.file)
    from_molecule = FunctionalGroupExtractor(self.mol)
    from_graph = self.extractor

    # Ensure that instantiation is equivalent for all valid input types:
    # compare the path-based extractor against the other two, attribute by
    # attribute (molgraph first, then species).
    for attribute in ("molgraph", "species"):
        reference = getattr(from_path, attribute)
        for other in (from_molecule, from_graph):
            self.assertEqual(reference, getattr(other, attribute))

    # Test optimization
    no_h_path = os.path.join(test_dir, "func_group_test_no_h.mol")
    optimized = FunctionalGroupExtractor(no_h_path, optimize=True)

    self.assertEqual(len(optimized.molecule), len(from_molecule.molecule))
    self.assertEqual(optimized.species, from_molecule.species)
def setUp(self):
    """
    Build the fixtures shared by the tests: the path to the test .mol file,
    the Molecule read from it, the OpenBabelNN bonding strategy, the
    MoleculeGraph produced by that strategy (with ``reorder`` and
    ``extend_structure`` both disabled), and a FunctionalGroupExtractor
    wrapping the graph.
    """
    # Silence warnings for the duration of each test.
    warnings.simplefilter("ignore")

    self.file = os.path.join(test_dir, "func_group_test.mol")
    self.mol = Molecule.from_file(self.file)
    self.strat = OpenBabelNN()

    graph_options = {"reorder": False, "extend_structure": False}
    self.mg = MoleculeGraph.with_local_env_strategy(
        self.mol, self.strat, **graph_options
    )

    self.extractor = FunctionalGroupExtractor(self.mg)
class FunctionalGroupExtractorTest(unittest.TestCase):
    """
    Tests for FunctionalGroupExtractor, run against the molecule stored in
    ``func_group_test.mol``.
    """

    def setUp(self):
        """Create the molecule, bonding strategy, graph and extractor fixtures."""
        # Silence warnings for the duration of each test; restored in tearDown.
        warnings.simplefilter("ignore")

        self.file = os.path.join(test_dir, "func_group_test.mol")
        self.mol = Molecule.from_file(self.file)
        self.strat = OpenBabelNN()
        self.mg = MoleculeGraph.with_local_env_strategy(
            self.mol, self.strat, reorder=False, extend_structure=False
        )
        self.extractor = FunctionalGroupExtractor(self.mg)

    def tearDown(self):
        """Re-enable default warning behaviour and drop the fixtures."""
        warnings.simplefilter("default")
        del self.extractor
        del self.mg
        del self.strat
        del self.mol
        del self.file

    def test_init(self):
        """Extractors built from a path, a Molecule and a MoleculeGraph agree."""
        # Ensure that instantiation is equivalent for all valid input types
        extractor_str = FunctionalGroupExtractor(self.file)
        extractor_mol = FunctionalGroupExtractor(self.mol)
        extractor_mg = self.extractor

        self.assertEqual(extractor_str.molgraph, extractor_mol.molgraph)
        self.assertEqual(extractor_str.molgraph, extractor_mg.molgraph)
        self.assertEqual(extractor_str.species, extractor_mol.species)
        self.assertEqual(extractor_str.species, extractor_mg.species)

        # Test optimization
        file_no_h = os.path.join(test_dir, "func_group_test_no_h.mol")
        extractor_no_h = FunctionalGroupExtractor(file_no_h, optimize=True)

        self.assertEqual(len(extractor_no_h.molecule), len(extractor_mol.molecule))
        self.assertEqual(extractor_no_h.species, extractor_mol.species)

    def test_get_heteroatoms(self):
        """The test molecule has three heteroatoms: one N and two O."""
        heteroatoms = self.extractor.get_heteroatoms()
        hetero_species = [self.extractor.species[x] for x in heteroatoms]

        self.assertEqual(len(heteroatoms), 3)
        self.assertEqual(sorted(hetero_species), ["N", "O", "O"])

        # Test with limitation
        hetero_no_o = self.extractor.get_heteroatoms(elements=["N"])
        self.assertEqual(len(hetero_no_o), 1)

    def test_get_special_carbon(self):
        """Four special carbons are found overall, two when restricted to N."""
        special_cs = self.extractor.get_special_carbon()

        self.assertEqual(len(special_cs), 4)

        # Test with limitation
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        self.assertEqual(len(special_cs_no_o), 2)

    def test_link_marked_atoms(self):
        """Marked atoms link into one 9-atom group, or two groups without O."""
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))

        self.assertEqual(len(link), 1)
        self.assertEqual(len(link[0]), 9)

        # Exclude Oxygen-related functional groups
        heteroatoms_no_o = self.extractor.get_heteroatoms(elements=["N"])
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        all_marked = heteroatoms_no_o.union(special_cs_no_o)

        link_no_o = self.extractor.link_marked_atoms(all_marked)

        self.assertEqual(len(link_no_o), 2)

    def test_get_basic_functional_groups(self):
        """Only the methyl group is reported as a basic functional group."""
        basics = self.extractor.get_basic_functional_groups()

        # Molecule has one methyl group which will be caught.
        self.assertEqual(len(basics), 1)
        self.assertEqual(len(basics[0]), 4)

        basics_no_methyl = self.extractor.get_basic_functional_groups(
            func_groups=["phenyl"]
        )
        self.assertEqual(len(basics_no_methyl), 0)

    def test_get_all_functional_groups(self):
        """get_all_functional_groups returns the linked groups plus the basic ones."""
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))
        basics = self.extractor.get_basic_functional_groups()

        all_func = self.extractor.get_all_functional_groups()

        self.assertEqual(len(all_func), len(link) + len(basics))
        self.assertEqual(sorted(all_func), sorted(link + basics))

    def test_categorize_functional_groups(self):
        """Categorization yields the two expected keys with a total count of 2."""
        all_func = self.extractor.get_all_functional_groups()
        categorized = self.extractor.categorize_functional_groups(all_func)

        # assertIn reports both the missing key and the container on failure,
        # unlike assertTrue(key in container.keys()).
        self.assertIn("O=C1C=CC(=O)[N]1", categorized)
        self.assertIn("[CH3]", categorized)

        total_count = sum(c["count"] for c in categorized.values())
        self.assertEqual(total_count, 2)
class FunctionalGroupExtractorTest(unittest.TestCase):
    """
    Tests for FunctionalGroupExtractor, run against the molecule stored in
    ``func_group_test.mol``.
    """

    def setUp(self):
        """Create the molecule, bonding strategy, graph and extractor fixtures."""
        # Silence warnings for the duration of each test; restored in tearDown.
        warnings.simplefilter("ignore")

        self.file = os.path.join(test_dir, "func_group_test.mol")
        self.mol = Molecule.from_file(self.file)
        self.strat = OpenBabelNN()
        self.mg = MoleculeGraph.with_local_env_strategy(
            self.mol, self.strat, reorder=False, extend_structure=False
        )
        self.extractor = FunctionalGroupExtractor(self.mg)

    def tearDown(self):
        """Re-enable default warning behaviour and drop the fixtures."""
        warnings.simplefilter("default")
        del self.extractor
        del self.mg
        del self.strat
        del self.mol
        del self.file

    def test_init(self):
        """Extractors built from a path, a Molecule and a MoleculeGraph agree."""
        # Ensure that instantiation is equivalent for all valid input types
        extractor_str = FunctionalGroupExtractor(self.file)
        extractor_mol = FunctionalGroupExtractor(self.mol)
        extractor_mg = self.extractor

        self.assertEqual(extractor_str.molgraph, extractor_mol.molgraph)
        self.assertEqual(extractor_str.molgraph, extractor_mg.molgraph)
        self.assertEqual(extractor_str.species, extractor_mol.species)
        self.assertEqual(extractor_str.species, extractor_mg.species)

        # Test optimization
        file_no_h = os.path.join(test_dir, "func_group_test_no_h.mol")
        extractor_no_h = FunctionalGroupExtractor(file_no_h, optimize=True)

        self.assertEqual(len(extractor_no_h.molecule), len(extractor_mol.molecule))
        self.assertEqual(extractor_no_h.species, extractor_mol.species)

    def test_get_heteroatoms(self):
        """The test molecule has three heteroatoms: one N and two O."""
        heteroatoms = self.extractor.get_heteroatoms()
        hetero_species = [self.extractor.species[x] for x in heteroatoms]

        self.assertEqual(len(heteroatoms), 3)
        self.assertEqual(sorted(hetero_species), ["N", "O", "O"])

        # Test with limitation
        hetero_no_o = self.extractor.get_heteroatoms(elements=["N"])
        self.assertEqual(len(hetero_no_o), 1)

    def test_get_special_carbon(self):
        """Four special carbons are found overall, two when restricted to N."""
        special_cs = self.extractor.get_special_carbon()

        self.assertEqual(len(special_cs), 4)

        # Test with limitation
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        self.assertEqual(len(special_cs_no_o), 2)

    def test_link_marked_atoms(self):
        """Marked atoms link into one 9-atom group, or two groups without O."""
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))

        self.assertEqual(len(link), 1)
        self.assertEqual(len(link[0]), 9)

        # Exclude Oxygen-related functional groups
        heteroatoms_no_o = self.extractor.get_heteroatoms(elements=["N"])
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        all_marked = heteroatoms_no_o.union(special_cs_no_o)

        link_no_o = self.extractor.link_marked_atoms(all_marked)

        self.assertEqual(len(link_no_o), 2)

    def test_get_basic_functional_groups(self):
        """Only the methyl group is reported as a basic functional group."""
        basics = self.extractor.get_basic_functional_groups()

        # Molecule has one methyl group which will be caught.
        self.assertEqual(len(basics), 1)
        self.assertEqual(len(basics[0]), 4)

        basics_no_methyl = self.extractor.get_basic_functional_groups(func_groups=["phenyl"])
        self.assertEqual(len(basics_no_methyl), 0)

    def test_get_all_functional_groups(self):
        """get_all_functional_groups returns the linked groups plus the basic ones."""
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))
        basics = self.extractor.get_basic_functional_groups()

        all_func = self.extractor.get_all_functional_groups()

        self.assertEqual(len(all_func), len(link) + len(basics))
        self.assertEqual(sorted(all_func), sorted(link + basics))

    def test_categorize_functional_groups(self):
        """Categorization yields the two expected keys with a total count of 2."""
        all_func = self.extractor.get_all_functional_groups()
        categorized = self.extractor.categorize_functional_groups(all_func)

        # assertIn reports both the missing key and the container on failure,
        # unlike assertTrue(key in container.keys()).
        self.assertIn("O=C1C=CC(=O)[N]1", categorized)
        self.assertIn("[CH3]", categorized)

        total_count = sum(c["count"] for c in categorized.values())
        self.assertEqual(total_count, 2)
def get_molecule_data(self, mol_id):
    """
    Compile all useful molecular data for analysis, including molecule size
    (number of atoms), molecular weight, enthalpy, entropy, and functional
    groups.

    NOTE: This function automatically converts energy, enthalpy, and entropy
    into SI units (J/mol and J/mol*K)

    The entry is looked up in the "molecules" collection and its
    "calcs_reversed" list is scanned by task name:

    - "freq" / "frequency" supplies "enthalpy" and "entropy";
    - "sp" supplies "energy";
    - "opt" / "optimization" supplies "molecule".

    The structure-derived keys ("number_atoms", "molecular_weight", "tpsa",
    "functional_groups", "double_bonds", "triple_bonds", "species") are only
    present when an optimized geometry was found.

    :param mol_id: Unique ID associated with the molecule.
    :return: dict of relevant molecule data.
    :raises RuntimeError: if there is no database connection.
    :raises ValueError: if no entry with the given mol_id exists.
    """

    if self.db is None:
        raise RuntimeError("Cannot query database; connection is invalid."
                           " Try to connect again.")

    collection = self.db.db["molecules"]
    mol_entry = collection.find_one({"mol_id": mol_id})

    # find_one returns None when nothing matches; fail with a clear message
    # instead of an opaque TypeError when the entry is subscripted below.
    if mol_entry is None:
        raise ValueError(
            "No molecule with mol_id {!r} found in the database.".format(mol_id)
        )

    mol_data = {"mol_id": mol_id}

    for calc in mol_entry["calcs_reversed"]:
        task_name = calc["task"]["name"]

        if task_name in ("freq", "frequency"):
            # NOTE(review): the factors imply the stored enthalpy is in
            # kcal/mol and the entropy in cal/mol*K (4.184 J per cal) -
            # confirm against the database schema.
            mol_data["enthalpy"] = calc["enthalpy"] * 4.184 * 1000
            mol_data["entropy"] = calc["entropy"] * 4.184
        elif task_name == "sp":
            # NOTE(review): 627.509 is presumably Hartree -> kcal/mol - confirm.
            mol_data["energy"] = calc["final_energy_sp"] * 627.509 * 4.184 * 1000
        elif task_name in ("opt", "optimization"):
            mol_dict = calc["molecule_from_optimized_geometry"]
            mol_data["molecule"] = Molecule.from_dict(mol_dict)

    molecule = mol_data.get("molecule")
    if molecule is None:
        # Without an optimized geometry none of the structural descriptors
        # can be computed; return what was gathered.
        return mol_data

    adaptor = BabelMolAdaptor(molecule)
    pbmol = adaptor.pybel_mol

    mol_data["number_atoms"] = len(molecule)
    mol_data["molecular_weight"] = pbmol.molwt
    mol_data["tpsa"] = pbmol.calcdesc()["TPSA"]

    extractor = FunctionalGroupExtractor(molecule)
    molgraph = extractor.molgraph
    func_grps = extractor.get_all_functional_groups()

    mol_data["functional_groups"] = extractor.categorize_functional_groups(
        func_grps)

    weights = nx.get_edge_attributes(molgraph.graph, "weight")

    bonds_checked = set()
    double_bonds = 0
    triple_bonds = 0
    for edge, weight in weights.items():
        # Remove index from multidigraph
        bond = (edge[0], edge[1])
        bond_order = int(weight)

        # Only the first edge seen for a given (from, to) pair is counted.
        if bond not in bonds_checked:
            if bond_order == 2:
                double_bonds += 1
            elif bond_order == 3:
                triple_bonds += 1

        bonds_checked.add(bond)

    mol_data["double_bonds"] = double_bonds
    mol_data["triple_bonds"] = triple_bonds

    # Tally how many sites there are of each species.
    mol_data["species"] = dict(Counter(str(s.specie) for s in molecule.sites))

    return mol_data