Exemplo n.º 1
0
class FunctionalGroupExtractorTest(unittest.TestCase):
    """
    Tests for FunctionalGroupExtractor, run against the molecule stored in
    "func_group_test.mol" (plus a hydrogen-free variant for optimization).
    """

    def setUp(self):
        # Silence warnings for the duration of one test only. Entering a
        # catch_warnings context and registering its exit as a cleanup
        # restores whatever filters were active before the test, instead of
        # forcing the global filter to "default" afterwards.
        warning_ctx = warnings.catch_warnings()
        warning_ctx.__enter__()
        self.addCleanup(warning_ctx.__exit__, None, None, None)
        warnings.simplefilter("ignore")

        self.file = os.path.join(test_dir, "func_group_test.mol")
        self.mol = Molecule.from_file(self.file)
        self.strat = OpenBabelNN()
        self.mg = MoleculeGraph.with_local_env_strategy(self.mol, self.strat,
                                                        reorder=False,
                                                        extend_structure=False)
        self.extractor = FunctionalGroupExtractor(self.mg)

    def tearDown(self):
        # Drop the fixtures built in setUp; the warning filters are restored
        # by the cleanup registered there.
        del self.extractor
        del self.mg
        del self.strat
        del self.mol
        del self.file

    def test_init(self):
        # Ensure that instantiation is equivalent for all valid input types
        # (file path, Molecule, MoleculeGraph).
        extractor_str = FunctionalGroupExtractor(self.file)
        extractor_mol = FunctionalGroupExtractor(self.mol)
        extractor_mg = self.extractor

        self.assertEqual(extractor_str.molgraph, extractor_mol.molgraph)
        self.assertEqual(extractor_str.molgraph, extractor_mg.molgraph)
        self.assertEqual(extractor_str.species, extractor_mol.species)
        self.assertEqual(extractor_str.species, extractor_mg.species)

        # Test optimization: the hydrogen-free input must end up with the
        # same number of atoms and the same species as the full molecule.
        file_no_h = os.path.join(test_dir, "func_group_test_no_h.mol")
        extractor_no_h = FunctionalGroupExtractor(file_no_h, optimize=True)

        self.assertEqual(len(extractor_no_h.molecule),
                         len(extractor_mol.molecule))
        self.assertEqual(extractor_no_h.species, extractor_mol.species)

    def test_get_heteroatoms(self):
        heteroatoms = self.extractor.get_heteroatoms()
        hetero_species = [self.extractor.species[x] for x in heteroatoms]

        self.assertEqual(len(heteroatoms), 3)
        self.assertEqual(sorted(hetero_species), ["N", "O", "O"])

        # Test with limitation: only nitrogen is considered.
        hetero_no_o = self.extractor.get_heteroatoms(elements=["N"])
        self.assertEqual(len(hetero_no_o), 1)

    def test_get_special_carbon(self):
        special_cs = self.extractor.get_special_carbon()

        self.assertEqual(len(special_cs), 4)

        # Test with limitation: only nitrogen is considered.
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        self.assertEqual(len(special_cs_no_o), 2)

    def test_link_marked_atoms(self):
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))

        self.assertEqual(len(link), 1)
        self.assertEqual(len(link[0]), 9)

        # Exclude Oxygen-related functional groups
        heteroatoms_no_o = self.extractor.get_heteroatoms(elements=["N"])
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        all_marked = heteroatoms_no_o.union(special_cs_no_o)

        link_no_o = self.extractor.link_marked_atoms(all_marked)

        self.assertEqual(len(link_no_o), 2)

    def test_get_basic_functional_groups(self):
        basics = self.extractor.get_basic_functional_groups()

        # Molecule has one methyl group which will be caught.
        self.assertEqual(len(basics), 1)
        self.assertEqual(len(basics[0]), 4)

        basics_no_methyl = self.extractor.get_basic_functional_groups(
            func_groups=["phenyl"])
        self.assertEqual(len(basics_no_methyl), 0)

    def test_get_all_functional_groups(self):
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))
        basics = self.extractor.get_basic_functional_groups()

        all_func = self.extractor.get_all_functional_groups()

        self.assertEqual(len(all_func), (len(link) + len(basics)))
        # Order-insensitive comparison that does not require the groups
        # themselves to be sortable.
        self.assertCountEqual(all_func, link + basics)

    def test_categorize_functional_groups(self):
        all_func = self.extractor.get_all_functional_groups()
        categorized = self.extractor.categorize_functional_groups(all_func)

        # assertIn reports the missing key and the container on failure.
        self.assertIn("O=C1C=CC(=O)[N]1", categorized)
        self.assertIn("[CH3]", categorized)

        total_count = sum(c["count"] for c in categorized.values())
        self.assertEqual(total_count, 2)
class FunctionalGroupExtractorTest(unittest.TestCase):
    """
    Exercise FunctionalGroupExtractor on the "func_group_test.mol" fixture:
    construction from each accepted input type, atom marking, linking of
    marked atoms, and extraction/categorization of functional groups.
    """

    def setUp(self):
        # Suppress warnings per test. The catch_warnings context is closed
        # through addCleanup, so the filters that were in place before the
        # test are restored rather than being overwritten with "default".
        filters_guard = warnings.catch_warnings()
        filters_guard.__enter__()
        self.addCleanup(filters_guard.__exit__, None, None, None)
        warnings.simplefilter("ignore")

        self.file = os.path.join(test_dir, "func_group_test.mol")
        self.mol = Molecule.from_file(self.file)
        self.strat = OpenBabelNN()
        self.mg = MoleculeGraph.with_local_env_strategy(self.mol,
                                                        self.strat,
                                                        reorder=False,
                                                        extend_structure=False)
        self.extractor = FunctionalGroupExtractor(self.mg)

    def tearDown(self):
        # Release the per-test fixtures; warning filters are restored by the
        # cleanup registered in setUp.
        del self.extractor
        del self.mg
        del self.strat
        del self.mol
        del self.file

    def test_init(self):
        # Ensure that instantiation is equivalent for all valid input types
        extractor_str = FunctionalGroupExtractor(self.file)
        extractor_mol = FunctionalGroupExtractor(self.mol)
        extractor_mg = self.extractor

        self.assertEqual(extractor_str.molgraph, extractor_mol.molgraph)
        self.assertEqual(extractor_str.molgraph, extractor_mg.molgraph)
        self.assertEqual(extractor_str.species, extractor_mol.species)
        self.assertEqual(extractor_str.species, extractor_mg.species)

        # Test optimization: starting from the file without hydrogens, the
        # extractor should arrive at the same atom count and species.
        file_no_h = os.path.join(test_dir, "func_group_test_no_h.mol")
        extractor_no_h = FunctionalGroupExtractor(file_no_h, optimize=True)

        self.assertEqual(len(extractor_no_h.molecule),
                         len(extractor_mol.molecule))
        self.assertEqual(extractor_no_h.species, extractor_mol.species)

    def test_get_heteroatoms(self):
        heteroatoms = self.extractor.get_heteroatoms()
        hetero_species = [self.extractor.species[x] for x in heteroatoms]

        self.assertEqual(len(heteroatoms), 3)
        self.assertEqual(sorted(hetero_species), ["N", "O", "O"])

        # Test with limitation
        hetero_no_o = self.extractor.get_heteroatoms(elements=["N"])
        self.assertEqual(len(hetero_no_o), 1)

    def test_get_special_carbon(self):
        special_cs = self.extractor.get_special_carbon()

        self.assertEqual(len(special_cs), 4)

        # Test with limitation
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        self.assertEqual(len(special_cs_no_o), 2)

    def test_link_marked_atoms(self):
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))

        self.assertEqual(len(link), 1)
        self.assertEqual(len(link[0]), 9)

        # Exclude Oxygen-related functional groups
        heteroatoms_no_o = self.extractor.get_heteroatoms(elements=["N"])
        special_cs_no_o = self.extractor.get_special_carbon(elements=["N"])
        all_marked = heteroatoms_no_o.union(special_cs_no_o)

        link_no_o = self.extractor.link_marked_atoms(all_marked)

        self.assertEqual(len(link_no_o), 2)

    def test_get_basic_functional_groups(self):
        basics = self.extractor.get_basic_functional_groups()

        # Molecule has one methyl group which will be caught.
        self.assertEqual(len(basics), 1)
        self.assertEqual(len(basics[0]), 4)

        basics_no_methyl = self.extractor.get_basic_functional_groups(
            func_groups=["phenyl"])
        self.assertEqual(len(basics_no_methyl), 0)

    def test_get_all_functional_groups(self):
        heteroatoms = self.extractor.get_heteroatoms()
        special_cs = self.extractor.get_special_carbon()

        link = self.extractor.link_marked_atoms(heteroatoms.union(special_cs))
        basics = self.extractor.get_basic_functional_groups()

        all_func = self.extractor.get_all_functional_groups()

        self.assertEqual(len(all_func), (len(link) + len(basics)))
        # Compare as unordered collections; unlike sorted(), this does not
        # depend on the groups defining a total ordering.
        self.assertCountEqual(all_func, link + basics)

    def test_categorize_functional_groups(self):
        all_func = self.extractor.get_all_functional_groups()
        categorized = self.extractor.categorize_functional_groups(all_func)

        # assertIn gives a failure message naming the missing key.
        self.assertIn("O=C1C=CC(=O)[N]1", categorized)
        self.assertIn("[CH3]", categorized)

        total_count = sum(c["count"] for c in categorized.values())
        self.assertEqual(total_count, 2)
Exemplo n.º 3
0
    def get_molecule_data(self, mol_id):
        """
        Compile all useful molecular data for analysis, including molecule size
        (number of atoms), molecular weight, enthalpy, entropy, and functional
        groups.

        The entry is looked up in the "molecules" collection by its "mol_id"
        field, and its "calcs_reversed" list is scanned for frequency,
        single-point and optimization calculations.

        NOTE: This function automatically converts energy, enthalpy, and entropy
        into SI units (J/mol and J/mol*K). The factors applied are
        4.184 * 1000 for enthalpy, 4.184 for entropy and
        627.509 * 4.184 * 1000 for the single-point energy; this presumably
        means the stored values are in kcal/mol, cal/mol*K and Hartree
        respectively -- TODO confirm against the database schema.

        :param mol_id: Unique ID associated with the molecule.
        :return: dict of relevant molecule data. Always contains "mol_id",
            "molecule", "number_atoms", "molecular_weight", "tpsa",
            "functional_groups", "double_bonds", "triple_bonds" and
            "species"; "enthalpy", "entropy" and "energy" are present only
            if the corresponding calculations exist in the entry.
        :raises RuntimeError: if there is no database connection.
        :raises ValueError: if no entry with the given mol_id exists.
        :raises KeyError: if the entry has no optimization calculation (and
            therefore no molecule geometry to analyze).
        """

        if self.db is None:
            raise RuntimeError("Cannot query database; connection is invalid."
                               " Try to connect again.")

        # Unit conversion factors (see NOTE in the docstring).
        joules_per_cal = 4.184
        kcal_per_hartree = 627.509

        mol_data = {"mol_id": mol_id}

        collection = self.db.db["molecules"]
        mol_entry = collection.find_one({"mol_id": mol_id})

        # Fail with a clear message instead of a TypeError on None below.
        if mol_entry is None:
            raise ValueError("No molecule entry found for mol_id "
                             "{!r}.".format(mol_id))

        for calc in mol_entry["calcs_reversed"]:
            task_name = calc["task"]["name"]
            if task_name in ["freq", "frequency"]:
                mol_data["enthalpy"] = calc["enthalpy"] * joules_per_cal * 1000
                mol_data["entropy"] = calc["entropy"] * joules_per_cal
            if task_name == "sp":
                mol_data["energy"] = (calc["final_energy_sp"]
                                      * kcal_per_hartree
                                      * joules_per_cal * 1000)
            if task_name in ["opt", "optimization"]:
                mol_dict = calc["molecule_from_optimized_geometry"]
                mol_data["molecule"] = Molecule.from_dict(mol_dict)

        # Everything below needs the optimized geometry.
        if "molecule" not in mol_data:
            raise KeyError("Entry for mol_id {!r} has no optimization "
                           "calculation; cannot build molecule "
                           "data.".format(mol_id))

        molecule = mol_data["molecule"]

        adaptor = BabelMolAdaptor(molecule)
        pbmol = adaptor.pybel_mol

        mol_data["number_atoms"] = len(molecule)
        mol_data["molecular_weight"] = pbmol.molwt
        mol_data["tpsa"] = pbmol.calcdesc()["TPSA"]

        extractor = FunctionalGroupExtractor(molecule)
        molgraph = extractor.molgraph
        func_grps = extractor.get_all_functional_groups()

        mol_data["functional_groups"] = extractor.categorize_functional_groups(
            func_grps)

        # Count double and triple bonds from the graph's edge weights. Each
        # (first, second) node pair is counted at most once.
        weights = nx.get_edge_attributes(molgraph.graph, "weight")
        bonds_checked = set()
        double_bonds = 0
        triple_bonds = 0
        for edge, weight in weights.items():
            # Remove index from multidigraph
            bond = (edge[0], edge[1])
            order = int(weight)
            if bond not in bonds_checked:
                if order == 2:
                    double_bonds += 1
                elif order == 3:
                    triple_bonds += 1
            bonds_checked.add(bond)

        mol_data["double_bonds"] = double_bonds
        mol_data["triple_bonds"] = triple_bonds

        # Number of sites per species, keyed by the species' string form.
        species = [str(s.specie) for s in molecule.sites]
        mol_data["species"] = dict(Counter(species))

        return mol_data