Exemplo n.º 1
0
    def test_set_molecule_from_smiles(self):
        """
        Check that a Molecule can be populated from a SMILES string.

        After loading, mol_text is compared with the SMILES string and
        mol_graph is checked to be a non-None rdkit.Chem.rdchem.Mol.

        """
        smiles = "CC"
        molecule = Molecule()
        molecule._set_molecule_from_smiles(smiles)

        self.assertEqual(
            molecule.mol_text,
            smiles,
            msg="Expected mol_text attribute to be set to smiles string",
        )
        self.assertIsNotNone(
            molecule.mol_graph,
            msg="Expected mol_graph attribute to be set from the smiles",
        )
        self.assertIsInstance(
            molecule.mol_graph,
            rdkit.Chem.rdchem.Mol,
            msg="Expected initialized mol_graph to "
                "be rdkit.Chem.rdchem.Mol object",
        )
Exemplo n.º 2
0
 def test_missing_smiles(self):
     """
     Missing smiles strings should raise a LoadingError.

     An empty list is passed in place of a SMILES string.
     """
     # Build the molecule outside the context manager so that only the
     # loading call itself can satisfy the expected exception.
     test_molecule = Molecule()
     with self.assertRaises(LoadingError):
         test_molecule._set_molecule_from_smiles([])
Exemplo n.º 3
0
 def test_invalid_pdb(self):
     """Invalid PDB files should raise a LoadingError.

     An empty file named blank.pdb is created in the current working
     directory, loaded, and removed again once the check has run.
     """
     blank_pdb = 'blank.pdb'
     # Build the molecule first: it must not be able to satisfy the
     # expected exception, and nothing is left on disk if it fails.
     test_molecule = Molecule()
     Path(blank_pdb).touch()
     try:
         with self.assertRaises(LoadingError):
             test_molecule._set_molecule_from_pdb(blank_pdb)
     finally:
         # Remove the scratch file even when the assertion fails.
         remove(blank_pdb)
Exemplo n.º 4
0
 def test_match_fprint_error(self):
     """Matching a fingerprint from a graph-less molecule raises ValueError.
     """
     # reference molecule, built normally and given a morgan fingerprint
     reference = Molecule(mol_smiles="C")
     reference.set_descriptor(fingerprint_type="morgan_fingerprint")
     # overwrite the molecular graph with an empty array
     reference.mol_graph = np.array([])
     target = Molecule()
     self.assertRaises(ValueError, target.match_fingerprint_from, reference)
Exemplo n.º 5
0
    def test_molecule_created_w_attributes(self):
        """
        Check a Molecule built with a name, a scalar property value and a
        list-valued descriptor.

        The name and property value are read back directly; the descriptor
        is inspected through to_numpy() and its label_ attribute.

        """
        name = "test_molecule"
        property_val = 42
        descriptor_val = [1, 2, 3]
        molecule = Molecule(
            mol_text=name,
            mol_property_val=property_val,
            mol_descriptor_val=descriptor_val,
        )

        self.assertEqual(molecule.mol_text, name,
                         msg="Expected mol_text attribute to be set.")
        self.assertEqual(molecule.mol_property_val, property_val,
                         msg="Expected mol_property_val to be set.")
        self.assertIsInstance(
            molecule.descriptor.to_numpy(),
            np.ndarray,
            msg="Expected descriptor.to_numpy()to be np.ndarray",
        )
        elementwise_match = (
            molecule.descriptor.to_numpy() == np.array(descriptor_val))
        self.assertTrue(
            np.all(elementwise_match),
            msg="Expected descriptor.to_numpy() to be array[1, 2, 3]",
        )
        self.assertEqual(
            molecule.descriptor.label_,
            "arbitrary",
            msg="Expected descriptor.label to be arbitrary since "
                "it was initialized by list/array",
        )
Exemplo n.º 6
0
    def _extract_configs(self):
        target_molecule_smiles = self.configs.get("target_molecule_smiles")
        target_molecule_src = self.configs.get("target_molecule_src")
        if target_molecule_smiles:
            self.target_molecule = Molecule(mol_smiles=target_molecule_smiles)
        elif target_molecule_src:
            self.target_molecule = Molecule(mol_src=target_molecule_src)
        else:
            raise IOError("Target molecule source is not specified")

        self.log_fpath = self.configs.get("log_file_path", None)
        if self.log_fpath is not None:
            log_dir = dirname(self.log_fpath)
            makedirs(log_dir, exist_ok=True)

        self.plot_settings = self.configs.get("similarity_plot_settings", {})
        self.n_hits = self.configs.get("n_hits", 1)
        self.draw_molecules = self.configs.get("draw_molecules", False)
Exemplo n.º 7
0
    def test_mol_mol_similarity_w_morgan_tanimoto(self):
        """
        Check that the tanimoto similarity between the morgan fingerprints
        of two Molecules lies in the closed interval [0, 1].

        """

        def fingerprinted(smiles):
            # Build a molecule and attach its morgan fingerprint.
            mol = Molecule(mol_smiles=smiles)
            mol.set_descriptor(fingerprint_type="morgan_fingerprint")
            return mol

        first = fingerprinted("CCCCCCCCC")
        second = fingerprinted("CCCCCCCCCCC")
        measure = SimilarityMeasure(metric="tanimoto")
        similarity = first.get_similarity_to(second,
                                             similarity_measure=measure)
        self.assertGreaterEqual(
            similarity,
            0.0,
            msg="Expected tanimoto similarity to be >= 0.",
        )
        self.assertLessEqual(
            similarity,
            1.0,
            msg="Expected tanimoto similarity to be <= 1.",
        )
Exemplo n.º 8
0
    def is_present(self, target_molecule):
        """
        Determine whether a target molecule is present in the molecule set.

        Every molecule in ``self.molecule_database`` is compared with the
        target using ``Molecule.is_same``; the search stops at the first
        match.

        Args:
            target_molecule (AIMSim.chemical_datastructures.Molecule):
                Target molecule to search.

        Returns:
            (bool): If the molecule is present in the molecule set or not.

        """
        # is_same is called on the class: there is no need to construct a
        # throwaway Molecule for every comparison.
        return any(
            Molecule.is_same(set_molecule, target_molecule)
            for set_molecule in self.molecule_database
        )
Exemplo n.º 9
0
    def test_molecule_draw(self):
        """
        Check that drawing a Molecule writes an image file to the given path.

        The image is written to the current working directory and deleted
        once its existence has been asserted.

        """
        smiles = "CC"
        molecule = Molecule()
        molecule._set_molecule_from_smiles(smiles)
        image_fpath = f"{smiles}.png"
        molecule.draw(fpath=image_fpath)
        image_written = os.path.isfile(image_fpath)
        self.assertTrue(image_written)
        try:
            print(f"Deleting {image_fpath}")
            remove(image_fpath)
        except FileNotFoundError:
            print(f"Could not find {image_fpath}")
Exemplo n.º 10
0
    def test_molecule_created_with_no_attributes(self):
        """
        Check the state of a Molecule constructed without any arguments.

        mol_graph, mol_text and mol_property_val are expected to be None and
        the descriptor is expected to report that it is not initialized.

        """
        blank = Molecule()
        for attr_name in ("mol_graph", "mol_text", "mol_property_val"):
            self.assertIsNone(
                getattr(blank, attr_name),
                msg=f"Expected attribute {attr_name} to be None "
                    "for uninitialized Molecule",
            )
        descriptor_ready = blank.descriptor.check_init()
        self.assertFalse(
            descriptor_ready,
            msg="Expected molecule.descriptor to be unitialized  "
                "for uninitialized Molecule",
        )
Exemplo n.º 11
0
 def test_mol_smiles_loadingerror(self):
     """Constructing a Molecule from a bad mol_smiles raises LoadingError.
     """
     self.assertRaises(LoadingError, Molecule, mol_smiles="XYZ")
Exemplo n.º 12
0
    def _get_molecule_database(self, molecule_database_src,
                               molecule_database_src_type):
        """Load molecular database and return it.
        Optionally return features if found in excel / csv file.

        Args:
            molecule_database_src (str):
                Source of molecular information. Can be a folder or a filepath.
                In case a folder is specified, all .pdb files in the folder
                are sequentially read.
                If a file path, it is assumed that the file is a .txt file with
                layout: SMILES string (column1) '\b' property (column2, optional).
            molecule_database_src_type (str):
                Type of source. Can be ['folder', 'text', 'excel', 'csv']

        Returns:
            (list(Molecule), np.ndarray or None)
                Returns a tuple. First element of tuple is the molecule_database.
                Second element is array of features of shape
                (len(molecule_database), n_features) or None if None found.

        """
        if not self.is_verbose:
            RDLogger.DisableLog('rdApp.*')

        molecule_database = []
        features = None
        if molecule_database_src_type.lower() in ["folder", "directory"]:
            if self.is_verbose:
                print(f"Searching for *.pdb files in {molecule_database_src}")
            for molfile in glob(os.path.join(molecule_database_src, "*.pdb")):
                if self.is_verbose:
                    print(f"Loading {molfile}")
                try:
                    molecule_database.append(Molecule(mol_src=molfile))
                except LoadingError as e:
                    if self.is_verbose:
                        print(f"{molfile} could not be imported. Skipping")

        elif molecule_database_src_type.lower() == "text":
            if self.is_verbose:
                print(f"Reading SMILES strings from {molecule_database_src}")
            with open(molecule_database_src, "r") as fp:
                smiles_data = fp.readlines()
            for count, line in enumerate(smiles_data):
                # Assumes that the first column contains the smiles string
                line_fields = line.split()
                smile = line_fields[0]
                mol_property_val = None
                if len(line_fields) > 1:
                    mol_property_val = float(line_fields[1])
                if self.is_verbose:
                    print(f"Processing {smile} "
                          f"({count + 1}/"
                          f"{len(smiles_data)})")
                mol_text = smile
                try:
                    molecule_database.append(
                        Molecule(
                            mol_smiles=smile,
                            mol_text=mol_text,
                            mol_property_val=mol_property_val,
                        ))
                except LoadingError as e:
                    if self.is_verbose:
                        print(f"{smile} could not be imported. Skipping")

        elif molecule_database_src_type.lower() in ["excel", "csv"]:
            if self.is_verbose:
                print(f"Reading molecules from {molecule_database_src}")
            database_df = (pd.read_excel(molecule_database_src,
                                         engine="openpyxl")
                           if molecule_database_src_type.lower() == "excel"
                           else pd.read_csv(molecule_database_src))
            # expects feature columns to be prefixed with feature_
            # e.g. feature_smiles
            feature_cols = [
                column for column in database_df.columns
                if column.split("_")[0] == "feature"
            ]
            database_feature_df = database_df[feature_cols]
            mol_names, mol_smiles, responses = None, None, None
            if "feature_name" in feature_cols:
                mol_names = database_feature_df["feature_name"].values.flatten(
                )
                database_feature_df = database_feature_df.drop(
                    ["feature_name"], axis=1)
            if "feature_smiles" in feature_cols:
                mol_smiles = database_df["feature_smiles"].values.flatten()
                database_feature_df = database_feature_df.drop(
                    ["feature_smiles"], axis=1)

            response_col = [
                column for column in database_df.columns
                if column.split("_")[0] == "response"
            ]
            if len(response_col) > 0:
                # currently handles one response
                responses = database_df[response_col].values.flatten()
            for mol_id, smile in enumerate(mol_smiles):
                if self.is_verbose:
                    print(f"Processing {smile} "
                          f"({mol_id + 1}/"
                          f"{database_df['feature_smiles'].values.size})")
                mol_text = mol_names[mol_id] if mol_names is not None else smile

                mol_property_val = responses[
                    mol_id] if responses is not None else None

                try:
                    molecule_database.append(
                        Molecule(
                            mol_smiles=smile,
                            mol_text=mol_text,
                            mol_property_val=mol_property_val,
                        ))
                except LoadingError as e:
                    if self.is_verbose:
                        print(f"{smile} could not be imported. Skipping")

            if len(database_feature_df.columns) > 0:
                features = database_feature_df.values
        else:
            raise FileNotFoundError(
                f"{molecule_database_src} could not be found. "
                f"Please enter valid folder name or path of a "
                f"text/excel/csv")
        if len(molecule_database) == 0:
            raise UserWarning("No molecular files found in the location!")
        return molecule_database, features
Exemplo n.º 13
0
 def test_missing_pdb(self):
     """Missing PDB files should raise a LoadingError.
     """
     # Build the molecule outside the context manager so that only the
     # loading call itself can satisfy the expected exception.
     test_molecule = Molecule()
     with self.assertRaises(LoadingError):
         test_molecule._set_molecule_from_pdb("missing.pdb")
Exemplo n.º 14
0
    def test_set_molecule_from_file(self):
        """
        Test to create Molecule object by reading the contents of a file.

        Case #1: text file
        Case #2: PDB file

        Both files are written to the current working directory and are
        removed again even when an assertion fails.

        """
        test_smiles = "CC"
        # Case 1: text file
        test_text_molecule = Molecule()
        text_fpath = "test_mol_src.txt"
        print(f"Creating file {text_fpath}...")
        with open(text_fpath, "w") as fp:
            fp.write(test_smiles + " garbage vals")
        try:
            test_text_molecule._set_molecule_from_file(text_fpath)
            self.assertEqual(
                test_text_molecule.mol_text,
                test_smiles,
                "Expected mol_text attribute to be set "
                "to smiles string when loading from txt file",
            )
            self.assertIsNotNone(
                test_text_molecule.mol_graph,
                "Expected mol_graph attribute to be set "
                "from the smiles when loading from txt file",
            )
            self.assertIsInstance(
                test_text_molecule.mol_graph,
                rdkit.Chem.rdchem.Mol,
                "Expected initialized mol_graph to "
                "be rdkit.Chem.rdchem.Mol object "
                "when loading from txt file",
            )
        finally:
            # Always clean up so a failing assertion leaves no stray file.
            print(f"Deleting file {text_fpath}...")
            remove(text_fpath)

        # Case 2: pdb file
        test_pdb_molecule = Molecule()
        test_pdb_filename = "test_mol_src.pdb"
        print(f"Creating file {test_pdb_filename}...")
        test_mol = MolFromSmiles(test_smiles)
        MolToPDBFile(test_mol, test_pdb_filename)
        try:
            test_pdb_molecule._set_molecule_from_file(test_pdb_filename)
            self.assertEqual(
                test_pdb_molecule.mol_text,
                os.path.basename(test_pdb_filename).split('.')[0],
                "Expected mol_text attribute to be set "
                "to name of file when loading from pdb file",
            )
            self.assertIsNotNone(
                test_pdb_molecule.mol_graph,
                "Expected mol_graph attribute to be set "
                "from the smiles when loading from pdb file",
            )
            self.assertIsInstance(
                test_pdb_molecule.mol_graph,
                rdkit.Chem.rdchem.Mol,
                "Expected initialized mol_graph to "
                "be rdkit.Chem.rdchem.Mol object "
                "when loading from pdb file",
            )
        finally:
            # Always clean up so a failing assertion leaves no stray file.
            print(f"Deleting file {test_pdb_filename}...")
            remove(test_pdb_filename)
Exemplo n.º 15
0
 def test_get_property_value(self):
     """The property value given at construction should be returned by
     get_mol_property_val().
     """
     expected = 10
     molecule = Molecule(mol_text="C", mol_property_val=expected)
     observed = molecule.get_mol_property_val()
     self.assertEqual(observed, expected)
Exemplo n.º 16
0
 def test_get_name(self):
     """The mol_text given at construction should be returned by get_name().
     """
     expected_name = "C"
     molecule = Molecule(mol_text=expected_name)
     self.assertEqual(molecule.get_name(), expected_name)
Exemplo n.º 17
0
 def test_is_same(self):
     """Two molecules built with the same mol_text should be identified
     as the same by Molecule.is_same.
     """
     first = Molecule(mol_text="C")
     second = Molecule(mol_text="C")
     verdict = Molecule.is_same(first, second)
     self.assertTrue(verdict)
Exemplo n.º 18
0
 def test_invalid_smiles(self):
     """Invalid SMILES strings should raise a LoadingError.
     """
     # Build the molecule outside the context manager so that only the
     # loading call itself can satisfy the expected exception.
     test_molecule = Molecule()
     with self.assertRaises(LoadingError):
         test_molecule._set_molecule_from_smiles("XYZ")
Exemplo n.º 19
0
 def test_mol_src_txt_loadingerror(self):
     """Constructing a Molecule from a bad mol_src raises LoadingError.
     """
     self.assertRaises(LoadingError, Molecule,
                       mol_src='non-existent file.txt')
Exemplo n.º 20
0
    def test_molecule_graph_similar_to_itself_morgan_dice(self):
        """
        Check that two Molecules loaded from the same SMILES string have a
        dice similarity of 1 between their morgan fingerprints.

        """
        smiles = "CCO"
        # Load both copies first, then fingerprint them in the same order.
        pair = []
        for _ in range(2):
            molecule = Molecule()
            molecule._set_molecule_from_smiles(smiles)
            pair.append(molecule)
        for molecule in pair:
            molecule.set_descriptor(fingerprint_type="morgan_fingerprint")
        original, duplicate = pair

        measure = SimilarityMeasure(metric="dice")
        similarity = original.get_similarity_to(duplicate,
                                                similarity_measure=measure)
        self.assertEqual(
            similarity,
            1.0,
            msg="Expected dice similarity to be 1 when comparing "
                "molecule graph to itself",
        )