示例#1
0
 def test_empty_fprints(self):
     """Tanimoto similarity between two empty fingerprints must fail.

     Both descriptors are built from ``[0]``; calling the measure on
     them is expected to raise ``ValueError``.
     """
     first_empty, second_empty = Descriptor([0]), Descriptor([0])
     tanimoto = SimilarityMeasure("tanimoto")
     # Callable form of assertRaises: invokes tanimoto(first, second).
     self.assertRaises(ValueError, tanimoto, first_empty, second_empty)
示例#2
0
 def showAllDescriptorsCallback(self):
     """Refresh the descriptor dropdown to match the show-all toggle.

     When ``showAllDescriptorsButton`` reports the ``"selected"`` state,
     the dropdown is filled from
     ``Descriptor.get_all_supported_descriptors()``; otherwise it is
     filled from ``Descriptor.get_supported_fprints()``.
     """
     if "selected" in self.showAllDescriptorsButton.state():
         choices = Descriptor.get_all_supported_descriptors()
     else:
         choices = Descriptor.get_supported_fprints()
     # Single assignment point for both branches; the stray, never-read
     # local that the original bound alongside this value is gone.
     self.molecularDescriptorCombobox["values"] = choices
示例#3
0
 def test_binary_only_metrics(self):
     """Binary-only metrics must reject descriptors that are not bit vectors.

     Every metric reported by
     ``SimilarityMeasure.get_supported_binary_metrics()`` is expected to
     raise ``ValueError`` when applied to the descriptors built from
     ``[1, 2]`` and ``[3, 4]``.
     """
     non_binary_a = Descriptor([1, 2])
     non_binary_b = Descriptor([3, 4])
     for metric_name in SimilarityMeasure.get_supported_binary_metrics():
         measure = SimilarityMeasure(metric_name)
         self.assertRaises(ValueError, measure, non_binary_a, non_binary_b)
示例#4
0
    def __init__(
        self,
        mol_graph=None,
        mol_text=None,
        mol_property_val=None,
        mol_descriptor_val=None,
        mol_src=None,
        mol_smiles=None,
    ):
        """Constructor

        Stores the supplied attributes, wraps the descriptor value (if any)
        in a ``Descriptor``, then loads the molecule from ``mol_src`` and/or
        ``mol_smiles`` when they are given (in that order).

        Args:
            mol_graph (RDKIT mol object): Graph-level information of molecule.
                Implemented as an RDKIT mol object. Default is None.
            mol_text (str): Text identifier of the molecule. Default is None.
                Identifiers can be:
                ------------------
                1. Name of the molecule.
                2. SMILES string representing the molecule.
            mol_property_val (float): Some property associated with the
                molecule. This is typically the response being studied.
                E.g. Boiling point, Selectivity etc. Default is None.
            mol_descriptor_val (numpy ndarray): Descriptor value for the
                molecule. Must be numpy array or list; it is converted with
                ``np.array`` before being stored. Default is None, in which
                case an empty ``Descriptor()`` is created.
            mol_src (str):
                Source file or SMILES string to load molecule. Acceptable
                files:
                -> .pdb file
                -> .txt file with SMILES string in first column, first row
                        and (optionally) property in second column, first
                        row.
                Default is None.
                If provided, mol_graph is attempted to be loaded from it.
            mol_smiles (str): SMILES string for molecule. If provided,
                mol_graph is loaded from it. If mol_text not set in keyword
                argument, this string is used to set it. Default is None.

        Raises:
            LoadingError: Propagated unchanged when raised by the
                ``mol_src`` or ``mol_smiles`` loader.
        """
        self.mol_graph = mol_graph
        self.mol_text = mol_text
        self.mol_property_val = mol_property_val
        if mol_descriptor_val is None:
            self.descriptor = Descriptor()
        else:
            self.descriptor = Descriptor(value=np.array(mol_descriptor_val))
        # The loaders' exceptions are deliberately left to propagate; the
        # previous catch-and-re-raise wrappers added nothing.
        if mol_src is not None:
            self._set_molecule_from_file(mol_src)
        if mol_smiles is not None:
            self._set_molecule_from_smiles(mol_smiles)
示例#5
0
 def test_vectornorm_length_errors(self):
     """Vector-norm similarities must reject descriptors of unequal length.

     The l0, l1 and l2 similarity measures are each applied to a
     length-2 and a length-1 descriptor and are expected to raise
     ``ValueError``.
     """
     two_long = Descriptor([1, 2])
     one_long = Descriptor([3])
     # Build every measure up front, before any comparison is attempted.
     measures = [
         SimilarityMeasure(metric_name)
         for metric_name in ("l0_similarity", "l1_similarity",
                             "l2_similarity")
     ]
     for measure in measures:
         self.assertRaises(ValueError, measure, two_long, one_long)
示例#6
0
    def __init__(
        self,
        molecule_database_src,
        molecule_database_src_type,
        is_verbose,
        similarity_measure,
        n_threads=1,
        fingerprint_type=None,
        fingerprint_params=None,
        sampling_ratio=1.0,
        sampling_random_state=42,
    ):
        """Constructor for the MoleculeSet class.

        Loads the molecule database, optionally subsamples it, sets the
        descriptor, resolves the thread count and finally calls
        ``_set_similarity_matrix``.

        Args:
            molecule_database_src: Source of the molecule database; handed
                to ``_get_molecule_database``.
            molecule_database_src_type: Type of the source; handed to
                ``_get_molecule_database``.
            is_verbose (bool): If truthy, progress messages are printed.
            similarity_measure: Handed to ``SimilarityMeasure`` to build
                ``self.similarity_measure``.
            n_threads (int or str): Number of threads to store in
                ``self.n_threads``. The string ``'auto'`` selects the number
                of physical cores when the predicted speedup exceeds 1.0
                and a single thread otherwise. Default is 1.
            fingerprint_type: If not None, the descriptor is (re)set from
                this fingerprint type, overriding any descriptor obtained
                from the database features. Default is None.
            fingerprint_params: Parameters forwarded with
                ``fingerprint_type``. Default is None.
            sampling_ratio (float): Fraction of the molecules to keep. Useful
                for selecting a subset of the dataset for quick computations.
                Subsampling only happens for values strictly between 0 and
                1. Default is 1.0.
            sampling_random_state (int): Random state used for sampling.
                Default is 42.

        """
        self.is_verbose = is_verbose
        self.molecule_database = None
        self.descriptor = Descriptor()
        self.molecule_database, features = self._get_molecule_database(
            molecule_database_src, molecule_database_src_type)
        if features is not None:
            self._set_descriptor(arbitrary_descriptor_vals=features)
        if 0.0 < sampling_ratio < 1.0:
            if self.is_verbose:
                print(f"Using {int(sampling_ratio * 100)}% of the database...")
            self._subsample_database(sampling_ratio=sampling_ratio,
                                     random_state=sampling_random_state)
        if fingerprint_type is not None:
            # overrides any descriptor set from the database features above
            self._set_descriptor(fingerprint_type=fingerprint_type,
                                 fingerprint_params=fingerprint_params)
        self.similarity_measure = SimilarityMeasure(similarity_measure)
        if n_threads == 'auto':

            # Predicted speedup, linear in molecule and process counts
            # (coefficients look like an empirical fit -- TODO confirm).
            def speedup_eqn(n_mols, n_procs):
                return 1.8505e-4 * n_mols + 2.235e-1 * n_procs + 7.082e-2

            # cpu_count() may return None when the count is undetermined.
            n_cores = psutil.cpu_count(logical=False) or 1
            n_mols = len(self.molecule_database)
            # speedup_eqn grows with n_procs, so if the full core count does
            # not predict a speedup above 1.0 no smaller count will either;
            # in that case run single-threaded instead of paying the
            # multiprocessing overhead.
            if speedup_eqn(n_mols, n_cores) > 1.0:
                self.n_threads = n_cores
            else:
                self.n_threads = 1
        else:
            self.n_threads = n_threads
        self.similarity_matrix = None
        self._set_similarity_matrix()
示例#7
0
    def __init__(self, master=None):
        """Constructor for AIMSim.

        Builds the complete UI: the window (a new ``tk.Tk`` root, or a
        ``tk.Toplevel`` of ``master``), the title label, and a main frame
        holding the entry fields, check buttons, dropdowns and the
        Run / Open Config buttons. All widgets are positioned with
        ``place`` using relative coordinates.

        Args:
            master (tk, optional): tk window. Defaults to None.
        """
        # build ui
        self.window = tk.Tk() if master is None else tk.Toplevel(master)
        self.window.title("AIMSim")
        # Window icon is loaded from the logo file shipped with this package.
        resource_path = pkg_resources.resource_filename(
            __name__,
            "AIMSim-logo.png",
        )
        self.window.iconphoto(False, tk.PhotoImage(file=resource_path))
        # Tk variables backing the entry fields and dropdowns below.
        self.databaseFile = tk.StringVar(self.window)
        self.targetMolecule = tk.StringVar(self.window)
        self.similarityMeasure = tk.StringVar(self.window)
        self.molecularDescriptor = tk.StringVar(self.window)
        # Title label sits directly on the window, above the main frame.
        self.titleLabel = ttk.Label(self.window)
        self.titleLabel.configure(font="TkDefaultFont 14 bold",
                                  text="AI Molecular Similarity")
        self.titleLabel.place(anchor="center",
                              relx="0.5",
                              rely="0.05",
                              x="0",
                              y="0")
        # Every remaining widget is a child of this frame.
        self.mainframe = ttk.Frame(self.window)
        # "Verbose" check button.
        self.verboseCheckbutton = ttk.Checkbutton(self.mainframe)
        self.verboseCheckbutton.configure(compound="top",
                                          cursor="arrow",
                                          offvalue="False",
                                          onvalue="True")
        self.verboseCheckbutton.configure(state="normal", text="Verbose")
        self.verboseCheckbutton.place(anchor="center",
                                      relx="0.1",
                                      rely="0.95",
                                      x="0",
                                      y="0")
        # Database file entry, pre-filled with a default file name.
        self.databaseFileEntry = ttk.Entry(self.mainframe,
                                           textvariable=self.databaseFile)
        _text_ = """smiles_responses.txt"""
        self.databaseFileEntry.delete("0", "end")
        self.databaseFileEntry.insert("0", _text_)
        self.databaseFileEntry.place(anchor="center",
                                     relx="0.5",
                                     rely="0.03",
                                     x="0",
                                     y="0")
        self.databaseFileLabel = ttk.Label(self.mainframe)
        self.databaseFileLabel.configure(text="Database File:")
        self.databaseFileLabel.place(anchor="center",
                                     relx=".2",
                                     rely="0.03",
                                     x="0",
                                     y="0")
        # Target molecule entry, pre-filled with the string "CO".
        self.targetMoleculeEntry = ttk.Entry(self.mainframe,
                                             textvariable=self.targetMolecule)
        _text_ = """CO"""
        self.targetMoleculeEntry.delete("0", "end")
        self.targetMoleculeEntry.insert("0", _text_)
        self.targetMoleculeEntry.place(anchor="center",
                                       relx="0.5",
                                       rely="0.27",
                                       x="0",
                                       y="0")
        self.targetMoleculeLabel = ttk.Label(self.mainframe)
        self.targetMoleculeLabel.configure(text="Target Molecule:")
        self.targetMoleculeLabel.place(anchor="center",
                                       relx="0.17",
                                       rely="0.27",
                                       x="0",
                                       y="0")
        # "Similarity Plots" (plural) check button, placed under the
        # database file row.
        self.similarityPlotsCheckbutton = ttk.Checkbutton(self.mainframe)
        self.similarityPlotsCheckbutton.configure(text="Similarity Plots")
        self.similarityPlotsCheckbutton.place(anchor="center",
                                              relx="0.5",
                                              rely="0.1",
                                              x="0",
                                              y="0")
        self.propertySimilarityCheckbutton = ttk.Checkbutton(self.mainframe)
        self.propertySimilarityCheckbutton.configure(
            text="Property Similarity Plot")
        self.propertySimilarityCheckbutton.place(anchor="center",
                                                 relx="0.5",
                                                 rely="0.15",
                                                 x="0",
                                                 y="0")
        # "Similarity Plot" (singular) check button, placed under the target
        # molecule row. NOTE: a separate widget from
        # similarityPlotsCheckbutton above despite the near-identical name.
        self.similarityPlotCheckbutton = ttk.Checkbutton(self.mainframe)
        self.similarityPlotCheckbutton.configure(text="Similarity Plot")
        self.similarityPlotCheckbutton.place(anchor="center",
                                             relx="0.5",
                                             rely="0.35",
                                             x="0",
                                             y="0")
        # Read-only dropdown of similarity metrics; the first entry is
        # selected initially.
        self.similarityMeasureCombobox = ttk.Combobox(
            self.mainframe,
            textvariable=self.similarityMeasure,
            state="readonly")
        self.similarityMeasureCombobox.configure(
            takefocus=False, values=SimilarityMeasure.get_supported_metrics())
        self.similarityMeasureCombobox.current(0)
        self.similarityMeasureCombobox.place(anchor="center",
                                             relx="0.55",
                                             rely="0.45",
                                             x="0",
                                             y="0")
        self.similarityMeasureLabel = ttk.Label(self.mainframe)
        self.similarityMeasureLabel.configure(text="Similarity Measure:")
        self.similarityMeasureLabel.place(anchor="center",
                                          relx="0.2",
                                          rely="0.45",
                                          x="0",
                                          y="0")
        self.molecularDescriptorLabel = ttk.Label(self.mainframe)
        self.molecularDescriptorLabel.configure(text="Molecular Descriptor:")
        self.molecularDescriptorLabel.place(anchor="center",
                                            relx="0.18",
                                            rely="0.55",
                                            x="0",
                                            y="0")
        # Read-only dropdown of descriptors; initially populated with the
        # supported fingerprints only.
        self.molecularDescriptorCombobox = ttk.Combobox(
            self.mainframe,
            textvariable=self.molecularDescriptor,
            state="readonly")
        self.molecularDescriptorCombobox.configure(
            cursor="arrow",
            justify="left",
            takefocus=False,
            # values=Descriptor.get_all_supported_descriptors(),
            values=Descriptor.get_supported_fprints(),
        )

        # define the callback for the descriptor
        def updateCompatibleMetricsListener(event):
            """Show only compatible metrics, given a descriptor."""
            # NOTE(review): when the selected descriptor has no entry in the
            # compatible-metrics mapping, the literal string "Error" is used
            # as the dropdown's values -- confirm this is the intended
            # fallback.
            self.similarityMeasureCombobox[
                "values"] = SimilarityMeasure.get_compatible_metrics().get(
                    self.molecularDescriptor.get(), "Error")
            # Reset the selection to the first metric of the new list.
            self.similarityMeasureCombobox.current(0)
            return

        # bind this listener to the combobox
        self.molecularDescriptorCombobox.bind("<<ComboboxSelected>>",
                                              updateCompatibleMetricsListener)
        self.molecularDescriptorCombobox.place(anchor="center",
                                               relx="0.55",
                                               rely="0.55",
                                               x="0",
                                               y="0")
        self.molecularDescriptorCombobox.current(0)
        # Run button; pressing it invokes self.runCallback.
        self.runButton = ttk.Button(self.mainframe)
        self.runButton.configure(text="Run")
        self.runButton.place(anchor="center",
                             relx="0.5",
                             rely="0.75",
                             x="0",
                             y="0")
        self.runButton.configure(command=self.runCallback)
        # Open Config button; pressing it invokes self.openConfigCallback.
        self.openConfigButton = ttk.Button(self.mainframe)
        self.openConfigButton.configure(text="Open Config")
        self.openConfigButton.place(anchor="center",
                                    relx="0.5",
                                    rely="0.85",
                                    x="0",
                                    y="0")
        self.openConfigButton.configure(command=self.openConfigCallback)
        # Toggle wired to self.showAllDescriptorsCallback.
        self.showAllDescriptorsButton = ttk.Checkbutton(self.mainframe)
        self.showAllDescriptorsButton.configure(
            compound="top",
            cursor="arrow",
            offvalue="False",
            onvalue="True",
            command=self.showAllDescriptorsCallback,
        )
        self.showAllDescriptorsButton.configure(
            state="normal", text="Show experimental descriptors")
        self.showAllDescriptorsButton.place(anchor="center",
                                            relx="0.45",
                                            rely="0.65",
                                            x="0",
                                            y="0")
        # "Enable Multiple Workers" check button.
        self.multiprocessingCheckbutton = ttk.Checkbutton(self.mainframe)
        self.multiprocessingCheckbutton.configure(compound="top",
                                                  cursor="arrow",
                                                  offvalue="False",
                                                  onvalue="True")
        self.multiprocessingCheckbutton.configure(
            state="normal", text="Enable Multiple Workers")
        self.multiprocessingCheckbutton.place(anchor="center",
                                              relx="0.78",
                                              rely="0.95",
                                              x="0",
                                              y="0")
        # "Outlier Check" check button.
        self.identifyOutliersCheckbutton = ttk.Checkbutton(self.mainframe)
        self.identifyOutliersCheckbutton.configure(compound="top",
                                                   cursor="arrow",
                                                   offvalue="False",
                                                   onvalue="True")
        self.identifyOutliersCheckbutton.configure(state="normal",
                                                   text="Outlier Check")
        self.identifyOutliersCheckbutton.place(anchor="center",
                                               relx="0.4",
                                               rely="0.95",
                                               x="0",
                                               y="0")
        # The frame starts 10% down the window and takes the remaining 90%
        # of its height, leaving the top strip for the title label.
        self.mainframe.configure(height="400", width="400")
        self.mainframe.place(anchor="nw",
                             relheight="0.9",
                             rely="0.1",
                             x="0",
                             y="0")
        self.window.configure(cursor="arrow",
                              height="400",
                              relief="flat",
                              takefocus=False)
        self.window.configure(width="400")

        # Main widget
        self.mainwindow = self.window