Exemplo n.º 1
0
    def _searchSubStructure (self, numsel, metric):
        """Search the reference space for compounds containing each query compound.

        Every row of ``self.X`` is converted to an RDKit bit vector and used as
        a fast pre-filter (``AllProbeBitsMatch``) against the fingerprints in
        ``self.Xref``. Reference compounds passing the filter are confirmed
        with a real substructure match between the molecules built from
        ``self.objinfo['SMILES'][i]`` (query) and
        ``self.objinforef['SMILES'][j]`` (reference).

        Args:
            numsel: the reference scan for a query stops as soon as this many
                hits have been collected.
            metric: only 'Substructural' (or None) is meaningful here; any
                other value triggers a warning and is ignored.

        Returns:
            A ``(True, results)`` tuple. ``results`` holds one dictionary per
            query compound with a 'distances' list (always 1.00 per hit) plus
            one list for every key of ``self.objinforef``, filled with the
            values of the selected reference compounds.
        """

        # the metric does not influence the search; just tell the user when
        # the requested one cannot be honoured
        if metric is not None and metric != 'Substructural':
            LOG.warning (f'Metric {metric} is not compatible with the descriptors present in this space. Using Substructural')

        results = []

        t1 = time.time()

        # for each compound in the search set
        for i, ivector in enumerate(self.X):
            # assumes ivector is a numpy array of 0/1 values (it is joined
            # into a bit string) -- TODO confirm against the caller
            bitestring = "".join(ivector.astype(str))
            ifp = DataStructs.cDataStructs.CreateFromBitString(bitestring)

            selected_i = []

            # the query molecule is parsed lazily (only when a fingerprint
            # passes the pre-filter) and only once per query compound
            mi = None

            # for each compound in the space
            for j, jfp in enumerate(self.Xref):
                if DataStructs.AllProbeBitsMatch(ifp, jfp):
                    if mi is None:
                        mi = Chem.MolFromSmiles(self.objinfo['SMILES'][i])
                        if mi is None:
                            # MolFromSmiles returns None for invalid SMILES;
                            # no substructure match is possible for this query
                            LOG.warning (f'Unable to parse the SMILES of query compound #{i}, no substructure search carried out for it')
                            break

                    mj = Chem.MolFromSmiles(self.objinforef['SMILES'][j])
                    if mj is None:
                        LOG.warning (f'Unable to parse the SMILES of reference compound #{j}, skipped')
                    elif mj.HasSubstructMatch(mi):
                        selected_i.append(j)

                if len(selected_i) >= numsel:
                    break

            # results for molecule i are stored in a dictionary; distances are
            # always stored and a substructure hit is reported as 1.00
            results_info = {'distances': [1.00] * len(selected_i)}

            # all the objects information (name, smiles, ID, activity, etc.)
            for oi in self.objinforef:
                results_info[oi] = [self.objinforef[oi][si] for si in selected_i]

            results.append(results_info)

        LOG.info (f'search completed in time: {time.time()-t1:.4f} secs')

        return True, results
Exemplo n.º 2
0
    def get_on_bits(self, mol):
        """Return the indexes of the scaffolds that are present in a molecule.

        Args:
            mol: an RDKit molecule, or a SMILES string that is converted to
                one with ``Chem.MolFromSmiles``.

        Returns:
            A list with the positions ``i`` for which the fingerprint
            ``self.scaffold_fps[i]`` is fully contained in the RDKit
            fingerprint of the molecule and ``self.scaffolds[i]`` is matched
            as a substructure of it.
        """
        molecule = Chem.MolFromSmiles(mol) if isinstance(mol, str) else mol
        fingerprint = Chem.RDKFingerprint(molecule)

        # the fingerprint comparison acts as a pre-filter: the substructure
        # match is only evaluated for scaffolds whose bits are all set
        return [
            idx
            for idx, scaffold_fp in enumerate(self.scaffold_fps)
            if DataStructs.AllProbeBitsMatch(scaffold_fp, fingerprint)
            and molecule.HasSubstructMatch(self.scaffolds[idx])
        ]
Exemplo n.º 3
0
    def _buildResultsInfo (self, selected_i):
        """Assemble the results dictionary for the given reference indexes.

        The dictionary contains a 'distances' list (a substructure hit is
        always reported as 1.00) and, for every key of ``self.objinforef``,
        the list of values of the selected reference compounds.
        """
        results_info = {'distances': [1.00] * len(selected_i)}

        # all the objects information (name, smiles, ID, activity, etc.)
        for oi in self.objinforef:
            results_info[oi] = [self.objinforef[oi][si] for si in selected_i]

        return results_info

    def _searchSubStructure (self, numsel, metric):
        """Search the reference space for compounds containing a substructure.

        Two modes are supported:

        * when ``self.isSMARTS`` is true, a single query is built from the
          'SMARTS' value of ``self.conveyor`` and matched against every
          reference compound (``self.objinforef['SMILES']``);
        * otherwise every row of ``self.X`` is converted to an RDKit bit
          vector, used as a pre-filter (``AllProbeBitsMatch``) against the
          fingerprints in ``self.Xref``, and the surviving reference
          compounds are confirmed with a substructure match of the molecule
          built from ``self.objinfo['SMILES'][i]``.

        Args:
            numsel: the reference scan for a query stops as soon as this many
                hits have been collected.
            metric: not used by this search.

        Returns:
            ``(True, results)`` where ``results`` holds one dictionary per
            query (see ``_buildResultsInfo``). When the SMARTS pattern cannot
            be parsed ``(False, message)`` is returned instead.
            NOTE(review): the (False, message) form assumes callers treat the
            first element as a success flag -- confirm against the caller.
        """

        LOG.info ('searching for similar compounds using Substructure similarity')

        results = []

        t1 = time.time()

        if self.isSMARTS:

            # the pattern is the same for every reference compound, so it is
            # compiled once instead of once per iteration
            smarts = self.conveyor.getVal('SMARTS')
            mi = Chem.MolFromSmarts(smarts) if isinstance(smarts, str) else None
            if mi is None:
                # MolFromSmarts returns None for invalid patterns
                return False, f'Unable to parse SMARTS pattern: {smarts}'

            selected_i = []

            # for each compound in the space
            for j, _ in enumerate(self.Xref):
                mj = Chem.MolFromSmiles(self.objinforef['SMILES'][j])
                if mj is None:
                    LOG.warning (f'Unable to parse the SMILES of reference compound #{j}, skipped')
                elif mj.HasSubstructMatch(mi):
                    selected_i.append(j)

                if len(selected_i) >= numsel:
                    break

            # a SMARTS search has a single query, hence a single dictionary
            results.append(self._buildResultsInfo(selected_i))

            LOG.info (f'search completed in time: {time.time()-t1:.4f} secs')

            return True, results

        # for each compound in the search set
        for i, ivector in enumerate(self.X):
            # assumes ivector is a numpy array of 0/1 values (it is joined
            # into a bit string) -- TODO confirm against the caller
            bitestring = "".join(ivector.astype(str))
            ifp = DataStructs.cDataStructs.CreateFromBitString(bitestring)

            selected_i = []

            # the query molecule is parsed lazily (only when a fingerprint
            # passes the pre-filter) and only once per query compound
            mi = None

            # for each compound in the space
            for j, jfp in enumerate(self.Xref):
                if DataStructs.AllProbeBitsMatch(ifp, jfp):
                    if mi is None:
                        mi = Chem.MolFromSmiles(self.objinfo['SMILES'][i])
                        if mi is None:
                            # MolFromSmiles returns None for invalid SMILES;
                            # no substructure match is possible for this query
                            LOG.warning (f'Unable to parse the SMILES of query compound #{i}, no substructure search carried out for it')
                            break

                    mj = Chem.MolFromSmiles(self.objinforef['SMILES'][j])
                    if mj is None:
                        LOG.warning (f'Unable to parse the SMILES of reference compound #{j}, skipped')
                    elif mj.HasSubstructMatch(mi):
                        selected_i.append(j)

                if len(selected_i) >= numsel:
                    break

            # results for molecule i are stored in a dictionary
            results.append(self._buildResultsInfo(selected_i))

        LOG.info (f'search completed in time: {time.time()-t1:.4f} secs')

        return True, results