Пример #1
0
    def testSpiroAndBridgeheads(self):
        """Check spiro and bridgehead atom counts, with and without ``atoms=``."""

        def check(calc, mol, expected):
            # Plain call: only the count comes back.
            self.assertEqual(calc(mol), len(expected))
            # Call with an output list: same count, and the list is filled
            # with the expected entries.
            found = []
            self.assertEqual(calc(mol, atoms=found), len(expected))
            self.assertEqual(len(found), len(expected))
            self.assertEqual(sorted(found), expected)

        bridged = Chem.MolFromSmiles("C1CC2CCC1CC2")
        check(rdMD.CalcNumSpiroAtoms, bridged, [])
        check(rdMD.CalcNumBridgeheadAtoms, bridged, [2, 5])

        bridged_with_spiro = Chem.MolFromSmiles("C1CCC2(C1)CC1CCC2CC1")
        check(rdMD.CalcNumSpiroAtoms, bridged_with_spiro, [3])
        check(rdMD.CalcNumBridgeheadAtoms, bridged_with_spiro, [6, 9])
Пример #2
0
def _compute_sas(mol: Mol, sa_model: Dict[int, float]) -> float:
    """Compute a synthetic accessibility score for ``mol`` on a 1-10 scale.

    The raw score is the sum of three terms: a fragment score (count-weighted
    mean of the per-feature contributions looked up in ``sa_model``), a
    negative complexity term (size, stereo centres, spiro atoms, bridgehead
    atoms, macrocycles) and a fingerprint-density correction. The raw value
    is then rescaled, smoothed above 8 and clamped to ``[1.0, 10.0]``.

    Args:
        mol: molecule to score.
        sa_model: maps a Morgan fingerprint feature id to its contribution;
            ids missing from the mapping contribute ``-4``.

    Returns:
        The score, between 1.0 and 10.0 inclusive.

    Raises:
        ZeroDivisionError: if the fingerprint has no non-zero elements.
    """
    # fragment score (second argument of GetMorganFingerprint is 2)
    fps = rdMolDescriptors.GetMorganFingerprint(mol, 2).GetNonzeroElements()
    score1 = 0.
    nf = 0
    for bit_id, count in fps.items():
        nf += count
        score1 += sa_model.get(bit_id, -4) * count
    score1 /= nf

    # features score
    n_atoms = mol.GetNumAtoms()
    n_chiral_centers = len(FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    n_spiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    n_bridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    # Rings with more than 8 atoms are counted as macrocycles.
    n_macrocycles = 0
    for ring in ring_info.AtomRings():
        if len(ring) > 8:
            n_macrocycles += 1

    size_penalty = n_atoms**1.005 - n_atoms
    stereo_penalty = math.log10(n_chiral_centers + 1)
    spiro_penalty = math.log10(n_spiro + 1)
    bridge_penalty = math.log10(n_bridgeheads + 1)
    macrocycle_penalty = 0.

    # ---------------------------------------
    # This differs from the paper, which defines:
    # macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    if n_macrocycles > 0:
        macrocycle_penalty = math.log10(2)

    score2 = (0. - size_penalty - stereo_penalty - spiro_penalty
              - bridge_penalty - macrocycle_penalty)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    score3 = 0.
    if n_atoms > len(fps):
        score3 = math.log(float(n_atoms) / len(fps)) * .5

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    # (named raw_min/raw_max so the builtins min/max are not shadowed)
    raw_min = -4.0
    raw_max = 2.5
    sascore = 11. - (sascore - raw_min + 1) / (raw_max - raw_min) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0

    return sascore
Пример #3
0
def numBridgeheadsAndSpiro(mol, ri=None):
    """
    Count the bridgehead atoms and the spiro atoms of ``mol``.

    ``ri`` is accepted but not used by this implementation.
    Returns the tuple ``(bridgehead count, spiro count)``.
    """
    spiro_count = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    return rdMolDescriptors.CalcNumBridgeheadAtoms(mol), spiro_count
Пример #4
0
 def calculate(self):
     """Dispatch on ``self._type`` to the matching calculation and return its result."""
     kind = self._type
     if kind == "X":
         return self._calc_X()
     if kind in ["Atom", "HeavyAtom"]:
         return self._calc_all()
     if kind == "Spiro":
         return rdMolDescriptors.CalcNumSpiroAtoms(self.mol)
     if kind == "Bridgehead":
         return rdMolDescriptors.CalcNumBridgeheadAtoms(self.mol)
     # Every other type goes through the generic calculation.
     return self._calc()
def numBridgeheadsAndSpiro(mol, ri=None):
    """Return ``(bridgehead count, spiro count)`` for ``mol``; ``ri`` is unused."""
    # Spiro atoms: atoms shared between rings that share exactly one atom.
    spiro_total = rdMolDescriptors.CalcNumSpiroAtoms(mol)

    # Bridgehead atoms: atoms shared between rings that share at least
    # two bonds.
    bridgehead_total = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)

    counts = (bridgehead_total, spiro_total)
    return counts
Пример #6
0
def feature_fp(smiles):
    """Build a numeric feature array for a SMILES string.

    The array holds the values returned by ``rdMolDescriptors.MQNs_``
    followed by sixteen additional descriptor values, in the order listed
    below.
    """
    mol = Chem.MolFromSmiles(smiles)
    values = rdMolDescriptors.MQNs_(mol)

    # NOTE(review): CalcNumRotatableBonds is listed twice (first and third
    # entry). The duplicate is kept so the layout of the returned array does
    # not change; confirm whether a different descriptor was intended.
    extra_calculators = (
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcExactMolWt,
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcFractionCSP3,
        rdMolDescriptors.CalcNumAliphaticCarbocycles,
        rdMolDescriptors.CalcNumAliphaticHeterocycles,
        rdMolDescriptors.CalcNumAliphaticRings,
        rdMolDescriptors.CalcNumAromaticCarbocycles,
        rdMolDescriptors.CalcNumAromaticHeterocycles,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumBridgeheadAtoms,
        rdMolDescriptors.CalcNumRings,
        rdMolDescriptors.CalcNumAmideBonds,
        rdMolDescriptors.CalcNumHeterocycles,
        rdMolDescriptors.CalcNumSpiroAtoms,
        rdMolDescriptors.CalcTPSA,
    )
    for calculator in extra_calculators:
        values.append(calculator(mol))

    return np.array(values)
def calculate_scalar_descriptors(molecule, symbols):
    """Collect scalar descriptors of ``molecule`` into one numpy array.

    The array contains, in order: general descriptors from ``rdMD``, the
    number of entries in ``symbols`` equal to 'H', 'C', 'N', 'O' and 'F',
    ring-related counts, and amide / hydrogen-bond counts.
    """
    features = [rdMD.CalcAsphericity(molecule)]
    features.extend(rdMD.CalcCrippenDescriptors(molecule))
    general_calculators = (
        rdMD.CalcExactMolWt,
        rdMD.CalcEccentricity,
        rdMD.CalcFractionCSP3,
        rdMD.CalcLabuteASA,
        rdMD.CalcNPR1,
        rdMD.CalcNPR2,
        rdMD.CalcHallKierAlpha,
    )
    for calculator in general_calculators:
        features.append(calculator(molecule))

    # elemental distribution: how often each element symbol occurs
    symbols = np.array(symbols)
    for element in ('H', 'C', 'N', 'O', 'F'):
        features.append(np.sum(symbols == element))

    # ring features; the last two are the spiro and bridgehead atom counts
    ring_calculators = (
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,
        rdMD.CalcNumBridgeheadAtoms,
    )
    for calculator in ring_calculators:
        features.append(calculator(molecule))

    # other counts: amide bonds, hydrogen-bond acceptors, hydrogen-bond donors
    for calculator in (rdMD.CalcNumAmideBonds, rdMD.CalcNumHBA, rdMD.CalcNumHBD):
        features.append(calculator(molecule))

    return np.array(features)
Пример #8
0
def CalculateStandAloneDescriptor(molObject):
    """
    Collect the stand-alone descriptors of a molecule into one flat list.

    Args:
        molObject: molecule handed to the RDKit descriptor functions.

    Returns:
        List whose first entry is a flag derived from
        ``AllChem.ComputeGasteigerCharges`` (0.0 when the call returns
        ``None``, 1.0 otherwise), followed by the amide bond count, the
        spiro atom count, the bridgehead atom count and the values
        returned by ``rdMolDescriptors.MQNs_``.

    Raises:
        Whatever the underlying RDKit calls raise; nothing is caught here.
    """
    value_list = []

    # Compare against the None singleton by identity rather than equality.
    # NOTE(review): ComputeGasteigerCharges presumably returns None on normal
    # completion, which would make this flag always 0.0 - confirm the
    # intended signal.
    if AllChem.ComputeGasteigerCharges(molObject) is None:
        value_list.append(0.0)
    else:
        value_list.append(1.0)

    value_list.append(rdMolDescriptors.CalcNumAmideBonds(molObject))
    value_list.append(rdMolDescriptors.CalcNumSpiroAtoms(molObject))
    value_list.append(rdMolDescriptors.CalcNumBridgeheadAtoms(molObject))
    value_list += rdMolDescriptors.MQNs_(molObject)

    return value_list
Пример #9
0
def numBridgeheadsAndSpiro(mol):
    """Return the bridgehead atom count and the spiro atom count of ``mol``, in that order."""
    spiro_atoms = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    bridgehead_atoms = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    result = (bridgehead_atoms, spiro_atoms)
    return result
Пример #10
0
    def get_global_features(self, mol):
        """Return a numpy array of molecule-level features for ``mol``.

        The array is the concatenation of: atom count, bond count, exact
        molecular weight, heavy-atom molecular weight and valence electron
        count; the number of chemical features per family (Acceptor, Donor,
        Hydrophobe, LumpedHydrophobe, PosIonizable, NegIonizable); and a
        fixed list of ``rdm`` descriptor values.
        """
        # Chemical features defined by the BaseFeatures.fdef file in the
        # RDKit data directory.
        factory = ChemicalFeatures.BuildFeatureFactory(
            os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
        feats = factory.GetFeaturesForMol(mol)

        # Basic size / weight features.
        basic_features = [
            mol.GetNumAtoms(),
            mol.GetNumBonds(),
            Descriptors.ExactMolWt(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.NumValenceElectrons(mol),
        ]
        # The string below is disabled code kept as a bare literal; it has no
        # effect at runtime. The FpDensityMorgan1-3 descriptors are likewise
        # not part of the feature vector.
        ''' # These four descriptors are producing the value of infinity for refcode_csd = YOLJUF (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC\t\n)
        MaxAbsPartialCharge = Descriptors.MaxAbsPartialCharge(mol)
        MaxPartialCharge = Descriptors.MaxPartialCharge(mol)
        MinAbsPartialCharge = Descriptors.MinAbsPartialCharge(mol)
        MinPartialCharge = Descriptors.MinPartialCharge(mol)
        '''

        # Count features per family; families not listed here are ignored.
        families = ('Acceptor', 'Donor', 'Hydrophobe', 'LumpedHydrophobe',
                    'PosIonizable', 'NegIonizable')
        family_counts = dict.fromkeys(families, 0)
        for feat in feats:
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] = family_counts[family] + 1

        # Descriptors from rdMolDescriptors, evaluated in this exact order.
        calculators = (
            rdm.CalcNumRotatableBonds, rdm.CalcChi0n, rdm.CalcChi0v,
            rdm.CalcChi1n, rdm.CalcChi1v, rdm.CalcChi2n, rdm.CalcChi2v,
            rdm.CalcChi3n, rdm.CalcChi4n, rdm.CalcChi4v,
            rdm.CalcFractionCSP3, rdm.CalcHallKierAlpha, rdm.CalcKappa1,
            rdm.CalcKappa2, rdm.CalcLabuteASA,
            rdm.CalcNumAliphaticCarbocycles, rdm.CalcNumAliphaticHeterocycles,
            rdm.CalcNumAliphaticRings, rdm.CalcNumAmideBonds,
            rdm.CalcNumAromaticCarbocycles, rdm.CalcNumAromaticHeterocycles,
            rdm.CalcNumAromaticRings, rdm.CalcNumBridgeheadAtoms, rdm.CalcNumHBA,
            rdm.CalcNumHBD, rdm.CalcNumHeteroatoms, rdm.CalcNumHeterocycles,
            rdm.CalcNumLipinskiHBA, rdm.CalcNumLipinskiHBD, rdm.CalcNumRings,
            rdm.CalcNumSaturatedCarbocycles, rdm.CalcNumSaturatedHeterocycles,
            rdm.CalcNumSaturatedRings, rdm.CalcNumSpiroAtoms, rdm.CalcTPSA,
        )
        descriptor_values = [calculator(mol) for calculator in calculators]

        combined = (basic_features
                    + [family_counts[name] for name in families]
                    + descriptor_values)
        # Some descriptors can produce NaN; replacing NaN with 0 is disabled
        # here. Zeroing important features could itself cause outliers in the
        # training or validation set, so dropping such features from the
        # feature set may be the better remedy.
        return np.array(combined).T
Пример #11
0
def num_bridgeheads_and_spiro(mol):
    """Return ``(bridgehead atom count, spiro atom count)`` for ``mol``."""
    spiro, bridgehead = (
        rdMolDescriptors.CalcNumSpiroAtoms(mol),
        rdMolDescriptors.CalcNumBridgeheadAtoms(mol),
    )
    return bridgehead, spiro
Пример #12
0
def get_molecular_features(dataframe, mol_list):
    """Write descriptor columns for every molecule into ``dataframe``.

    Row label ``i`` of the frame receives the values computed for the
    ``i``-th molecule of ``mol_list``. A progress line is printed for each
    molecule.

    Args:
        dataframe: object supporting ``.at[row, column] = value``
            assignment; it is modified in place.
        mol_list: molecules, in row order.

    Returns:
        The same ``dataframe`` object that was passed in.
    """
    df = dataframe

    # The feature factory depends only on the fdef file, so it is built once
    # here instead of once per molecule.
    fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

    # Feature families that get their own count column; features of any
    # other family are ignored.
    families = ("Acceptor", "Donor", "Hydrophobe", "LumpedHydrophobe",
                "PosIonizable", "NegIonizable")

    # rdMolDescriptors functions whose column name equals the function name.
    # CalcKappa3 is intentionally not included.
    calc_columns = (
        "CalcChi0n", "CalcChi0v", "CalcChi1n", "CalcChi1v",
        "CalcChi2n", "CalcChi2v", "CalcChi3n", "CalcChi3v",
        "CalcChi4n", "CalcChi4v",
        "CalcFractionCSP3", "CalcHallKierAlpha",
        "CalcKappa1", "CalcKappa2",
        "CalcLabuteASA",
        "CalcNumAliphaticCarbocycles", "CalcNumAliphaticHeterocycles",
        "CalcNumAliphaticRings", "CalcNumAmideBonds",
        "CalcNumAromaticCarbocycles", "CalcNumAromaticHeterocycles",
        "CalcNumAromaticRings", "CalcNumBridgeheadAtoms",
        "CalcNumHBA", "CalcNumHBD",
        "CalcNumHeteroatoms", "CalcNumHeterocycles",
        "CalcNumLipinskiHBA", "CalcNumLipinskiHBD",
        "CalcNumRings",
        "CalcNumSaturatedCarbocycles", "CalcNumSaturatedHeterocycles",
        "CalcNumSaturatedRings", "CalcNumSpiroAtoms",
        "CalcTPSA",
    )
    rd_columns = [("NumRotatableBonds", rdMolDescriptors.CalcNumRotatableBonds)]
    rd_columns += [(name, getattr(rdMolDescriptors, name)) for name in calc_columns]

    for i, mol in enumerate(mol_list):
        print("Getting molecular features for molecule: ", i)
        natoms = mol.GetNumAtoms()
        nbonds = mol.GetNumBonds()
        mw = Descriptors.ExactMolWt(mol)
        df.at[i, "NbrAtoms"] = natoms
        df.at[i, "NbrBonds"] = nbonds
        df.at[i, "mw"] = mw
        df.at[i, 'HeavyAtomMolWt'] = Chem.Descriptors.HeavyAtomMolWt(mol)
        df.at[i, 'NumValenceElectrons'] = Chem.Descriptors.NumValenceElectrons(mol)
        # MaxAbsPartialCharge, MaxPartialCharge, MinAbsPartialCharge and
        # MinPartialCharge are deliberately not computed: they produced
        # infinity for refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        df.at[i, 'FpDensityMorgan1'] = Chem.Descriptors.FpDensityMorgan1(mol)
        df.at[i, 'FpDensityMorgan2'] = Chem.Descriptors.FpDensityMorgan2(mol)
        df.at[i, 'FpDensityMorgan3'] = Chem.Descriptors.FpDensityMorgan3(mol)

        # Count the chemical features of this molecule per family.
        family_counts = dict.fromkeys(families, 0)
        for feat in factory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] += 1
        for family in families:
            df.at[i, family] = family_counts[family]

        # Remaining descriptors come from rdMolDescriptors.
        for column, calculator in rd_columns:
            df.at[i, column] = calculator(mol)
    return df
Пример #13
0
    def __call__(self, smile):
        """Score a SMILES string by synthetic accessibility.

        The synthetic accessibility score (clamped to 1-10) is mapped through
        ``exp(1 - score)``, so a score of 1 gives 1.0 and higher scores give
        smaller values.

        Args:
            smile: SMILES string of the molecule to score.

        Returns:
            The transformed score, or 0.0 when the SMILES cannot be parsed or
            the computation fails.
        """
        # presumably readFragmentScores() populates the module-level
        # _fscores table - verify against its definition
        if _fscores is None:
            self.readFragmentScores()
        m = Chem.MolFromSmiles(smile)
        if not m:
            return 0.0
        try:
            # fragment score
            fp = rdMolDescriptors.GetMorganFingerprint(
                m, 2)  #<- 2 is the *radius* of the circular fingerprint
            fps = fp.GetNonzeroElements()
            score1 = 0.
            nf = 0
            for bitId, v in iteritems(fps):
                nf += v
                # features missing from the table contribute -4
                score1 += _fscores.get(bitId, -4) * v
            score1 /= nf

            # features score
            nAtoms = m.GetNumAtoms()
            nChiralCenters = len(
                Chem.FindMolChiralCenters(m, includeUnassigned=True))
            ri = m.GetRingInfo()
            nBridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(m)
            nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(m)
            # rings with more than 8 atoms are counted as macrocycles
            nMacrocycles = 0
            for x in ri.AtomRings():
                if len(x) > 8:
                    nMacrocycles += 1

            sizePenalty = nAtoms**1.005 - nAtoms
            stereoPenalty = math.log10(nChiralCenters + 1)
            spiroPenalty = math.log10(nSpiro + 1)
            bridgePenalty = math.log10(nBridgeheads + 1)
            macrocyclePenalty = 0.
            # ---------------------------------------
            # This differs from the paper, which defines:
            #  macrocyclePenalty = math.log10(nMacrocycles+1)
            # This form generates better results when 2 or more macrocycles are present
            if nMacrocycles > 0:
                macrocyclePenalty = math.log10(2)
            score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty
            # correction for the fingerprint density
            # not in the original publication, added in version 1.1
            # to make highly symmetrical molecules easier to synthetise
            score3 = 0.
            if nAtoms > len(fps):
                score3 = math.log(float(nAtoms) / len(fps)) * .5
            sascore = score1 + score2 + score3

            # need to transform "raw" value into scale between 1 and 10
            min_score = -4.0
            max_score = 2.5
            sascore = 11. - (sascore - min_score + 1) / (max_score -
                                                         min_score) * 9.
            # smooth the 10-end
            if sascore > 8.:
                sascore = 8. + math.log(sascore + 1. - 9.)
            if sascore > 10.:
                sascore = 10.0
            elif sascore < 1.:
                sascore = 1.0
            sascore = math.exp(1 - sascore)  # minimize the sascore
            return sascore
        except Exception:
            # Best-effort scoring: any computation error yields 0.0, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return 0.0
Пример #14
0
 # Flatten the nested ALL_SIM collection into one list, then keep only the
 # first element of each entry.
 ALL_SIM = list(itertools.chain.from_iterable(ALL_SIM))
 ALL_SIM = [x[0] for x in ALL_SIM]
 # argsort is ascending, so the last 100 indices are the 100 largest values.
 top_idx = np.argsort(ALL_SIM)[-100: ]
 A = [ALL_SIM[i] for i in top_idx]
 
 # Print statistics for the UNfiltered medians: 
 print('Max: {} Min: {} Mean: {} Std: {}'.format(max(A), min(A), np.mean(A), np.std(A)))
         
 # pick the best filtered
 # Flatten ALL_PATHS the same way and store the unfiltered results under
 # trip_id.
 ALL_PATHS =  list(itertools.chain.from_iterable(ALL_PATHS))
 collect_unfilt[trip_id] = [ALL_PATHS, ALL_SIM]
         
 # Keep only the SMILES whose molecule has neither bridgehead nor spiro atoms,
 # remembering each one's position k so its score can be looked up in ALL_SIM.
 better_smi = []
 for k,smi in enumerate(ALL_PATHS): 
     mol = Chem.MolFromSmiles(smi)
     if rdMolDescriptors.CalcNumBridgeheadAtoms(mol)==0 and rdMolDescriptors.CalcNumSpiroAtoms(mol)==0:
         # better_smi.append(get_best_taut(mol))
         # Only the second value returned by sanitize_smiles is used in the
         # code visible here.
         mol, smi_canon, _ = sanitize_smiles(smi)
         better_smi.append((smi_canon, k))
                        
         
         
 # Split the kept (smiles, index) pairs into parallel lists of SMILES and
 # scores, and store them under trip_id.
 filtered_smiles = [x[0] for x in better_smi]
 filtered_scores = [ALL_SIM[x[1]] for x in better_smi]
 collect_filt[trip_id] = [filtered_smiles, filtered_scores]
         
 # Same top-100 selection as above, now on the filtered scores.
 top_idx_filt = np.argsort(filtered_scores)[-100: ]
 A = [filtered_scores[i] for i in top_idx_filt]
 top_filt_smi = [filtered_smiles[i] for i in top_idx_filt]
 
 # Print statistics for the filtered medians: 
Пример #15
0
def synthetic_accessibility(mol, _fscores=None):
    '''
    Synthetic accessibility score of a molecule, following:

    'Estimation of Synthetic Accessibility Score of Drug-like Molecules
    based on Molecular Complexity and Fragment Contributions'
    Peter Ertl and Ansgar Schuffenhauer
    Journal of Cheminformatics 1:8 (2009)
    http://www.jcheminf.com/content/1/1/8

    The implementation deviates from the paper in a few small ways, notably
    a slightly different formula for the macrocyclic penalty and an extra
    term for molecule symmetry (fingerprint density).

    For a set of 10k diverse molecules the agreement between the original
    method as implemented in PipelinePilot and this implementation is
    r2 = 0.97.

    peter ertl & greg landrum, september 2013

    Parameters
    ----------
    mol : Mol
    _fscores : optional
        Fragment score table: an iterable of rows whose first entry is the
        score and whose remaining entries are the fingerprint ids that carry
        that score. When None, the table is loaded from 'fpscores.pkl.gz'
        located next to this module.

    Returns
    -------
    float : synthetic accessibility score, between 1.0 and 10.0
    '''
    if _fscores is None:
        scores_path = os.path.join(os.path.dirname(__file__), 'fpscores.pkl.gz')
        with gzip.open(scores_path, 'rb') as handle:
            _fscores = pickle.load(handle)

    # Invert the row-based table into a fingerprint id -> score lookup.
    _fscores = {
        bit: float(row[0])
        for row in _fscores
        for bit in row[1:]
    }

    # fragment score
    # 2 is the *radius* of the circular fingerprint
    fingerprints = rdMolDescriptors.GetMorganFingerprint(mol, 2).GetNonzeroElements()
    nf = 0
    score1 = 0.
    for bit_id, count in iteritems(fingerprints):
        # ids missing from the table contribute -4
        score1 += _fscores.get(bit_id, -4) * count
        nf += count
    score1 /= nf

    # features score
    num_atoms = mol.GetNumAtoms()
    num_chiral_centers = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    num_spiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    num_bridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    # a ring with more than 8 atoms counts as a macrocycle
    has_macrocycle = any(len(ring) > 8 for ring in ring_info.AtomRings())

    size_penalty = num_atoms ** 1.005 - num_atoms
    stereo_penalty = math.log10(num_chiral_centers + 1)
    spiro_penalty = math.log10(num_spiro + 1)
    bridge_penalty = math.log10(num_bridgeheads + 1)
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocycle_penalty = math.log10(num_macrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    macrocycle_penalty = math.log10(2) if has_macrocycle else 0.

    score2 = 0. - size_penalty - stereo_penalty - spiro_penalty - bridge_penalty - macrocycle_penalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    num_features = len(fingerprints)
    if num_atoms > num_features:
        score3 = math.log(float(num_atoms) / num_features) * .5
    else:
        score3 = 0.

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    min_score = -4.0
    max_score = 2.5
    sascore = 11. - (sascore - min_score + 1) / (max_score - min_score) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)

    # clamp to the [1, 10] range
    if sascore > 10.:
        return 10.0
    if sascore < 1.:
        return 1.0
    return sascore