Пример #1
0
def PhyChem(smiles):
    """ Calculating the 19D physicochemical descriptors for each molecules,
    the value has been normalized with Gaussian distribution.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix as normalized PhysChem descriptors.
            m is the No. of samples
    """
    props = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            MW = desc.MolWt(mol)
            LOGP = Crippen.MolLogP(mol)
            HBA = Lipinski.NumHAcceptors(mol)
            HBD = Lipinski.NumHDonors(mol)
            rotable = Lipinski.NumRotatableBonds(mol)
            amide = AllChem.CalcNumAmideBonds(mol)
            bridge = AllChem.CalcNumBridgeheadAtoms(mol)
            heteroA = Lipinski.NumHeteroatoms(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            spiro = AllChem.CalcNumSpiroAtoms(mol)
            FCSP3 = AllChem.CalcFractionCSP3(mol)
            ring = Lipinski.RingCount(mol)
            Aliphatic = AllChem.CalcNumAliphaticRings(mol)
            aromatic = AllChem.CalcNumAromaticRings(mol)
            saturated = AllChem.CalcNumSaturatedRings(mol)
            heteroR = AllChem.CalcNumHeterocycles(mol)
            TPSA = MolSurf.TPSA(mol)
            valence = desc.NumValenceElectrons(mol)
            mr = Crippen.MolMR(mol)
            # charge = AllChem.ComputeGasteigerCharges(mol)
            prop = [
                MW, LOGP, HBA, HBD, rotable, amide, bridge, heteroA, heavy,
                spiro, FCSP3, ring, Aliphatic, aromatic, saturated, heteroR,
                TPSA, valence, mr
            ]
        except Exception:
            print(smile)
            prop = [0] * 19
        props.append(prop)
    props = np.array(props)
    props = Scaler().fit_transform(props)
    return props
def SA_score(smile):
    mol = Chem.MolFromSmiles(smile)
    # fragment score
    fp = Chem.GetMorganFingerprint(mol, 2)
    fps = fp.GetNonzeroElements()
    score1 = 0.
    nf = 0
    # for bitId, v in fps.items():
    for bitId, v in fps.items():
        nf += v
        sfp = bitId
        score1 += SA_model.get(sfp, -4) * v
    score1 /= nf

    # features score
    nAtoms = mol.GetNumAtoms()
    nChiralCenters = len(Chem.FindMolChiralCenters(mol,
                                                   includeUnassigned=True))
    ri = mol.GetRingInfo()
    nSpiro = Chem.CalcNumSpiroAtoms(mol)
    nBridgeheads = Chem.CalcNumBridgeheadAtoms(mol)
    nMacrocycles = 0
    for x in ri.AtomRings():
        if len(x) > 8:
            nMacrocycles += 1

    sizePenalty = nAtoms**1.005 - nAtoms
    stereoPenalty = math.log10(nChiralCenters + 1)
    spiroPenalty = math.log10(nSpiro + 1)
    bridgePenalty = math.log10(nBridgeheads + 1)
    macrocyclePenalty = 0.
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    if nMacrocycles > 0:
        macrocyclePenalty = math.log10(2)

    score2 = 0. - sizePenalty - stereoPenalty - \
        spiroPenalty - bridgePenalty - macrocyclePenalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    score3 = 0.
    if nAtoms > len(fps):
        score3 = math.log(float(nAtoms) / len(fps)) * .5

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    min = -4.0
    max = 2.5
    sascore = 11. - (sascore - min + 1) / (max - min) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0
    val = remap(sascore, 5, 1.5)
    val = np.clip(val, 0.0, 1.0)
    return val
Пример #3
0
def NumBridgeheadsAndSpiro(mol, ri=None):
    nSpiro = AllChem.CalcNumSpiroAtoms(mol)
    nBridgehead = AllChem.CalcNumBridgeheadAtoms(mol)

    return nBridgehead, nSpiro