Exemplo n.º 1
0
def extract_Pharm2D(column,
                    minPointCount=2,
                    maxPointCount=3,
                    bins=[(0, 2), (2, 5), (5, 8)],
                    from_smiles=True):
    """Extract Pharm2D fingerprints for every entry of a Series.

    :param column: Pandas Series, containing SMILES strings
        (``from_smiles=True``) or RDKit mol objects (``from_smiles=False``)
    :param minPointCount: int, forwarded to ``SigFactory``
    :param maxPointCount: int, forwarded to ``SigFactory``
    :param bins: list of 2-tuples, distance bins handed to ``SetBins``
        (the default list is only read, never modified here)
    :param from_smiles: bool, indicate whether column contains SMILES
        strings that must first be parsed with ``Chem.MolFromSmiles``
    :return: numpy array built from the per-entry fingerprints; an entry
        whose molecule is ``None`` or has no atoms contributes a list of
        zeros of length ``sigFactory.GetSigSize()``
    """
    sigFactory = SigFactory(featFactory,
                            minPointCount=minPointCount,
                            maxPointCount=maxPointCount,
                            trianglePruneBins=False)
    sigFactory.SetBins(bins)
    sigFactory.Init()

    def get_Pharm2D(x):
        # Honour ``from_smiles``: mol objects must not be re-parsed as SMILES.
        mol = Chem.MolFromSmiles(x) if from_smiles else x
        if mol is None or mol.GetNumAtoms() == 0:
            return [0] * sigFactory.GetSigSize()
        return Generate.Gen2DFingerprint(mol, sigFactory)

    fp = column.apply(get_Pharm2D)
    return np.array(list(fp))
Exemplo n.º 2
0
def pharmacophore(mol, target):
    """Return the Tanimoto similarity of the Pharm2D fingerprints of two molecules.

    Both arguments are standardized in place (``.standardize()``), converted
    to strings, patched with the textual substitutions below and parsed with
    ``Chem.MolFromSmiles``.

    :param mol: object exposing ``standardize()`` whose ``str()`` is parsed as SMILES
    :param target: same kind of object as ``mol``
    :return: Tanimoto similarity of the two fingerprints, or ``-100`` when
        either string cannot be turned into a usable RDKit molecule
    """
    print('mol/target', mol, target)
    mol.standardize()
    target.standardize()

    # Substitutions applied, in this order, to both SMILES strings.
    # NOTE(review): presumably these rewrite nitro / N-oxide groups into
    # their charge-separated form so RDKit accepts them - confirm.
    rewrites = (
        ('N(=O)O', '[N+](=O)[O-]'),
        ('N(O)=O', '[N+]([O-])=O'),
        ('n(O)', '[n+]([O-])'),
    )
    mol = str(mol)
    for old, new in rewrites:
        mol = mol.replace(old, new)
    target = str(target)
    for old, new in rewrites:
        target = target.replace(old, new)

    featfactory = load_factory()
    sigfactory = SigFactory(featfactory,
                            minPointCount=2,
                            maxPointCount=3,
                            trianglePruneBins=False)
    sigfactory.SetBins([(0, 2), (2, 5), (5, 8)])
    sigfactory.Init()

    query = Chem.MolFromSmiles(mol)
    reference = Chem.MolFromSmiles(target)
    if not (query and reference):
        # The error counter is local to the call, so it is always 1 here.
        print('ошибка', 1, mol)
        return -100

    query_fp = Generate.Gen2DFingerprint(query, sigfactory)
    reference_fp = Generate.Gen2DFingerprint(reference, sigfactory)
    return DataStructs.TanimotoSimilarity(query_fp, reference_fp)
Exemplo n.º 3
0
def GetPharmacoPFPs(mol,
                    bins=[(i, i + 1) for i in range(20)],
                    minPointCount=2,
                    maxPointCount=2,
                    return_bitInfo=False):
    '''
    Compute the Pharm2D fingerprint of ``mol`` as a boolean numpy array.

    Note (from the original author): maxPointCount=3 is slow; the defaults
    bins = [(i, i+1) for i in range(20)] and maxPointCount=2 are meant for
    large-scale computation.

    :param mol: molecule passed to ``Generate.Gen2DFingerprint``
    :param bins: list of 2-tuples handed to ``SetBins`` (only read here)
    :param minPointCount: int, forwarded to ``SigFactory``
    :param maxPointCount: int, forwarded to ``SigFactory``
    :param return_bitInfo: bool, also return one description per bit
    :return: ``arr``, or ``(arr, description)`` when ``return_bitInfo`` is
        true, where ``description[i]`` is ``GetBitDescription(i)``
    '''
    MysigFactory = SigFactory(featFactory,
                              trianglePruneBins=False,
                              minPointCount=minPointCount,
                              maxPointCount=maxPointCount)
    MysigFactory.SetBins(bins)
    MysigFactory.Init()

    res = Generate.Gen2DFingerprint(mol, MysigFactory)
    # ``np.bool`` no longer exists in current NumPy; the builtin ``bool``
    # yields the same dtype and works on every NumPy version.
    arr = np.array(list(res)).astype(bool)
    if return_bitInfo:
        description = [
            MysigFactory.GetBitDescription(i) for i in range(len(res))
        ]
        return arr, description

    return arr
Exemplo n.º 4
0
    def get_2Dfp(self, rdmols):
        """Return the Pharm2D fingerprints of ``rdmols`` as a numpy array.

        Each molecule becomes one row holding the 0/1 integers of its
        fingerprint bit string.
        """
        # Initial pharmacophore setup
        feature_factory = ChemicalFeatures.BuildFeatureFactory(
            r'ensemble/BaseFeatures.fdef')
        signature_factory = SigFactory(feature_factory,
                                       minPointCount=2,
                                       maxPointCount=3)

        # Discretise the distances between pharmacophore points
        signature_factory.SetBins([(0, 2), (2, 4)])
        signature_factory.Init()

        bit_rows = []
        for rdmol in rdmols:
            fingerprint = Generate.Gen2DFingerprint(rdmol, signature_factory)
            bit_rows.append([int(ch) for ch in fingerprint.ToBitString()])

        return np.array(bit_rows)
Exemplo n.º 5
0
def pool_init(fdef_fname, bin_step):
    """Create the module-global feature and signature factories.

    Sets the globals ``process_factory`` and ``sig_factory``. The distance
    bins are consecutive ``(lower, upper)`` pairs of width ``bin_step``
    starting at 0, keeping only upper bounds strictly below 20.

    :param fdef_fname: path of a feature definition file; when falsy,
        ``process_factory`` is set to ``None``
    :param bin_step: positive width of each distance bin
    :raises ValueError: if ``bin_step`` is not positive (the bin loop
        would otherwise never terminate)
    """
    global process_factory
    global sig_factory
    if bin_step <= 0:
        raise ValueError(
            'bin_step must be positive, got {!r}'.format(bin_step))
    process_factory = ChemicalFeatures.BuildFeatureFactory(
        fdef_fname) if fdef_fname else None
    sig_factory = SigFactory(process_factory,
                             minPointCount=2,
                             maxPointCount=3,
                             trianglePruneBins=False)
    distance_bins = []
    lower = 0
    upper = bin_step
    while upper < 20:
        distance_bins.append((lower, upper))
        lower = upper
        upper += bin_step
    sig_factory.SetBins(distance_bins)
    sig_factory.Init()
Exemplo n.º 6
0
def CalculatePharm2D3pointFingerprint(mol, featFactory=featFactory):
    """
    Calculate the 3-point Pharm2D fingerprint of ``mol``.

    Returns a 3-tuple: the fingerprint as a tuple of 2135 zeros/ones, the
    tuple of on-bit indices, and the raw fingerprint object.
    """
    factory = SigFactory(featFactory, minPointCount=3, maxPointCount=3)
    factory.SetBins([(0, 2), (2, 4), (4, 6), (6, 10)])
    factory.Init()
    fingerprint = Generate.Gen2DFingerprint(mol, factory)

    on_bits = tuple(fingerprint.GetOnBits())

    # NOTE(review): the length 2135 is hard-coded; presumably it equals the
    # signature size for the default featFactory - confirm. An on-bit index
    # of 2135 or more raises IndexError here.
    bits = [0] * 2135
    for index in on_bits:
        bits[index] = 1

    return tuple(bits), on_bits, fingerprint
Exemplo n.º 7
0
def read_file(fname, fcfp4, fdef_fname):
    """Read a tab-separated molecule file and compute one fingerprint per row.

    Each non-blank row must hold exactly three tab-separated fields:
    SMILES, molecule id and a third value that is not used here.

    :param fname: path of the tab-separated input file
    :param fcfp4: when true, use ``AllChem.GetMorganFingerprint`` with
        radius 2 and ``useFeatures=True``; otherwise use a Pharm2D
        fingerprint built from ``fdef_fname``
    :param fdef_fname: feature definition file, only read when ``fcfp4``
        is false
    :return: ``defaultdict(list)`` with the parallel lists ``'mol_name'``,
        ``'smiles'`` and ``'fingerprint'``; blank rows and rows whose
        SMILES cannot be parsed are skipped so the lists stay aligned
    """
    sigFactory = None
    if not fcfp4:
        featFactory = ChemicalFeatures.BuildFeatureFactory(fdef_fname)
        sigFactory = SigFactory(featFactory, minPointCount=2, maxPointCount=3, trianglePruneBins=False)
        sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
        sigFactory.Init()
    d = defaultdict(list)
    with open(fname) as f:
        for row in f:
            row = row.strip()
            if not row:
                # e.g. a trailing empty line; it cannot be unpacked below
                continue
            smiles, ids, _ = row.split('\t')
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals an unparseable SMILES with None;
                # fingerprinting None would raise.
                continue
            if fcfp4:
                fingerprint = AllChem.GetMorganFingerprint(mol, 2, useFeatures=True)
            else:
                fingerprint = Generate.Gen2DFingerprint(mol, sigFactory)
            d['mol_name'].append(ids)
            d['smiles'].append(smiles)
            d['fingerprint'].append(fingerprint)
    return d
Exemplo n.º 8
0
def CalculatePharm2D2pointFingerprint(mol, featFactory=featFactory):
    """
    Calculate the 2-point Pharm2D fingerprint of ``mol``.

    Returns a 3-tuple: the fingerprint as a tuple of 135 zeros/ones, the
    tuple of on-bit indices, and the raw fingerprint object.
    """
    # Nine unit-width distance bins covering 0..9
    unit_bins = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7),
                 (7, 8), (8, 9)]
    factory = SigFactory(featFactory, minPointCount=2, maxPointCount=2)
    factory.SetBins(unit_bins)
    factory.Init()
    fingerprint = Generate.Gen2DFingerprint(mol, factory)

    on_bits = tuple(fingerprint.GetOnBits())

    # NOTE(review): the length 135 is hard-coded; presumably it equals the
    # signature size for the default featFactory - confirm. An on-bit index
    # of 135 or more raises IndexError here.
    bits = [0] * 135
    for index in on_bits:
        bits[index] = 1

    return tuple(bits), on_bits, fingerprint
# IV. 2D pharmacophore fingerprints
# # 4.1 Parameter setup
# Combinations of the properties computed above can serve as a molecule's
# 2D pharmacophore, which can in turn be converted into a pharmacophore
# fingerprint.
# Feature definition file
# NOTE(review): the second argument starts with '/', so on POSIX
# os.path.join discards RDConfig.RDDataDir and returns that absolute path
# unchanged - confirm this is intended.
fdefName = os.path.join(
    RDConfig.RDDataDir,
    '/drug_development/studyRdkit/st_rdcit/data/BaseFeatures.fdef')
# Instantiate the feature factory
featFactory = ChemicalFeatures.BuildFeatureFactory(fdefName)

# The feature factory is then used to build the signature (fingerprint)
# factory, which holds the fingerprint parameters.
# Building the signature factory:
# NOTE(review): the call below is a signature sketch, not runnable code -
# the trailing `...` after keyword arguments is a syntax error.
SigFactory(
    featFactory,  # feature factory
    useCounts=False,  # defaults to False; when False, counts are ignored and a SparseBitVect is generated
    minPointCount=2,  # defaults to 2; minimum number of pharmacophore points included in the fingerprint
    maxPointCount=3,  # defaults to 3; maximum number of pharmacophore points included in the fingerprint
    ...)
sigFactory = SigFactory(featFactory, minPointCount=2, maxPointCount=3)
# Split the topological distances into bins
sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
# Re-initialise after every parameter change
sigFactory.Init()
# Compute the fingerprint length
print('指纹长度=', sigFactory.GetSigSize())  # value reported by the author: 2988

# # 4.2 Generating the 2D pharmacophore fingerprint
# With the signature factory configured, the 2D fingerprint can now be generated.
# Computing the 2D pharmacophore fingerprint:
Gen2DFingerprint(
    mol,  # 要计算指纹的mol对象
Exemplo n.º 10
0
def get_factory():
    """Build and return an initialised, count-based Pharm2D ``SigFactory``.

    The features come from the module-level ``fdef`` string and the
    distance bins from the module-level ``defaultBins``.
    """
    feature_factory = ChemicalFeatures.BuildFeatureFactoryFromString(fdef)
    signature_factory = SigFactory(
        feature_factory,
        minPointCount=2,
        maxPointCount=3,
        useCounts=True,
        trianglePruneBins=False,
    )
    signature_factory.SetBins(defaultBins)
    signature_factory.Init()
    return signature_factory
Exemplo n.º 11
0
import os
import argparse
import pkg_resources
import pandas as pd
from multiprocessing import Pool
from rdkit.Chem import ChemicalFeatures
from rdkit.Chem.Pharm2D import Generate
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from .read_input import read_input
from .read_input import calc_max_tau

# Resolve the path of the feature definition file relative to this module's
# package.
fdef_fname = pkg_resources.resource_filename(
    __name__, 'pmapper_backlog/smarts_features.fdef')
# Module-level factories, created once at import time; _ph_rdkit below reads
# sigFactory.
featFactory = ChemicalFeatures.BuildFeatureFactory(fdef_fname)
sigFactory = SigFactory(featFactory,
                        minPointCount=2,
                        maxPointCount=3,
                        trianglePruneBins=False)
# Three distance bins; Init() is called after the bins are set so the factory
# is ready before first use.
sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
sigFactory.Init()


def _ph_rdkit(mols_tup):
    """Build a one-row DataFrame of Pharm2D fingerprint bits for one molecule.

    ``mols_tup`` is unpacked as ``(mol, name, act, <ignored>)``. The row is
    indexed by ``name``; it has one column per fingerprint bit plus the
    columns ``'mol_id'`` and ``'act'``.

    NOTE(review): no ``return`` is visible in this excerpt, so as shown the
    function returns ``None`` - the snippet is presumably truncated; confirm
    against the full file.
    """
    mol, name, act, _ = mols_tup
    # Uses the module-level sigFactory configured at import time.
    ph = Generate.Gen2DFingerprint(mol, sigFactory)
    # Empty frame with one column per fingerprint bit.
    tmp = pd.DataFrame(columns=range(ph.GetNumBits()))
    ph_bits = list(ph.GetOnBits())
    # Mark each on-bit with 1 in the row labelled `name`.
    for n_bit in ph_bits:
        tmp.loc[name, n_bit] = 1
    tmp.loc[name, 'mol_id'] = name
    tmp.loc[name, 'act'] = act
    # Bits that were never set are still NaN; turn them into 0.
    tmp = tmp.fillna(0)
Exemplo n.º 12
0
class AntidecoysSettings(Settings):
    """
    Settings container specialised for the anti-decoys algorithm.

    In addition to what the ``Settings`` base class receives, it stores the
    pharmacophore fingerprint parameters and the filter thresholds, and
    builds an initialised signature factory.

    """
    def __init__(self,
                 source,
                 target,
                 storage_dir=os.path.abspath('antidecoys_data'),
                 max_threads=None,
                 tree_params=None,
                 max_iters=100,
                 verbose=False,
                 fg_bins=((0, 2), (2, 5), (5, 8)),
                 fg_min_points=2,
                 fg_max_points=3,
                 min_accepted=1000,
                 common_bits_max_thrs=0.75,
                 common_bits_mean_thrs=0.5,
                 antidecoys_min_iters=10,
                 antidecoys_max_iters=50,
                 distance_thrs=0.2):
        """
        :param source: forwarded to ``Settings.__init__``
        :param target: forwarded to ``Settings.__init__``
        :param storage_dir: path to a directory where the results will be stored
        :param max_threads: maximum number of threads to use in parallel computations
        :param tree_params: custom parameters (same for both trees)
        :param max_iters: maximum number of iterations to spend looking for a single path
        :param verbose: require verbose output
        :param fg_bins: distance bins in the pharmacophore fingerprint
        :param fg_min_points: min number of features encoded in the pharmacophore fingerprint
        :param fg_max_points: max number of features encoded in the pharmacophore fingerprint
        :param min_accepted: minimum number of morphs the filter will accept on every iteration
        :param common_bits_max_thrs: maximum common bits percentage the filter will accept on every iteration
        :param common_bits_mean_thrs: if for the mols selected by the filter the mean common bits percentage falls below this value, antidecoys will be turned off
        :param antidecoys_min_iters: minimum number of iterations where antidecoys are optimized
        :param antidecoys_max_iters: maximum number of iterations where antidecoys are optimized
        :param distance_thrs: turn antidecoys filter off when the distance between two closest molecules from each tree gets below this value
        """
        # The base class takes max_iters before tree_params positionally.
        super(AntidecoysSettings, self).__init__(
            source, target, storage_dir, max_threads, max_iters, tree_params,
            verbose)

        self.fg_bins = fg_bins
        """
        distance bins in the pharmacophore fingerprint (as described `here <http://www.rdkit.org/docs/GettingStartedInPython.html#d-pharmacophore-fingerprints>`_)
        """
        self.fg_min_points = fg_min_points
        """
        min number of features encoded in the pharmacophore fingerprint (as described `here <http://www.rdkit.org/docs/GettingStartedInPython.html#d-pharmacophore-fingerprints>`_)
        """
        self.fg_max_points = fg_max_points
        """
        max number of features encoded in the pharmacophore fingerprint (as described `here <http://www.rdkit.org/docs/GettingStartedInPython.html#d-pharmacophore-fingerprints>`_)
        """
        self.min_accepted = min_accepted
        """
        minimum number of morphs the antidecoys filter will accept on every iteration
        """
        self.common_bits_max_thrs = common_bits_max_thrs
        """
        maximum percentage of shared bits between a structure and the anti-fingerprint that the filter will accept on every iteration
        """
        self.common_bits_mean_thrs = common_bits_mean_thrs
        """
        if for the structures that survived the filter the mean common bits percentage falls below this value, antidecoys will be turned off
        """
        self.antidecoys_min_iters = antidecoys_min_iters
        """
        minimum number of iterations that will use the antidecoys filter
        """
        self.antidecoys_max_iters = antidecoys_max_iters
        """
        maximum number of iterations that will use the antidecoys filter
        """
        self.distance_thrs = distance_thrs
        """
        turn antidecoys filter off when the distance between two closest molecules from each tree gets below this value
        """

        # Pharmacophore fingerprint machinery: basic feature definitions ->
        # feature factory -> signature factory with the configured bins.
        self._fdef_file = os.path.join(RDConfig.RDDataDir,
                                       'BaseFeatures.fdef')
        self._feature_factory = ChemicalFeatures.BuildFeatureFactory(
            self._fdef_file)
        sig_factory = SigFactory(self._feature_factory,
                                 minPointCount=self.fg_min_points,
                                 maxPointCount=self.fg_max_points,
                                 trianglePruneBins=False)
        sig_factory.SetBins(self.fg_bins)
        sig_factory.Init()
        self.signature_factory = sig_factory
Exemplo n.º 13
0
def _init():
    """Create the module-global Pharm2D signature factory ``factory``.

    The features come from the module-level ``fdef`` string and the bins
    from the module-level ``defaultBins``.

    NOTE(review): ``labels`` and ``patts`` are declared global but are not
    assigned in the visible part of this function - the excerpt may be
    truncated; confirm against the full file.
    """
    global labels, patts, factory
    featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(fdef)
    factory = SigFactory(featFactory, minPointCount=2, maxPointCount=3)
    factory.SetBins(defaultBins)
    # Init() is called after the bins are set, before the factory is used.
    factory.Init()