def GetPharmacoPFPs(mol,
                    bins=[(i, i + 1) for i in range(20)],
                    minPointCount=2,
                    maxPointCount=2,
                    return_bitInfo=False):
    """Compute the 2D pharmacophore fingerprint of ``mol`` as a boolean array.

    Note: ``maxPointCount=3`` is slow. For large-scale computation use
    ``bins=[(i, i + 1) for i in range(20)]`` together with ``maxPointCount=2``.

    :param mol: molecule handed to ``Generate.Gen2DFingerprint``
    :param bins: list of ``(lower, upper)`` distance bins passed to ``SetBins``
    :param minPointCount: ``minPointCount`` forwarded to ``SigFactory``
    :param maxPointCount: ``maxPointCount`` forwarded to ``SigFactory``
    :param return_bitInfo: when true, also return one description per bit
    :return: ``arr`` (boolean numpy array), or ``(arr, description)`` when
        ``return_bitInfo`` is true, where ``description`` is the list of
        ``GetBitDescription(i)`` for every bit index of the fingerprint
    """
    # The default ``bins`` list is shared between calls; it is only handed to
    # ``SetBins`` and never modified inside this function.
    MysigFactory = SigFactory(featFactory,
                              trianglePruneBins=False,
                              minPointCount=minPointCount,
                              maxPointCount=maxPointCount)
    MysigFactory.SetBins(bins)
    MysigFactory.Init()

    res = Generate.Gen2DFingerprint(mol, MysigFactory)
    # ``np.bool`` was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # ``bool`` selects the same dtype on every NumPy version.
    arr = np.array(list(res)).astype(bool)

    if return_bitInfo:
        description = [MysigFactory.GetBitDescription(i) for i in range(len(res))]
        return arr, description
    return arr
def extract_Pharm2D(column, minPointCount=2, maxPointCount=3,
                    bins=[(0, 2), (2, 5), (5, 8)], from_smiles=True):
    """Extract Pharm2D fingerprints for every entry of ``column``.

    :param column: Pandas Series, containing SMILES strings or RDKit mol objects
    :param minPointCount: int, forwarded to ``SigFactory``
    :param maxPointCount: int, forwarded to ``SigFactory``
    :param bins: list of tuples, distance bins passed to ``SetBins``
    :param from_smiles: bool, indicates whether ``column`` contains SMILES
        strings (parsed with ``Chem.MolFromSmiles``) or ready-made mol objects
        (used as they are)
    :return: numpy array built from the per-entry fingerprints; an entry whose
        molecule is ``None`` or has no atoms contributes an all-zero vector of
        length ``sigFactory.GetSigSize()``
    """
    sigFactory = SigFactory(featFactory,
                            minPointCount=minPointCount,
                            maxPointCount=maxPointCount,
                            trianglePruneBins=False)
    sigFactory.SetBins(bins)
    sigFactory.Init()

    def get_Pharm2D(x):
        # Honour ``from_smiles``: previously every entry was parsed as SMILES,
        # even when the caller supplied mol objects.
        mol = Chem.MolFromSmiles(x) if from_smiles else x
        if (mol is None) or (len(mol.GetAtoms()) == 0):
            return [0] * sigFactory.GetSigSize()
        return Generate.Gen2DFingerprint(mol, sigFactory)

    fp = column.apply(get_Pharm2D)
    return np.array(list(fp))
def pharmacophore(mol, target):
    """Return the Tanimoto similarity of two 2D pharmacophore fingerprints.

    Both arguments are standardized in place (``.standardize()``), converted
    with ``str()``, and have a fixed set of substrings rewritten into
    charge-separated notation before being parsed by ``Chem.MolFromSmiles``.

    :param mol: first molecule object; must provide ``standardize()`` and ``str()``
    :param target: second molecule object with the same interface
    :return: ``DataStructs.TanimotoSimilarity`` of the two fingerprints, or
        ``-100`` when either parsed molecule is falsy
    """
    failures = 0
    print('mol/target', mol, target)
    mol.standardize()
    target.standardize()

    # Substring rewrites applied, in this order, to both strings.
    rewrites = (
        ('N(=O)O', '[N+](=O)[O-]'),
        ('N(O)=O', '[N+]([O-])=O'),
        ('n(O)', '[n+]([O-])'),
    )

    mol = str(mol)
    for old, new in rewrites:
        mol = mol.replace(old, new)

    target = str(target)
    for old, new in rewrites:
        target = target.replace(old, new)

    featfactory = load_factory()
    sigfactory = SigFactory(featfactory,
                            minPointCount=2,
                            maxPointCount=3,
                            trianglePruneBins=False)
    sigfactory.SetBins([(0, 2), (2, 5), (5, 8)])
    sigfactory.Init()

    mol1 = Chem.MolFromSmiles(mol)
    mol2 = Chem.MolFromSmiles(target)

    # Guard clause: report the failure and return the sentinel value.
    if not (mol1 and mol2):
        failures = failures + 1
        print('ошибка', failures, mol)
        return -100

    fp1 = Generate.Gen2DFingerprint(mol1, sigfactory)
    fp2 = Generate.Gen2DFingerprint(mol2, sigfactory)
    return DataStructs.TanimotoSimilarity(fp1, fp2)
def get_2Dfp(self, rdmols):
    """Return the 2D pharmacophore fingerprints of ``rdmols`` as an integer array.

    :param rdmols: iterable of molecules passed to ``Generate.Gen2DFingerprint``
    :return: numpy array with one row per molecule, each row holding the
        fingerprint's bit string converted character by character to ``int``
    """
    # Initial pharmacophore setup
    feature_factory = ChemicalFeatures.BuildFeatureFactory(r'ensemble/BaseFeatures.fdef')
    signature_factory = SigFactory(feature_factory, minPointCount=2, maxPointCount=3)
    # Discretize the distances between pharmacophore points
    signature_factory.SetBins([(0, 2), (2, 4)])
    signature_factory.Init()

    bit_strings = []
    for mol in rdmols:
        fingerprint = Generate.Gen2DFingerprint(mol, signature_factory)
        bit_strings.append(fingerprint.ToBitString())

    rows = [[int(ch) for ch in bits] for bits in bit_strings]
    return np.array(rows)
def pool_init(fdef_fname, bin_step):
    """Initialise the module-level feature and signature factories.

    Sets the globals ``process_factory`` and ``sig_factory``. The signature
    factory is configured with consecutive distance bins
    ``(0, bin_step), (bin_step, 2 * bin_step), ...`` for as long as the upper
    edge stays below 20.

    :param fdef_fname: path of the feature definition file; when falsy,
        ``process_factory`` is set to ``None``
    :param bin_step: width of each distance bin; must be greater than zero
    :raises ValueError: if ``bin_step`` is not positive (the bin-building loop
        would otherwise never terminate)
    """
    global process_factory
    global sig_factory

    # Validate first: with a zero or negative step the upper edge never
    # reaches 20 and the loop below would spin forever.
    if bin_step <= 0:
        raise ValueError('bin_step must be positive, got %r' % (bin_step,))

    process_factory = ChemicalFeatures.BuildFeatureFactory(
        fdef_fname) if fdef_fname else None
    sig_factory = SigFactory(process_factory,
                             minPointCount=2,
                             maxPointCount=3,
                             trianglePruneBins=False)

    # Accumulate the edges by repeated addition (not multiplication) so that
    # fractional steps produce exactly the same bin edges as before.
    q = []
    lower = 0
    upper = bin_step
    while upper < 20:
        q.append((lower, upper))
        lower = upper
        upper += bin_step

    sig_factory.SetBins(q)
    sig_factory.Init()
def CalculatePharm2D3pointFingerprint(mol, featFactory=featFactory):
    """Calculate Pharm2D 3-point fingerprints.

    :param mol: molecule passed to ``Generate.Gen2DFingerprint``
    :param featFactory: feature factory used to build the signature factory;
        defaults to the module-level ``featFactory``
    :return: ``(BitVect, res_keys, res)`` where ``BitVect`` is a tuple of 0/1
        values with one entry per fingerprint bit, ``res_keys`` is the tuple
        of on-bit indices and ``res`` is the fingerprint object returned by
        ``Generate.Gen2DFingerprint``
    """
    sigFactory_3point = SigFactory(featFactory, minPointCount=3, maxPointCount=3)
    sigFactory_3point.SetBins([(0, 2), (2, 4), (4, 6), (6, 10)])
    sigFactory_3point.Init()

    res = Generate.Gen2DFingerprint(mol, sigFactory_3point)
    res_keys = tuple(res.GetOnBits())

    # Size the dense vector from the signature factory instead of the former
    # hard-coded 2135, which only fits one particular feature factory
    # (presumably the module default -- TODO confirm) and overflowed or
    # mis-sized the vector for any other ``featFactory`` argument.
    init_list = [0] * sigFactory_3point.GetSigSize()
    for res_key in res_keys:
        init_list[res_key] = 1

    BitVect = tuple(init_list)
    return BitVect, res_keys, res
def read_file(fname, fcfp4, fdef_fname):
    """Read a tab-separated molecule file and compute one fingerprint per row.

    Every non-blank row must contain exactly three tab-separated fields:
    SMILES, identifier, and a third value that is parsed but not stored.
    Blank rows and rows whose SMILES cannot be parsed by
    ``Chem.MolFromSmiles`` are skipped so the returned lists stay aligned.

    :param fname: path of the input file
    :param fcfp4: when true, use ``AllChem.GetMorganFingerprint(mol, 2,
        useFeatures=True)``; otherwise use the 2D pharmacophore fingerprint
    :param fdef_fname: feature definition file, only read when ``fcfp4`` is false
    :return: ``defaultdict(list)`` with parallel lists under the keys
        ``'mol_name'``, ``'smiles'`` and ``'fingerprint'``
    :raises ValueError: if a non-blank row does not split into three fields
    """
    sigFactory = None
    if not fcfp4:
        featFactory = ChemicalFeatures.BuildFeatureFactory(fdef_fname)
        sigFactory = SigFactory(featFactory,
                                minPointCount=2,
                                maxPointCount=3,
                                trianglePruneBins=False)
        sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
        sigFactory.Init()

    d = defaultdict(list)
    with open(fname) as f:
        for row in f:
            row = row.strip()
            # A blank line (typically the trailing newline of the file) used
            # to crash the three-way unpacking below.
            if not row:
                continue
            smiles, ids, _aff = row.split('\t')

            mol = Chem.MolFromSmiles(smiles)
            # The old ``smiles is not None`` test could never fail; the value
            # that can actually be None is the parsed molecule.
            if mol is None:
                continue

            d['mol_name'].append(ids)
            d['smiles'].append(smiles)
            if fcfp4:
                d['fingerprint'].append(
                    AllChem.GetMorganFingerprint(mol, 2, useFeatures=True))
            else:
                d['fingerprint'].append(Generate.Gen2DFingerprint(mol, sigFactory))
    return d
def CalculatePharm2D2pointFingerprint(mol, featFactory=featFactory):
    """Calculate Pharm2D 2-point fingerprints.

    :param mol: molecule passed to ``Generate.Gen2DFingerprint``
    :param featFactory: feature factory used to build the signature factory;
        defaults to the module-level ``featFactory``
    :return: ``(BitVect, res_keys, res)`` where ``BitVect`` is a tuple of 0/1
        values with one entry per fingerprint bit, ``res_keys`` is the tuple
        of on-bit indices and ``res`` is the fingerprint object returned by
        ``Generate.Gen2DFingerprint``
    """
    sigFactory_2point = SigFactory(featFactory, minPointCount=2, maxPointCount=2)
    sigFactory_2point.SetBins([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5),
                               (5, 6), (6, 7), (7, 8), (8, 9)])
    sigFactory_2point.Init()

    res = Generate.Gen2DFingerprint(mol, sigFactory_2point)
    res_keys = tuple(res.GetOnBits())

    # Size the dense vector from the signature factory instead of the former
    # hard-coded 135, which only fits one particular feature factory
    # (presumably the module default -- TODO confirm) and overflowed or
    # mis-sized the vector for any other ``featFactory`` argument.
    init_list = [0] * sigFactory_2point.GetSigSize()
    for res_key in res_keys:
        init_list[res_key] = 1

    BitVect = tuple(init_list)
    return BitVect, res_keys, res
RDConfig.RDDataDir, '/drug_development/studyRdkit/st_rdcit/data/BaseFeatures.fdef') # 实例化特征工厂 featFactory = ChemicalFeatures.BuildFeatureFactory(fdefName) # 使用特征工厂再来构建指纹工厂signature,factory用于设置指纹参数 # 构建指纹工厂 : SigFactory( featFactory, # 特征工厂 useCounts=False, # 默认False。False不考虑指纹频数,并生成SparseBitVect minPointCount=2, # 默认为2.生成指纹时包括的最少的药效团数量。 maxPointCount=3, # 默认为3。生成指纹时包括的最多的药效团数量。 ...) sigFactory = SigFactory(featFactory, minPointCount=2, maxPointCount=3) # 对拓扑距离进行分段 sigFactory.SetBins([(0, 2), (2, 5), (5, 8)]) # 每次修改参数后,都要初始化一下 sigFactory.Init() # 计算指纹的长度 print('指纹长度=', sigFactory.GetSigSize()) # 指纹长度= 2988 # # 4.2 生成2D药效团指纹 # 指纹工厂中的参数设置完毕,接下来就可以生成2D指纹了。 # 计算2D药效团指纹 : Gen2DFingerprint( mol, # 要计算指纹的mol对象 sigFactory, # 设置了参数的指纹工厂 bitinfo # 获取指纹id及对应的原子 ) mol = Chem.MolFromSmiles('OCC(=O)CCCN')
def get_factory():
    """Build and return an initialised, count-based signature factory.

    The feature factory is created from the module-level ``fdef`` string and
    the distance bins come from the module-level ``defaultBins``.

    :return: ``SigFactory`` on which ``SetBins`` and ``Init`` have been called
    """
    feature_factory = ChemicalFeatures.BuildFeatureFactoryFromString(fdef)
    signature_factory = SigFactory(
        feature_factory,
        useCounts=True,
        minPointCount=2,
        maxPointCount=3,
        trianglePruneBins=False,
    )
    signature_factory.SetBins(defaultBins)
    signature_factory.Init()
    return signature_factory
class AntidecoysSettings(Settings):
    """
    A specialized class that holds the settings for the anti-decoys algorithm.
    """

    def __init__(
            self,
            source,
            target,
            # path to a directory where the results will be stored
            storage_dir=os.path.abspath('antidecoys_data'),
            # maximum number of threads to use in parallel computations
            max_threads=None,
            # custom parameters (same for both trees)
            tree_params=None,
            # maximum number of iterations to spend looking for a single path
            max_iters=100,
            # require verbose output
            verbose=False,
            # distance bins in the pharmacophore fingerprint
            fg_bins=((0, 2), (2, 5), (5, 8)),
            # min number of features encoded in the pharmacophore fingerprint
            fg_min_points=2,
            # max number of features encoded in the pharmacophore fingerprint
            fg_max_points=3,
            # minimum number of morphs the filter will accept on every iteration
            min_accepted=1000,
            # maximum common bits percentage the filter will accept on every iteration
            common_bits_max_thrs=0.75,
            # if for the mols selected by the filter the mean common bits
            # percentage falls below this value, antidecoys will be turned off
            common_bits_mean_thrs=0.5,
            # minimum number of iterations where antidecoys are optimized
            antidecoys_min_iters=10,
            # maximum number of iterations where antidecoys are optimized
            antidecoys_max_iters=50,
            # turn antidecoys filter off when the distance between two closest
            # molecules from each tree gets below this value
            distance_thrs=0.2,
    ):
        # The parent takes ``max_iters`` before ``tree_params``; the positional
        # order below is deliberate and differs from this signature.
        super(AntidecoysSettings, self).__init__(
            source,
            target,
            storage_dir,
            max_threads,
            max_iters,
            tree_params,
            verbose,
        )

        #: distance bins in the pharmacophore fingerprint
        #: (as described `here <http://www.rdkit.org/docs/GettingStartedInPython.html#d-pharmacophore-fingerprints>`_)
        self.fg_bins = fg_bins

        #: min number of features encoded in the pharmacophore fingerprint
        #: (as described `here <http://www.rdkit.org/docs/GettingStartedInPython.html#d-pharmacophore-fingerprints>`_)
        self.fg_min_points = fg_min_points

        #: max number of features encoded in the pharmacophore fingerprint
        #: (as described `here <http://www.rdkit.org/docs/GettingStartedInPython.html#d-pharmacophore-fingerprints>`_)
        self.fg_max_points = fg_max_points

        #: minimum number of morphs the antidecoys filter will accept on every iteration
        self.min_accepted = min_accepted

        #: maximum percentage of shared bits between a structure and the
        #: anti-fingerprint that the filter will accept on every iteration
        self.common_bits_max_thrs = common_bits_max_thrs

        #: if for the structures that survived the filter the mean common bits
        #: percentage falls below this value, antidecoys will be turned off
        self.common_bits_mean_thrs = common_bits_mean_thrs

        #: minimum number of iterations that will use the antidecoys filter
        self.antidecoys_min_iters = antidecoys_min_iters

        #: maximum number of iterations that will use the antidecoys filter
        self.antidecoys_max_iters = antidecoys_max_iters

        #: turn antidecoys filter off when the distance between two closest
        #: molecules from each tree gets below this value
        self.distance_thrs = distance_thrs

        # --- pharmacophore fingerprint machinery ---
        # basic feature definitions shipped in the RDKit data directory
        fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        self._fdef_file = fdef_path
        self._feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)

        # signature factory configured from the fingerprint settings stored above
        sig_factory = SigFactory(
            self._feature_factory,
            minPointCount=self.fg_min_points,
            maxPointCount=self.fg_max_points,
            trianglePruneBins=False,
        )
        sig_factory.SetBins(self.fg_bins)  # set the distance bins
        sig_factory.Init()
        self.signature_factory = sig_factory
def _init():
    """Create the module-level signature ``factory``.

    Builds a feature factory from the module-level ``fdef`` string, wraps it in
    a ``SigFactory`` using 2- and 3-point pharmacophores, applies
    ``defaultBins`` and initialises it. Only the global ``factory`` is
    assigned here; ``labels`` and ``patts`` are declared global but left
    untouched.
    """
    global labels, patts, factory

    feature_factory = ChemicalFeatures.BuildFeatureFactoryFromString(fdef)
    factory = SigFactory(
        feature_factory,
        minPointCount=2,
        maxPointCount=3,
    )
    factory.SetBins(defaultBins)
    factory.Init()