示例#1
0
def pains(filtered_df):
    """Remove compounds that match a PAINS filter and flag the rest by activity.

    Every row's SMILES is parsed and screened against RDKit's combined PAINS
    catalog. Rows without a match are copied into a new frame with the
    columns ``ChEMBL_ID``, ``smiles`` and ``pIC50``; ``create_mol`` is then
    run on that frame and an ``active`` column is added.

    Rows whose SMILES cannot be parsed are skipped, because
    ``Chem.MolFromSmiles`` returns ``None`` for them and the catalog cannot
    be queried with ``None``.

    Args:
        filtered_df: DataFrame providing the columns ``molecule_chembl_id``,
            ``smiles`` and ``pIC50``.

    Returns:
        DataFrame of PAINS-free compounds with an extra ``active`` column
        holding 1.0 where ``pIC50 >= 6.3`` and 0.0 otherwise, plus whatever
        ``create_mol`` adds to the frame.
    """
    # Build a catalog from all PAINS (A, B and C)
    params = FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
    catalog = FilterCatalog(params)

    # Collect the clean rows in a list and build the frame once; growing a
    # DataFrame with ``.loc[len(df)]`` re-allocates on every single row.
    clean_rows = []
    for _, row in filtered_df.iterrows():
        cur_mol = Chem.MolFromSmiles(row.smiles)
        if cur_mol is None:
            # Unparseable SMILES: nothing to screen, so leave the row out.
            continue
        if catalog.GetFirstMatch(cur_mol) is None:
            clean_rows.append((row.molecule_chembl_id, row.smiles, row.pIC50))

    df_new = pd.DataFrame(clean_rows, columns=['ChEMBL_ID', 'smiles', 'pIC50'])

    # Create molecules from smiles and their fingerprints
    # NOTE(review): create_mol is defined elsewhere; its return value was
    # never used here, so it presumably modifies df_new in place - confirm.
    create_mol(df_new, 2048)

    # Mark every molecule with a pIC50 of >= 6.3 as active (1.0), else 0.0
    df_new['active'] = np.zeros(len(df_new))
    df_new.loc[df_new.pIC50 >= 6.3, 'active'] = 1.0
    return df_new
示例#2
0
def painspredict(thefile, theoutput):
    """Screen the molecules of a SMILES file and write a per-molecule report.

    Each molecule read from ``thefile`` is matched against RDKit's NIH filter
    catalog and its fraction of sp3 carbons is computed. One line per
    molecule is written to ``theoutput``: the running index, the split text
    of the input line at that index, either ``PAINS`` plus the description of
    the first matching filter or ``PAINS OK``, and the ``Fsp3`` value.

    Side effects: removes ``output.txt`` from the working directory if it
    exists, appends a copy of the first line of ``thefile`` to ``thefile``,
    and (over)writes ``theoutput``.

    Args:
        thefile: Path of the SMILES input file.
        theoutput: Path of the report file to write.

    Returns:
        None.
    """
    # A missing output.txt used to abort the whole run; only that case is
    # tolerated, any other OS error still propagates.
    try:
        os.remove('output.txt')
    except FileNotFoundError:
        pass

    # The context manager guarantees the report is flushed and closed even
    # when parsing or matching fails part-way through.
    with open(theoutput, 'w+') as f1:
        params = FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
        catalog = FilterCatalog(params)
        suppl = Chem.SmilesMolSupplier(thefile)

        with open(thefile, 'r') as inf:
            first_line = inf.readline()

        # NOTE(review): the supplier is created with default arguments, which
        # in RDKit treat the first line as a title line; re-appending that
        # line looks like a workaround so the first record still gets parsed.
        # It modifies the input file on every call - confirm this is wanted.
        with open(thefile, 'a') as inf:
            inf.write(first_line)

        with open(thefile, 'r') as inf:
            sub_strct = [line.rstrip().split(" ") for line in inf]

        # Molecules that fail to parse come back as None and are dropped.
        ms = [x for x in suppl if x is not None]

        # NOTE(review): sub_strct holds every line of the file while ms holds
        # only the parsed molecules, so ms[i] and sub_strct[i] may refer to
        # different records - verify the pairing before trusting the labels.
        for i, mol in enumerate(ms):
            entry = catalog.GetFirstMatch(mol)
            sphybrid = Chem.rdMolDescriptors.CalcFractionCSP3(mol)
            if entry is not None:
                print(i,
                      sub_strct[i],
                      "PAINS",
                      entry.GetDescription(),
                      "Fsp3",
                      sphybrid,
                      file=f1)
            else:
                print(i, sub_strct[i], "PAINS OK", "Fsp3", sphybrid, file=f1)