def qeds(df):
    """Return the mean and standard deviation of QED over the first rows by score.

    ``df`` is sorted ascending on its ``score`` column and only the first
    ``int(N * threshold)`` rows are kept, where ``N`` is the number of rows
    and ``threshold`` is a module-level name defined outside this function.
    Entries of the ``smile`` column that RDKit cannot parse are skipped.

    Args:
        df: DataFrame with a ``score`` column and a ``smile`` column holding
            SMILES strings.

    Returns:
        A ``(mean, std)`` tuple of the QED values of the retained molecules.
        When no molecule is retained NumPy returns ``nan`` for both values
        (and emits a RuntimeWarning).
    """
    n_rows = df.shape[0]
    n_keep = int(n_rows * threshold)

    # The original comment called this the "top 10 %" statistics, which
    # presumably corresponds to threshold == 0.1 -- confirm at the definition.
    selected = df.sort_values('score')[:n_keep]

    # Parse each SMILES exactly once; MolFromSmiles returns None on failure.
    parsed = (Chem.MolFromSmiles(s) for s in selected.smile)
    mols = [m for m in parsed if m is not None]

    q = np.array([QED.default(m) for m in mols])
    return (np.mean(q), np.std(q))
N = 3 # Plot : 2500 first : optimol = pd.read_csv( os.path.join(script_dir, '..', 'cbas/slurm/results/big_new_lr/optimol_scored.csv')) optimol = optimol[:10000] optimol = optimol.sample(10) # Top molecules samples = optimol.sort_values('score') smiles, scores = samples.smile, samples.score smiles = smiles[:N] mols = [Chem.MolFromSmiles(s) for s in smiles] qeds = np.array([QED.default(m) for m in mols]) sas = [calculateScore(m) for m in mols] scores = scores[:N] img1 = Draw.MolsToGridImage(mols, molsPerRow=1, useSVG=False, legends=[f'{sc:.2f}' for sc in scores]) img2 = Draw.MolsToGridImage(mols, molsPerRow=1, useSVG=False) svg2pdf(str(img), write_to='optimol_samp_2.pdf') """ ['Cc1ccccc1CCC(=O)OCC(=O)NCCc1ccc2ccccc2c1' 'COCC1CCCCN(C(=O)C(=O)NCc2cc3ccccc3c3ccccc23)C1' 'Cc1ccccc1CC1CCCN1C(=O)CC1Cc2ccccc2NC1=O'] """
# Compute a table of RDKit descriptors for every SMILES in `smiles` and join
# it to the DUD frame. `d` (a dict), `smiles` and `DUD` are defined outside
# this section.
prop_names = [
    'QED', 'logP', 'molWt', 'maxCharge', 'minCharge', 'valence', 'TPSA',
    'HBA', 'HBD', 'jIndex'
]

# One callable per property, so the column names and the computations cannot
# drift apart.
descriptor_fns = {
    'QED': QED.default,
    'logP': Crippen.MolLogP,
    'molWt': Descriptors.MolWt,
    'maxCharge': Descriptors.MaxPartialCharge,
    'minCharge': Descriptors.MinPartialCharge,
    'valence': Descriptors.NumValenceElectrons,
    'TPSA': rdMolDescriptors.CalcTPSA,
    'HBA': rdMolDescriptors.CalcNumHBA,
    'HBD': rdMolDescriptors.CalcNumHBD,
    'jIndex': GraphDescriptors.BalabanJ,
}

for name in prop_names:
    d[name] = []

# Row labels to remove from DUD; collected here and dropped once after the
# loop instead of copying the whole frame for every rejected molecule.
invalid_rows = []

for i, s in enumerate(smiles):
    if i % 10000 == 0:
        print(i)  # progress indicator

    m = Chem.MolFromSmiles(s)
    # Reject SMILES that RDKit cannot parse, that contain '.' (the SMILES
    # separator for disconnected components), or that contain a lowercase 'i'.
    # NOTE(review): the 'i' test presumably targets specific elements, but it
    # matches any symbol containing that letter -- confirm the intent.
    if m is None or 'i' in s or '.' in s:
        invalid_rows.append(i)
        print(s, i)
        continue

    for name in prop_names:
        d[name].append(descriptor_fns[name](m))

# Assumes the labels of DUD's index equal the positions in `smiles`
# (as the original per-row DUD.drop(i) did) -- TODO confirm.
if invalid_rows:
    DUD = DUD.drop(invalid_rows)

df = pd.DataFrame.from_dict(d)
# NOTE(review): df.index is passed as the join key for both frames, which
# presumably pairs rows by position and requires len(df) == len(DUD); confirm
# this is the intended way to align the two tables.
df_merge = pd.merge(df, DUD, on=df.index)
#df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv')