def test_fps_5(data):
    """Check ECFP-only fingerprints with a custom bit length and NaN error handling.

    Transforms ``data['err_smis']`` with ``featurizers='ECFP'``, ``n_bits=100``
    and ``on_errors='nan'`` and expects a 4 x 100 ``DataFrame`` whose rows 1
    and 2 contain NaN at the probed columns.
    """
    featurizer = Fingerprints(
        featurizers='ECFP',
        n_jobs=1,
        input_type='any',
        on_errors='nan',
        n_bits=100,
    )
    result = featurizer.transform(data['err_smis'])

    assert isinstance(result, pd.DataFrame)

    expected_shape = (4, 100)
    assert result.shape == expected_shape

    # Probe one cell in each of the two rows that are expected to be NaN
    # (presumably the unparsable entries of the fixture).
    for row, col in ((1, 10), (2, 20)):
        assert np.isnan(result.values[row][col])
def test_fps_6(data):
    """Check the output shape of the default featurizer set with ``counting=True``.

    Transforms ``data['smis']`` and expects a ``DataFrame`` of shape
    (4, 16751).
    """
    featurizer = Fingerprints(
        n_jobs=1,
        input_type='any',
        on_errors='nan',
        counting=True,
    )
    result = featurizer.transform(data['smis'])

    assert isinstance(result, pd.DataFrame)

    n_rows, n_cols = result.shape
    assert (n_rows, n_cols) == (4, 16751)
def test_fps_3(data):
    """Check that ``input_type='any'`` accepts mols and SMILES in one list.

    The test passes when ``transform`` returns without raising.
    """
    fps = Fingerprints(n_jobs=1, input_type='any')
    # No try/except wrapper: an unexpected exception propagates so pytest
    # reports the original error and traceback, and KeyboardInterrupt /
    # SystemExit are no longer swallowed.
    fps.transform(data['mols'] + data['smis'])
def test_fps_4(data):
    """Check both error-handling modes of ``Fingerprints`` on ``data['err_smis']``.

    Without ``on_errors`` the transform is expected to raise ``ValueError``;
    with ``on_errors='nan'`` it is expected to return a 4 x 10607
    ``DataFrame`` with NaN at the probed cells of rows 1 and 2.
    """
    bad_inputs = data['err_smis']

    strict = Fingerprints(n_jobs=1, input_type='any')
    with pytest.raises(ValueError):
        strict.transform(bad_inputs)

    lenient = Fingerprints(n_jobs=1, input_type='any', on_errors='nan')
    result = lenient.transform(bad_inputs)

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (4, 10607)

    # One probe per row that is expected to be NaN-filled.
    for row, col in ((1, 10), (2, 20)):
        assert np.isnan(result.values[row][col])
def test_fps_1(data):
    """Check the input handling of ``Fingerprints`` built without ``input_type``.

    ``data['mols']`` must transform without raising, while ``data['smis']``
    must raise ``TypeError``.
    """
    # Imported locally so this test does not depend on a module-level import.
    import pytest

    fps = Fingerprints(n_jobs=1)

    # Called directly: an unexpected exception fails the test with its own
    # traceback instead of being hidden behind a bare ``except``.
    fps.transform(data['mols'])

    # ``pytest.raises`` fails with a clear "DID NOT RAISE" message when the
    # expected TypeError is missing.
    with pytest.raises(TypeError):
        fps.transform(data['smis'])
def main():
    """Run the iQSPR sample workflow from data loading to visualization.

    Steps, as performed by the calls below:

    1. Read ``./iQSPR_sample_data.csv`` and draw a 3000-row random subsample.
    2. Build forward models on ECFP fingerprints via ``make_forward_model``.
    3. Learn an n-gram model from the sampled SMILES via ``learn_n_gram2``.
    4. Pick 25 initial molecules (with replacement) among those whose
       HOMO-LUMO gap exceeds 4.
    5. Run ``run_iqspr`` with an annealing schedule and pass the results to
       ``visualize``.
    """
    from xenonpy.descriptor import Fingerprints

    # load in-house data from csv file
    data = pd.read_csv("./iQSPR_sample_data.csv")
    # NOTE: the seed below is set after this call, so the subsample itself is
    # not reproducible across runs; only the initial-molecule draw is.
    data_ss = data.sample(3000).reset_index()

    RDKit_FPs = Fingerprints(featurizers=['ECFP'], input_type='smiles')

    # make models
    # (alternative: make_forward_model_with_iqspr_tools(data_ss, RDKit_FPs))
    prd_mdls, mdls = make_forward_model(data_ss, RDKit_FPs)

    # learn NGrams by method 2 (recommended)
    # (alternatives: learn_n_gram0 -- not recommended; learn_n_gram1 -- recommended)
    n_gram = learn_n_gram2(data_ss['SMILES'])
    n_gram.set_params(del_range=[1, 20], max_len=500, reorder_prob=0.5)

    # set up initial molecules for iQSPR
    np.random.seed(201903)  # fix the random seed
    # NOTE(review): this key previously read 'H**O-LUMO gap', which looks like
    # a masked 'HOMO-LUMO gap' -- confirm against the CSV header.
    gap_column = 'HOMO-LUMO gap'
    # Boolean mask instead of a per-row ``.iloc[i]`` lookup; selects the same
    # rows in the same order.
    wide_gap_smiles = data_ss.loc[data_ss[gap_column] > 4, 'SMILES']
    # Round-trip through RDKit so every candidate is RDKit's own SMILES output.
    cans = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in wide_gap_smiles]
    init_samples = np.random.choice(cans, 25)

    # set up annealing schedule in iQSPR
    beta = np.hstack([
        np.linspace(0.01, 0.2, 20),
        np.linspace(0.21, 0.4, 10),
        np.linspace(0.4, 1, 10),
        np.linspace(1, 1, 10),
    ])

    iqspr_results = run_iqspr(prd_mdls, n_gram, init_samples, beta)
    visualize(iqspr_results, RDKit_FPs, mdls, data_ss, beta)
def test_fps_4(data):
    """Check error handling on ``data['err_smis']`` for ``Fingerprints`` and ``ECFP``.

    ``Fingerprints`` without ``on_errors`` must raise ``ValueError``.
    ``ECFP`` with ``on_errors='nan'`` must return a 4 x 2048 result with NaN
    at the probed cells of rows 1 and 2.
    """
    # Imported locally so this test does not depend on a module-level import.
    import pytest

    fps = Fingerprints(n_jobs=1, input_type='any')
    with pytest.raises(ValueError):
        fps.transform(data['err_smis'])

    fps = ECFP(n_jobs=1, input_type='any', on_errors='nan')
    # No bare ``except`` around these checks: a failing assertion or an
    # unexpected exception is reported as-is, so the actual cause is visible.
    ret = fps.transform(data['err_smis'])
    assert pd.DataFrame(data=ret).shape == (4, 2048)
    assert np.isnan(ret[1][10])
    assert np.isnan(ret[2][20])
def property_plot(smile, plot_path):
    """Plot the predicted properties of one SMILES against the reference data.

    The SMILES is featurized with ``Fingerprints`` (featurizers taken from the
    module-level ``fp_type``). Two pickled models predict its dielectric
    constant and glass transition temperature. The prediction is drawn over a
    scatter of the reference data set, together with a dashed rectangle
    marking the target property region, and the figure is saved to
    ``plot_path``.

    Args:
        smile: SMILES string of the structure to evaluate.
        plot_path: Destination passed to ``plt.savefig``.

    Returns:
        Tuple ``(dc_pre, gtt_pre)`` with both predictions rounded to two
        decimals, or ``(0.0, 0.0)`` when no valid fingerprint row remains.
        In that case the figure is still saved, without the prediction marker.
    """
    P_fp = Fingerprints(featurizers=fp_type,
                        input_type='smiles',
                        on_errors='nan').transform([smile])
    # With on_errors='nan' a failed featurization presumably yields a
    # NaN-filled row; dropping it leaves an empty frame.
    P_fp = P_fp.dropna()

    PolyGeno_property = pd.read_csv(
        "./data/forward_train_Polymer_genome_new_asterisk_R.csv")
    DC = np.ravel(PolyGeno_property['Dielectric.Constant'])
    GTT = np.ravel(PolyGeno_property['Glass.Transition.Temperature'])

    # NOTE(review): pickle can execute arbitrary code on load; these model
    # files must come from a trusted source.
    # Context managers close the file handles, which the previous bare
    # ``open`` calls never did.
    with open('./models/P_SMILES2DC_ElasticNet.sav', 'rb') as model_file:
        singleFP2dc = pickle.load(model_file)
    with open("./models/P_SMILES2GTT_ElasticNet.sav", 'rb') as model_file:
        singleFP2gtt = pickle.load(model_file)

    # Target property window, drawn as a dashed rectangle.
    dc_low, dc_high = 3, 4
    gtt_low, gtt_high = 300, 350

    plt.figure(figsize=(10, 10))
    # Axis limits: reference data range plus a fixed margin.
    plt.xlim(min(DC) - 1, max(DC) + 1)
    plt.ylim(min(GTT) - 20, max(GTT) + 20)
    plt.plot(DC, GTT, 'k.', markersize=5, alpha=0.4, label='existing data')

    has_prediction = len(P_fp) == 1
    if has_prediction:
        dc_pre = singleFP2dc.predict(P_fp)
        gtt_pre = singleFP2gtt.predict(P_fp)
        plt.scatter(dc_pre,
                    gtt_pre,
                    s=250,
                    c='g',
                    edgecolor='k',
                    label='product')

    plt.xlabel("Dielectric Constant", fontsize=20)
    # Raw string: ``\c`` is an invalid escape sequence in a normal literal.
    plt.ylabel(r"Glass Transition Temperature ($^\circ$C)", fontsize=20)
    plt.title("Properties of chemical structures", fontsize=20)
    plt.legend(loc='lower right', fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)

    # Closed outline of the target region.
    plt.plot([dc_low, dc_low, dc_high, dc_high, dc_low],
             [gtt_low, gtt_high, gtt_high, gtt_low, gtt_low], 'r--')

    plt.savefig(plot_path)
    plt.close()

    if not has_prediction:
        # Previously the scalar placeholder 0 was indexed with ``[0]`` and
        # raised TypeError; return the placeholder pair directly instead.
        return (0.0, 0.0)

    return (round(dc_pre[0], 2), round(gtt_pre[0], 2))
def test_fps_3(data):
    """Smoke test: ``input_type='any'`` handles mols and SMILES in one list.

    Passes when ``transform`` completes without raising.
    """
    featurizer = Fingerprints(input_type='any', n_jobs=1)
    mixed_inputs = data['mols'] + data['smis']
    featurizer.transform(mixed_inputs)
def test_fps_2(data):
    """Check ``input_type='smiles'`` against both fixtures.

    ``data['mols']`` is expected to raise ``TypeError``; ``data['smis']`` is
    expected to transform without raising.
    """
    smiles_featurizer = Fingerprints(input_type='smiles', n_jobs=1)

    mol_inputs = data['mols']
    with pytest.raises(TypeError):
        smiles_featurizer.transform(mol_inputs)

    smiles_inputs = data['smis']
    smiles_featurizer.transform(smiles_inputs)
def test_fps_1(data):
    """Check ``Fingerprints`` built without ``input_type`` against both fixtures.

    ``data['mols']`` is expected to transform without raising;
    ``data['smis']`` is expected to raise ``TypeError``.
    """
    default_featurizer = Fingerprints(n_jobs=1)

    mol_inputs = data['mols']
    default_featurizer.transform(mol_inputs)

    smiles_inputs = data['smis']
    with pytest.raises(TypeError):
        default_featurizer.transform(smiles_inputs)