def test_iqspr_resample1(data):
    """Compare seeded IQSPR output with hard-coded reference samples.

    The sampler is run twice on the first three entries of ``data['pg'][0]``,
    once built with ``r_ESS=0`` and once with ``r_ESS=1``. NumPy's global
    random state is reset to seed 0 before each run, and at every yielded
    step the sorted samples must equal the stored expectation for that step.
    """
    # not sure if this test can be fully reliable by only fixing the random seed
    estimator = data['like_mdl']
    modifier = data['ngram']
    beta = np.linspace(0.1, 1, 2)

    # Expected samples at the first yielded step (identical for both runs).
    first_step = [
        'C([*])C([*])(C(=O)OCCSCCC#N)',
        'C([*])C([*])(SCCC)',
        'O([*])C(=O)OC(C=C1)=CC=C1C(C=C2)=CC=C2CC(C=C3)=CC=C3C(C=C4)=CC=C4([*])',
    ]
    expected_r_ess_0 = [
        list(first_step),
        [
            'C([*])C([*])(C(=O)OCC(F)(F)C(F)(F)OC(F)(F)OC(F)(F)C(F)(F)OC(F)(F)C(F)(F)F)',
            'C([*])C([*])(CC)(C(=O)OCC(F)(F)F)',
            'O([*])C(=O)OC(C=C1)=CC=C1C(C=C1)=CC=C1CC(C=C1)=CC=C1C(C=C1)=CC=C1C(C=C1)=CC=C1C(C=C1)=CC=C1C(C=C1)=CC=C1C(=S)',
        ],
    ]
    expected_r_ess_1 = [
        list(first_step),
        [
            'O([*])C(=O)OC(C=C1)=CC=C1C(C=C1)=CC=C1CC(C=C1)=CC=C1C(C=C1)=CC=C1([*])',
            'O([*])C(=O)OC(C=C1)=CC=C1C(C=C1)=CC=C1CC(C=C1)=CC=C1C(C=C1)=CC=C1C(=S)',
        ],
    ]

    def _run_and_compare(r_ess, expected):
        # Seed first, then build the sampler, exactly as each run requires.
        np.random.seed(0)
        sampler = IQSPR(estimator=estimator, modifier=modifier, r_ESS=r_ess)
        steps = sampler(data['pg'][0][:3], beta, yield_lpf=True)
        for step, (s, ll, p, f) in enumerate(steps):
            assert np.abs(np.sum(p) - 1.0) < 1e-5
            assert np.sum(f) == 3
            assert np.all(np.sort(s) == np.array(expected[step]))

    _run_and_compare(0, expected_r_ess_0)
    _run_and_compare(1, expected_r_ess_1)
def run_iqspr(prd_mdls, n_gram, init_samples, beta):
    """Run an iQSPR sampling loop and return the per-step outputs.

    Args:
        prd_mdls: Object passed to ``IQSPR`` as ``estimator``.
        n_gram: Object passed to ``IQSPR`` as ``modifier``.
        init_samples: Initial samples handed to the sampler.
        beta: Sequence handed to the sampler as its second argument and
            stored (with a leading 0) under ``"beta"`` in the result.

    Returns:
        dict: Keys ``"samples"``, ``"loglike"``, ``"prob"`` and ``"freq"``
        hold one entry per yielded step; ``"beta"`` is ``np.hstack([0, beta])``.
        The returned object is the one read back from the pickle file.

    Side effects:
        Seeds NumPy's global random state with 201903 and writes
        ``iQSPR_results_reorder.obj`` (relative path) via pickle.
    """
    # library for running iQSPR in XenonPy-iQSPR
    from xenonpy.inverse.iqspr import IQSPR

    # set up likelihood and n-gram models in iQSPR
    iqspr = IQSPR(estimator=prd_mdls, modifier=n_gram)

    np.random.seed(201903)  # fix the random seed

    # NOTE(review): the key 'H**O-LUMO gap' looks garbled (presumably
    # 'HOMO-LUMO gap'); left unchanged here -- confirm against the target
    # names the estimator was fitted with.
    targets = {'E': (0, 200), 'H**O-LUMO gap': (-np.inf, 3)}

    # main loop of iQSPR
    samples, loglike, prob, freq = [], [], [], []
    # The loop variable must not be called ``freq``: that would rebind the
    # accumulator list and ``freq.append(freq)`` would no longer collect
    # the per-step values.
    for step_samples, step_loglike, step_prob, step_freq in iqspr(
            init_samples, beta, yield_lpf=True, **targets):
        samples.append(step_samples)
        loglike.append(step_loglike)
        prob.append(step_prob)
        freq.append(step_freq)

    # record all outputs
    iqspr_results_reorder = {
        "samples": samples,
        "loglike": loglike,
        "prob": prob,
        "freq": freq,
        "beta": np.hstack([0, beta]),  # include the step of initial samples
    }

    # save results, then read them back so the caller gets what is on disk
    with open('iQSPR_results_reorder.obj', 'wb') as out_file:
        pk.dump(iqspr_results_reorder, out_file)
    with open('iQSPR_results_reorder.obj', 'rb') as in_file:
        iqspr_results_reorder = pk.load(in_file)

    return iqspr_results_reorder
def run(self):
    """Run iQSPR with this object's settings and collect every yielded step.

    Calls ``self.check_properties()``, builds a ``BayesianRidgeEstimator``
    from ``self.descriptor_generator`` and ``self.models``, then iterates an
    ``IQSPR`` sampler over ``self.seed_structure`` and ``self.beta`` with
    ``self.desired_values`` as keyword arguments.

    Returns:
        tuple: ``(iqspr, iqspr_results)`` where ``iqspr_results`` is a dict
        with per-step lists under ``"samples"``, ``"loglike"``, ``"prob"``,
        ``"freq"`` and ``np.hstack([0, self.beta])`` under ``"beta"``. The
        same dict is also stored on ``self.iqspr_results``.
    """
    from xenonpy.inverse.iqspr import IQSPR
    from xenonpy.inverse.iqspr import BayesianRidgeEstimator

    self.check_properties()

    estimator = BayesianRidgeEstimator(
        descriptor=self.descriptor_generator,
        **self.models,
    )
    iqspr = IQSPR(estimator=estimator, modifier=self.ngram_model)

    # Accumulate straight into the result mapping, one entry per step.
    iqspr_results = {"samples": [], "loglike": [], "prob": [], "freq": []}
    steps = iqspr(
        self.seed_structure,
        self.beta,
        yield_lpf=True,
        **self.desired_values,
    )
    for structures, loglike, prob, freq in steps:
        iqspr_results["samples"].append(structures)
        iqspr_results["loglike"].append(loglike)
        iqspr_results["prob"].append(prob)
        iqspr_results["freq"].append(freq)

    # include the step of initial samples
    iqspr_results["beta"] = np.hstack([0, self.beta])

    self.iqspr_results = iqspr_results
    return iqspr, iqspr_results
def test_iqspr_2(data):
    """Run IQSPR with a per-column ``beta`` DataFrame and check each step.

    ``beta`` has four columns built from two 10-point ``linspace`` ramps.
    For every yielded step, ``p`` must sum to 1 (within 1e-5) and ``f`` must
    sum to 5, matching the five input entries taken from ``data['pg'][0]``.
    """
    np.random.seed(0)

    sampler = IQSPR(estimator=data['like_mdl'], modifier=data['ngram'])

    ramp_from_005 = np.linspace(0.05, 1, 10)
    ramp_from_001 = np.linspace(0.01, 1, 10)
    beta = pd.DataFrame({
        'bandgap': ramp_from_005,
        'glass_transition_temperature': ramp_from_001,
        'density': ramp_from_005,
        'refractive_index': ramp_from_001,
    })

    steps = sampler(data['pg'][0][:5], beta, yield_lpf=True)
    for _samples, _loglike, prob, freq in steps:
        assert np.abs(np.sum(prob) - 1.0) < 1e-5
        assert np.sum(freq) == 5
def test_iqspr_1(data):
    """Run IQSPR with a freshly fitted BayesianRidgeEstimator and NGram.

    The estimator is fitted on ``data['pg']`` and the n-gram model on the
    first 20 entries of ``data['pg'][0]``. Sampling starts from the first
    five entries with ``bandgap`` and ``density`` target ranges; at every
    yielded step ``p`` must sum to 1 (within 1e-5) and ``f`` must sum to 5.
    """
    np.random.seed(0)
    ecfp = ECFP(n_jobs=1, input_type='smiles')
    bre = BayesianRidgeEstimator(descriptor=ecfp)
    ngram = NGram()
    iqspr = IQSPR(estimator=bre, modifier=ngram)

    X, y = data['pg']
    bre.fit(X, y)
    ngram.fit(data['pg'][0][0:20], train_order=10)

    beta = np.linspace(0.05, 1, 10)
    for s, ll, p, f in iqspr(data['pg'][0][:5], beta, yield_lpf=True,
                             bandgap=(0.1, 0.2), density=(0.9, 1.2)):
        assert np.abs(np.sum(p) - 1.0) < 1e-5
        # Put the offending values in the assertion message itself; using
        # ``print(f)`` as the message evaluates to None and reports nothing.
        assert np.sum(f) == 5, f"sum of f is not 5: {f}"
def test_iqspr_1(data):
    """Run IQSPR with a GaussianLogLikelihood estimator and check each step.

    The estimator uses the ``ecfp`` descriptor from the fixture, is fitted on
    ``data['pg']`` and has its targets reset to ``bandgap`` and ``density``
    ranges. At every yielded step ``p`` must sum to 1 (within 1e-5) and ``f``
    must sum to 5, matching the five input entries.
    """
    np.random.seed(0)

    estimator = GaussianLogLikelihood(descriptor=data['ecfp'])
    modifier = NGram()
    sampler = IQSPR(estimator=estimator, modifier=modifier)

    X, y = data['pg']
    estimator.fit(X, y)
    estimator.update_targets(
        reset=True,
        bandgap=(0.1, 0.2),
        density=(0.9, 1.2),
    )
    modifier.fit(data['pg'][0][0:20], train_order=10)

    beta = np.linspace(0.05, 1, 10)
    steps = sampler(data['pg'][0][:5], beta, yield_lpf=True)
    for _samples, _loglike, prob, freq in steps:
        assert np.abs(np.sum(prob) - 1.0) < 1e-5
        assert np.sum(freq) == 5
def data():
    """Build the shared test objects, yield them as a dict, then log the end.

    Reads ``polymer_test_data.csv`` from this file's directory, fits two
    ``GaussianLogLikelihood`` estimators (ECFP and RDKit fingerprints) on
    different property columns, sets their targets, wraps them in a
    ``BaseLogLikelihoodSet`` subclass and fits an ``NGram`` on the first 20
    SMILES. The yielded dict has the keys ``ecfp``, ``rdkitfp``, ``bre``,
    ``bre2``, ``like_mdl``, ``ngram``, ``iqspr`` and ``pg`` (``(X, y)``).
    """
    import warnings

    # ignore numpy warning
    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    csv_path = Path(__file__).parent / 'polymer_test_data.csv'
    table = pd.read_csv(str(csv_path))
    smiles = table['smiles']
    properties = table.drop(['smiles', 'Unnamed: 0'], axis=1)

    ecfp_desc = ECFP(n_jobs=1, input_type='smiles', target_col=0)
    rdkit_desc = RDKitFP(n_jobs=1, input_type='smiles', target_col=0)
    ecfp_loglike = GaussianLogLikelihood(descriptor=ecfp_desc)
    rdkit_loglike = GaussianLogLikelihood(descriptor=rdkit_desc)

    ecfp_loglike.fit(smiles, properties[['bandgap', 'glass_transition_temperature']])
    rdkit_loglike.fit(smiles, properties[['density', 'refractive_index']])
    ecfp_loglike.update_targets(bandgap=(1, 2), glass_transition_temperature=(200, 300))
    rdkit_loglike.update_targets(refractive_index=(2, 3), density=(0.9, 1.2))

    class MyLogLikelihood(BaseLogLikelihoodSet):

        def __init__(self):
            super().__init__()

            # NOTE(review): the same attribute is assigned twice; presumably
            # BaseLogLikelihoodSet collects each assigned estimator instead
            # of overwriting -- confirm against the base class.
            self.loglike = ecfp_loglike
            self.loglike = rdkit_loglike

    combined = MyLogLikelihood()

    ngram_model = NGram()
    ngram_model.fit(smiles[0:20], train_order=5)
    sampler = IQSPR(estimator=ecfp_loglike, modifier=ngram_model)

    # prepare test data
    yield {
        'ecfp': ecfp_desc,
        'rdkitfp': rdkit_desc,
        'bre': ecfp_loglike,
        'bre2': rdkit_loglike,
        'like_mdl': combined,
        'ngram': ngram_model,
        'iqspr': sampler,
        'pg': (smiles, properties),
    }

    print('test over')
def data():
    """Build the shared test objects, yield them as a dict, then log the end.

    Reads ``polymer_test_data.csv`` from this file's directory and creates an
    ``ECFP`` descriptor, a ``BayesianRidgeEstimator``, an ``NGram`` and an
    ``IQSPR`` sampler (none of them fitted here). The yielded dict has the
    keys ``ecfp``, ``bre``, ``ngram``, ``iqspr`` and ``pg`` (``(X, y)``).
    """
    import warnings

    # ignore numpy warning
    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    csv_path = Path(__file__).parent / 'polymer_test_data.csv'
    table = pd.read_csv(str(csv_path))
    smiles = table['smiles']
    properties = table.drop(['smiles', 'Unnamed: 0'], axis=1)

    descriptor = ECFP(n_jobs=1, input_type='smiles')
    estimator = BayesianRidgeEstimator(descriptor=descriptor)
    modifier = NGram()
    sampler = IQSPR(estimator=estimator, modifier=modifier)

    # prepare test data
    yield {
        'ecfp': descriptor,
        'bre': estimator,
        'ngram': modifier,
        'iqspr': sampler,
        'pg': (smiles, properties),
    }

    print('test over')