Пример #1
0
def test_iqspr_resample1(data):
    """Compare IQSPR output against hard-coded results for r_ESS=0 and r_ESS=1.

    For each ``r_ESS`` setting the NumPy seed is reset to 0, a new ``IQSPR``
    is built from ``data['like_mdl']`` and ``data['ngram']``, and it is run
    on the first three entries of ``data['pg'][0]`` with two beta values.
    Every yielded step must have probabilities summing to 1, frequencies
    summing to 3, and sorted samples equal to the expected strings.
    """
    # NOTE: fixing the random seed alone may not make this test fully reliable.
    estimator = data['like_mdl']
    modifier = data['ngram']
    beta = np.linspace(0.1, 1, 2)

    # Both settings are expected to produce the same samples at the first step.
    first_step = ['C([*])C([*])(C(=O)OCCSCCC#N)', 'C([*])C([*])(SCCC)',
                  'O([*])C(=O)OC(C=C1)=CC=C1C(C=C2)=CC=C2CC(C=C3)=CC=C3C(C=C4)=CC=C4([*])']
    expected_by_r_ess = (
        (0, [
            first_step,
            ['C([*])C([*])(C(=O)OCC(F)(F)C(F)(F)OC(F)(F)OC(F)(F)C(F)(F)OC(F)(F)C(F)(F)F)',
             'C([*])C([*])(CC)(C(=O)OCC(F)(F)F)',
             'O([*])C(=O)OC(C=C1)=CC=C1C(C=C1)=CC=C1CC(C=C1)=CC=C1C(C=C1)=CC=C1C(C=C1)=CC=C1C(C=C1)=CC=C1C(C=C1)=CC=C1C(=S)'],
        ]),
        (1, [
            first_step,
            ['O([*])C(=O)OC(C=C1)=CC=C1C(C=C1)=CC=C1CC(C=C1)=CC=C1C(C=C1)=CC=C1([*])',
             'O([*])C(=O)OC(C=C1)=CC=C1C(C=C1)=CC=C1CC(C=C1)=CC=C1C(C=C1)=CC=C1C(=S)'],
        ]),
    )

    def check(r_ess, expected):
        # Reseed before building the sampler so both runs start identically.
        np.random.seed(0)
        sampler = IQSPR(estimator=estimator, modifier=modifier, r_ESS=r_ess)
        steps = sampler(data['pg'][0][:3], beta, yield_lpf=True)
        for step, (samples, _loglike, prob, freq) in enumerate(steps):
            assert np.abs(np.sum(prob) - 1.0) < 1e-5
            assert np.sum(freq) == 3
            assert np.all(np.sort(samples) == np.array(expected[step]))

    for r_ess, expected in expected_by_r_ess:
        check(r_ess, expected)
Пример #2
0
def run_iqspr(prd_mdls, n_gram, init_samples, beta):
    """Run an iQSPR loop, pickle the per-step outputs and return them.

    Args:
        prd_mdls: object passed to ``IQSPR`` as ``estimator``.
        n_gram: object passed to ``IQSPR`` as ``modifier``.
        init_samples: initial samples handed to the ``IQSPR`` call.
        beta: beta values handed to the ``IQSPR`` call; stored in the result
            with a leading 0 for the step of the initial samples.

    Returns:
        dict: keys ``"samples"``, ``"loglike"``, ``"prob"``, ``"freq"`` (one
        list entry per yielded step) and ``"beta"``, as read back from
        ``iQSPR_results_reorder.obj`` in the current working directory.
    """
    # library for running iQSPR in XenonPy-iQSPR
    from xenonpy.inverse.iqspr import IQSPR

    # set up likelihood and n-gram models in iQSPR
    iqspr = IQSPR(estimator=prd_mdls, modifier=n_gram)

    np.random.seed(201903)  # fix the random seed

    # NOTE(review): 'H**O-LUMO gap' looks like a mangled 'HOMO-LUMO gap';
    # left unchanged here -- confirm against the target names of prd_mdls.
    targets = {'E': (0, 200), 'H**O-LUMO gap': (-np.inf, 3)}

    # main loop of iQSPR
    samples, loglike, prob, freq = [], [], [], []
    # The loop target for the frequencies must not be named ``freq``:
    # that would rebind the accumulator list and lose the history.
    for s, ll, p, fq in iqspr(init_samples, beta, yield_lpf=True, **targets):
        samples.append(s)
        loglike.append(ll)
        prob.append(p)
        freq.append(fq)

    # record all outputs
    iqspr_results_reorder = {
        "samples": samples,
        "loglike": loglike,
        "prob": prob,
        "freq": freq,
        "beta": np.hstack([0, beta])  # include the step of initial samples
    }

    # save results
    with open('iQSPR_results_reorder.obj', 'wb') as fh:
        pk.dump(iqspr_results_reorder, fh)

    # read the file back so the returned object is exactly what was stored
    with open('iQSPR_results_reorder.obj', 'rb') as fh:
        iqspr_results_reorder = pk.load(fh)

    return iqspr_results_reorder
Пример #3
0
    def run(self):
        """Run iQSPR from this object's settings and collect every step.

        Calls ``self.check_properties()``, builds a ``BayesianRidgeEstimator``
        from ``self.descriptor_generator`` and ``self.models``, then iterates
        an ``IQSPR`` sampler over ``self.seed_structure`` and ``self.beta``
        with ``self.desired_values`` as keyword arguments.

        Returns:
            tuple: ``(iqspr, iqspr_results)`` where ``iqspr_results`` is a
            dict with keys ``"samples"``, ``"loglike"``, ``"prob"``,
            ``"freq"`` and ``"beta"``. The dict is also stored on
            ``self.iqspr_results``.
        """
        from xenonpy.inverse.iqspr import IQSPR, BayesianRidgeEstimator

        self.check_properties()
        estimator = BayesianRidgeEstimator(
            descriptor=self.descriptor_generator, **self.models)
        iqspr = IQSPR(estimator=estimator, modifier=self.ngram_model)

        # One list per recorded quantity, filled step by step.
        iqspr_results = {"samples": [], "loglike": [], "prob": [], "freq": []}
        steps = iqspr(self.seed_structure, self.beta, yield_lpf=True,
                      **self.desired_values)
        for structures, loglike, prob, freq in steps:
            iqspr_results["samples"].append(structures)
            iqspr_results["loglike"].append(loglike)
            iqspr_results["prob"].append(prob)
            iqspr_results["freq"].append(freq)

        # Leading 0 accounts for the step of the initial samples.
        iqspr_results["beta"] = np.hstack([0, self.beta])

        self.iqspr_results = iqspr_results
        return iqspr, iqspr_results
Пример #4
0
def test_iqspr_2(data):
    """Run IQSPR with a DataFrame of per-property beta values.

    The beta table has one column per property name and ten rows. For every
    yielded step the probabilities must sum to 1 and the frequencies to 5
    (five entries of ``data['pg'][0]`` are used as the starting samples).
    """
    np.random.seed(0)
    sampler = IQSPR(estimator=data['like_mdl'], modifier=data['ngram'])

    beta_from_005 = np.linspace(0.05, 1, 10)
    beta_from_001 = np.linspace(0.01, 1, 10)
    beta = pd.DataFrame({
        'bandgap': beta_from_005,
        'glass_transition_temperature': beta_from_001,
        'density': beta_from_005,
        'refractive_index': beta_from_001,
    })

    start = data['pg'][0][:5]
    for _samples, _loglike, prob, freq in sampler(start, beta, yield_lpf=True):
        assert np.abs(np.sum(prob) - 1.0) < 1e-5
        assert np.sum(freq) == 5
Пример #5
0
def test_iqspr_1(data):
    """Run IQSPR with a freshly fitted BayesianRidgeEstimator and NGram.

    The estimator is fitted on ``data['pg']``, the n-gram model on the first
    20 entries of ``data['pg'][0]``. The sampler is started from the first
    five entries with target ranges for ``bandgap`` and ``density``. For
    every yielded step the probabilities must sum to 1 and the frequencies
    to 5.
    """
    np.random.seed(0)
    ecfp = ECFP(n_jobs=1, input_type='smiles')
    bre = BayesianRidgeEstimator(descriptor=ecfp)
    ngram = NGram()
    iqspr = IQSPR(estimator=bre, modifier=ngram)
    X, y = data['pg']
    bre.fit(X, y)
    ngram.fit(X[0:20], train_order=10)
    beta = np.linspace(0.05, 1, 10)
    targets = {'bandgap': (0.1, 0.2), 'density': (0.9, 1.2)}
    for s, ll, p, f in iqspr(X[:5], beta, yield_lpf=True, **targets):
        assert np.abs(np.sum(p) - 1.0) < 1e-5
        # Pass the value itself as the message; ``print(f)`` would evaluate
        # to None and leave the AssertionError without any message.
        assert np.sum(f) == 5, f
Пример #6
0
def test_iqspr_1(data):
    """Run IQSPR with a freshly fitted GaussianLogLikelihood and NGram.

    The likelihood uses the ``data['ecfp']`` descriptor, is fitted on
    ``data['pg']`` and has its targets reset to ranges for ``bandgap`` and
    ``density``. The n-gram model is fitted on the first 20 entries of
    ``data['pg'][0]``. For every yielded step the probabilities must sum to
    1 and the frequencies to 5.
    """
    np.random.seed(0)
    likelihood = GaussianLogLikelihood(descriptor=data['ecfp'])
    modifier = NGram()
    sampler = IQSPR(estimator=likelihood, modifier=modifier)

    smiles, props = data['pg']
    likelihood.fit(smiles, props)
    likelihood.update_targets(reset=True,
                              bandgap=(0.1, 0.2),
                              density=(0.9, 1.2))
    modifier.fit(data['pg'][0][0:20], train_order=10)

    beta = np.linspace(0.05, 1, 10)
    steps = sampler(data['pg'][0][:5], beta, yield_lpf=True)
    for _samples, _loglike, prob, freq in steps:
        assert np.abs(np.sum(prob) - 1.0) < 1e-5
        assert np.sum(freq) == 5
Пример #7
0
def data():
    """Yield a dict of fitted models and test data, then print a closing note.

    Reads ``polymer_test_data.csv`` from the directory of this file, fits two
    ``GaussianLogLikelihood`` models (one per descriptor) on different
    property columns, combines them in a ``BaseLogLikelihoodSet`` subclass,
    fits an ``NGram`` on the first 20 entries of the ``smiles`` column and
    builds an ``IQSPR`` instance.

    Yields:
        dict: keys ``ecfp``, ``rdkitfp``, ``bre``, ``bre2``, ``like_mdl``,
        ``ngram``, ``iqspr`` and ``pg`` (the ``(X, y)`` pair).
    """
    # ignore numpy warning
    import warnings

    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    csv_path = Path(__file__).parent / 'polymer_test_data.csv'
    table = pd.read_csv(str(csv_path))

    smiles = table['smiles']
    props = table.drop(['smiles', 'Unnamed: 0'], axis=1)

    fp_ecfp = ECFP(n_jobs=1, input_type='smiles', target_col=0)
    fp_rdkit = RDKitFP(n_jobs=1, input_type='smiles', target_col=0)
    ll_ecfp = GaussianLogLikelihood(descriptor=fp_ecfp)
    ll_rdkit = GaussianLogLikelihood(descriptor=fp_rdkit)
    ll_ecfp.fit(smiles, props[['bandgap', 'glass_transition_temperature']])
    ll_rdkit.fit(smiles, props[['density', 'refractive_index']])
    ll_ecfp.update_targets(bandgap=(1, 2),
                           glass_transition_temperature=(200, 300))
    ll_rdkit.update_targets(refractive_index=(2, 3), density=(0.9, 1.2))

    class MyLogLikelihood(BaseLogLikelihoodSet):
        def __init__(self):
            super().__init__()

            # NOTE(review): the same attribute is assigned twice; presumably
            # BaseLogLikelihoodSet registers each assignment instead of
            # overwriting the first -- confirm against its implementation.
            self.loglike = ll_ecfp
            self.loglike = ll_rdkit

    combined = MyLogLikelihood()
    modifier = NGram()
    modifier.fit(smiles[0:20], train_order=5)
    sampler = IQSPR(estimator=ll_ecfp, modifier=modifier)

    # prepare test data
    yield {
        'ecfp': fp_ecfp,
        'rdkitfp': fp_rdkit,
        'bre': ll_ecfp,
        'bre2': ll_rdkit,
        'like_mdl': combined,
        'ngram': modifier,
        'iqspr': sampler,
        'pg': (smiles, props),
    }

    print('test over')
Пример #8
0
def data():
    """Yield a dict of unfitted models and test data, then print a closing note.

    Reads ``polymer_test_data.csv`` from the directory of this file and
    builds an ``ECFP`` descriptor, a ``BayesianRidgeEstimator``, an ``NGram``
    and an ``IQSPR`` instance; nothing is fitted here.

    Yields:
        dict: keys ``ecfp``, ``bre``, ``ngram``, ``iqspr`` and ``pg`` (the
        ``(X, y)`` pair).
    """
    # ignore numpy warning
    import warnings

    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    csv_path = Path(__file__).parent / 'polymer_test_data.csv'
    table = pd.read_csv(str(csv_path))

    smiles = table['smiles']
    props = table.drop(['smiles', 'Unnamed: 0'], axis=1)

    descriptor = ECFP(n_jobs=1, input_type='smiles')
    estimator = BayesianRidgeEstimator(descriptor=descriptor)
    modifier = NGram()
    sampler = IQSPR(estimator=estimator, modifier=modifier)

    # prepare test data
    yield {
        'ecfp': descriptor,
        'bre': estimator,
        'ngram': modifier,
        'iqspr': sampler,
        'pg': (smiles, props),
    }

    print('test over')