def test_LFM_gradient(artificial_data, models):
    """The LFM gradient matches the reference model's analytical gradient and
    a finite-difference approximation of the log-posterior gradient."""
    reg_ref = Regressor(ss=models[0])
    reg_ref._use_penalty = False
    reg_ref._use_jacobian = True
    dt, u, u1, y, *_ = reg_ref._prepare_data(artificial_data, ['To', 'Qh'], 'Ti')

    reg_lfm = Regressor(ss=models[1])
    reg_lfm._use_penalty = False
    reg_lfm._use_jacobian = True

    # Copy free parameters so later evaluations cannot mutate them in place.
    eta_ref = deepcopy(reg_ref.ss.parameters.eta_free)
    eta_lfm = deepcopy(reg_lfm.ss.parameters.eta_free)

    grad_ref = reg_ref._eval_dlog_posterior(eta_ref, dt, u, u1, y)[1]
    grad_lfm = reg_lfm._eval_dlog_posterior(eta_lfm, dt, u, u1, y)[1]
    grad_ref_fd = nd.Gradient(reg_ref._eval_log_posterior)(eta_ref, dt, u, u1, y)

    assert np.all(eta_ref == eta_lfm)
    assert ned(grad_ref, grad_lfm) < 1e-7
    assert ned(grad_ref, grad_ref_fd) < 1e-7
    assert np.all(np.sign(grad_ref) == np.sign(grad_ref_fd))
    assert np.all(np.sign(grad_ref) == np.sign(grad_lfm))
    assert grad_ref == pytest.approx(grad_ref_fd, rel=1e-6)
    assert grad_ref == pytest.approx(grad_lfm, rel=1e-6)
def test_gp_product_creation(data_Periodic, models):
    """A QuasiPeriodic kernel must match the product Periodic * Matern."""
    m, qp = models
    period, mscale, lscale, decay, sigv = np.random.uniform(0.3, 3.0, 5)

    def _log_par(name, value):
        # Shorthand for a log-transformed hyper-parameter entry.
        return dict(name=name, value=value, transform='log')

    par_periodic = [
        _log_par('period', period),
        _log_par('mscale', mscale),
        _log_par('lscale', lscale),
        _log_par('sigv', sigv),
    ]
    par_matern = [
        dict(name='mscale', value=1.0, transform='fixed'),
        _log_par('lscale', decay),
        dict(name='sigv', value=0.0, transform='fixed'),
    ]
    par_quasi_periodic = [
        _log_par('period', period),
        _log_par('mscale', mscale),
        _log_par('lscale', lscale),
        _log_par('sigv', sigv),
        _log_par('decay', decay),
    ]

    reg_quasi = Regressor(ss=qp(par_quasi_periodic))
    reg_product = Regressor(ss=Periodic(par_periodic) * m(par_matern))
    check_grad(data_Periodic, reg_quasi, reg_product)
def test_save_version_number():
    """Saving and re-loading a model preserves the pysip version number.

    Fix: the original left the 'test.pickle' artifact created by
    ``save_model`` on disk; clean it up even when an assertion fails.
    """
    from pysip import __version__

    save_model('test', Regressor(None))
    try:
        load_reg = load_model('test')
        assert load_reg.__version__ == __version__
    finally:
        # Remove the pickle file written by save_model so the test
        # leaves no artifact behind, regardless of the outcome.
        os.remove('test.pickle')
def test_save_model_to_pickle(reg):
    """Round-trip a Regressor through save_model/load_model.

    Checks that the continuous dssm derivative matrices (dA..dP0) and the
    parameters survive pickling, and that a genuinely new object is loaded.

    Fixes over the original:
      * 'test.pickle' was only removed when every assertion passed; a
        failing assertion leaked the file onto disk. Cleanup now runs in a
        ``finally`` block.
      * The eight copy-pasted per-matrix comparisons are deduplicated, and
        each matrix is compared over its *own* keys instead of assuming
        that every derivative dict shares dA's key set.
    """
    reg = Regressor(reg)
    reg.ss.parameters.theta = np.random.uniform(1e-1, 1, len(reg.ss.parameters.theta))
    reg.ss.update_continuous_dssm()

    # Snapshot the derivative dicts before saving, so the comparison is
    # against the exact pre-pickle state.
    derivative_names = ('dA', 'dB', 'dC', 'dD', 'dQ', 'dR', 'dx0', 'dP0')
    derivatives = {name: getattr(reg.ss, name) for name in derivative_names}

    save_model('test', reg)
    try:
        load_reg = load_model('test')
        for name, saved in derivatives.items():
            loaded = getattr(load_reg.ss, name)
            for k in saved.keys():
                assert np.allclose(saved[k], loaded[k])
        # The loaded regressor must be a distinct object with equal parameters.
        assert id(reg) != id(load_reg)
        for a, b in zip(reg.ss.parameters, load_reg.ss.parameters):
            assert a == b
    finally:
        os.remove('test.pickle')
def test_fit_hmc_m32():
    """Draw HMC posterior samples for a Matern-3/2 GP and validate the fit."""
    n_cpu = 1
    np.random.seed(1)
    n_data = 50
    time_index = np.linspace(0, 1, n_data)
    observations = (
        np.sin(12 * time_index)
        + 0.66 * np.cos(25 * time_index)
        + np.random.randn(n_data) * 0.1
    )
    df = pd.DataFrame(index=time_index, data=observations, columns=['y'])

    par = [
        dict(name='mscale', value=9.313e-01, bounds=(0, None), prior=Gamma(4, 4)),
        dict(name='lscale', value=1.291e-01, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
        dict(name='sigv', value=9.241e-02, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
    ]
    reg = Regressor(Matern32(par))
    fit = reg.fit(df=df, outputs='y', options={'init': 'fixed', 'n_cpu': n_cpu})
    # return df, reg, fit

    # Sampler health checks
    diag = fit.diagnostic
    assert isinstance(diag, pd.DataFrame)
    assert np.all(diag['ebfmi'] > 0.8)
    assert np.all(diag['mean accept_prob'] > 0.7)
    assert np.sum(diag['sum diverging']) == 0
    assert np.sum(diag['sum max_tree_depth']) == 0

    # Convergence checks; tolerances are 3 MCSE for ess_mean = 1000
    summ = az.summary(fit.posterior, round_to='none')
    assert isinstance(summ, pd.DataFrame)
    assert np.all(summ['r_hat'] < 1.01)
    assert np.all(summ[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    assert summ['mean']['mscale'] == pytest.approx(1.107023, abs=3 * 0.009261)
    assert summ['mean']['lscale'] == pytest.approx(0.146614, abs=3 * 0.001074)
    assert summ['mean']['sigv'] == pytest.approx(0.096477, abs=3 * 0.000515)
    assert summ['mean']['lp_'] == pytest.approx(2.919439, abs=3 * 0.038186)

    xm, xsd = reg.posterior_state_distribution(
        trace=fit.posterior, df=df, outputs='y', smooth=True, n_cpu=n_cpu
    )
    assert isinstance(xm, np.ndarray)
    assert isinstance(xsd, np.ndarray)
    assert xm.shape == (4000, len(df), reg.ss.nx)
    assert xsd.shape == (4000, len(df), reg.ss.nx)
    state_rmse = np.mean(np.mean((df['y'].values - xm[:, :, 0]) ** 2, axis=1) ** 0.5)
    assert state_rmse == pytest.approx(5.839e-2, abs=1e-2)

    ym, ysd = reg.posterior_predictive(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(ym, np.ndarray)
    assert isinstance(ysd, np.ndarray)
    assert ym.shape == (4000, len(df))
    assert ysd.shape == (4000, len(df))
    predictive_rmse = np.mean(np.mean((df['y'].values - ym) ** 2, axis=1) ** 0.5)
    assert predictive_rmse == pytest.approx(3.728e-2, abs=1e-2)

    pw_loglik = reg.pointwise_log_likelihood(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(pw_loglik, dict)
    assert pw_loglik['log_likelihood'].shape == (4, 1000, len(df))
    # 0.026 ~ pw_loglik['log_likelihood'].sum(axis=2).std() / np.sqrt(1000)
    assert pw_loglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(-1.394, abs=3.256e-2)
def gen_regressor(statespaces):
    """Yield (Regressor, inputs, outputs) triples for gradient testing."""
    for ssm in statespaces:
        # Keep the original call order: random_parameters draws first,
        # then the hold-order coin flip, so RNG streams stay identical.
        params = random_parameters(ssm)
        ssm_inputs = get_inputs(ssm)
        ssm_outputs = get_outputs(ssm)
        hold = np.round(np.random.rand())
        yield (
            Regressor(ss=ssm(parameters=params, hold_order=hold)),
            ssm_inputs,
            ssm_outputs,
        )
def generate_regressor(*statespaces):
    """Yield a Regressor per state space, sharing one random parameter draw."""
    mscale, lscale, sigv = np.random.uniform(0.3, 2.0, 3)
    params = [
        dict(name='mscale', value=mscale, transform='log'),
        dict(name='lscale', value=lscale, transform='log'),
        dict(name='sigv', value=sigv, transform='log'),
    ]
    for ssm in statespaces:
        yield Regressor(ss=ssm(parameters=params))
def test_Periodic(data_Periodic):
    """Finite-difference check of the Periodic covariance gradient."""
    hyper_names = ('period', 'mscale', 'lscale', 'sigv')
    params = [dict(name=name, transform='log') for name in hyper_names]
    reg = Regressor(ss=Periodic(parameters=params))
    # Randomize the four hyper-parameters before evaluating gradients.
    reg.ss.parameters.theta = np.random.uniform(0.3, 3.0, 4)
    check_grad_fd(data_Periodic, reg)
def test_model_pickle_file_size_limit(ss, size_kb):
    """A saved model pickle must stay within the given size budget (kB)."""
    model = Regressor(ss)
    n_theta = len(model.ss.parameters.theta)
    model.ss.parameters.theta = np.random.uniform(1e-1, 1, n_theta)
    model.ss.update_continuous_dssm()
    save_model('big', model)
    size_bytes = os.path.getsize('big.pickle')
    os.remove('big.pickle')
    # size_kb is interpreted in SI kilobytes (1 kB = 1000 bytes).
    assert size_bytes <= size_kb * 1000
def generate_regressor_sum(*statespaces):
    """Yield Regressors built from a Periodic + Matern covariance sum."""
    values = np.random.uniform(0.3, 3.0, 6)
    positive = (0.0, None)
    par_periodic = [
        dict(name='period', value=values[0], bounds=positive),
        dict(name='mscale', value=values[1], bounds=positive),
        dict(name='lscale', value=values[2], bounds=positive),
        dict(name='sigv', value=values[3], bounds=positive),
    ]
    par_matern = [
        dict(name='mscale', value=values[4], bounds=positive),
        dict(name='lscale', value=values[5], bounds=positive),
        # The Matern noise term is disabled; only the Periodic part has sigv.
        dict(name='sigv', value=0.0, transform='fixed'),
    ]
    for ssm in statespaces:
        yield Regressor(Periodic(par_periodic) + ssm(par_matern))
def generate_regressor_product(*statespaces):
    """Yield Regressors built from a Periodic * Matern covariance product."""
    values = np.random.uniform(1.0, 2.0, 5)
    positive = (0.0, None)
    par_periodic = [
        dict(name='period', value=values[0], bounds=positive),
        dict(name='mscale', value=values[1], bounds=positive),
        dict(name='lscale', value=values[2], bounds=positive),
        dict(name='sigv', value=values[3], bounds=positive),
    ]
    par_matern = [
        # Fixed unit magnitude and zero noise: the Matern factor only
        # contributes its length-scale to the product kernel.
        dict(name='mscale', value=1.0, transform='fixed'),
        dict(name='lscale', value=values[4], bounds=positive),
        dict(name='sigv', value=0.0, transform='fixed'),
    ]
    for ssm in statespaces:
        yield Regressor(ss=Periodic(parameters=par_periodic) * ssm(parameters=par_matern))
# Generate artificial data np.random.seed(1) N = 20 t = np.sort(np.random.rand(1, N), axis=1).flatten() y = np.sin(12 * t) + 0.66 * np.cos(25 * t) + np.random.randn(1, N) * 0.01 y = y.flatten() df = pd.DataFrame(index=t, data=y, columns=['y']) # Parameter settings for the Matérn covariance function with smoothness = 3/2 parameters = [ dict(name='mscale', value=0.5, transform='log'), dict(name='lscale', value=0.5, transform='log'), dict(name='sigv', value=0.1, transform='log'), ] reg = Regressor(Matern32(parameters)) fit_summary, corr_matrix, opt_summary = reg.fit(df=df, outputs='y') # Fit results print(f'\n{fit_summary}') # Predict on test data tnew = np.linspace(-0.1, 1.1, 500) ym, ysd = reg.predict(df=df, outputs='y', tnew=tnew, smooth=True) # Plot output mean and 95% credible intervals sns.set_style('darkgrid') sns.set_context('talk') plt.plot(t, y, linestyle='', marker='+', mew=2, label='data', color='darkred') plt.plot(tnew, ym, color='navy', label='mean') plt.fill_between(tnew,
amplitude = alim[0] + (alim[1] - alim[0]) * np.random.random() noise = 0.2 * np.random.randn(n) y = amplitude * np.sin(2.0 * np.pi / period * t) + noise y[y <= 0] = 0.0 data = pd.DataFrame(index=t, data=y, columns=['y']) # Parameter settings for the Periodic covariance function parameters = [ dict(name='period', value=1.0, transform='fixed'), dict(name='mscale', value=1.0, transform='log'), dict(name='lscale', value=1.0, transform='log'), dict(name='sigv', value=0.1, transform='log'), ] # Instantiate regressor with the Periodic covariance function reg = Regressor(Periodic(parameters)) fit_summary, corr_matrix, opt_summary = reg.fit(df=data, outputs='y') # Fit results print(f'\n{fit_summary}') # Predict on test data tnew = np.linspace(xlim[0], xlim[1] + 1, 500) ym, ysd = reg.predict(df=data, outputs='y', tnew=tnew, smooth=True) # Plot output mean and 95% credible intervals sns.set_style('darkgrid') sns.set_context('talk') plt.plot(t, y, linestyle='', marker='+', mew=2, label='data', color='darkred') plt.plot(tnew, ym, color='navy', label='mean')
# Matérn 3/2 for short-term effects p4 = [ dict(name='mscale', value=4.595e-01, transform='log'), dict(name='lscale', value=6.359e-01, transform='log'), dict(name='sigv', value=0.0, transform='fixed'), ] k1 = Matern52(p1, name='k1') k2 = Periodic(p2, name='k2') k3 = Matern32(p3, name='k3') k4 = Matern32(p4, name='k4') # Compose covariance function K = k1 + k2 * k3 + k4 reg = Regressor(K) fit_summary, corr_matrix, opt_summary = reg.fit(df=df, outputs='CO2_fit') print(f'\n{fit_summary}') # generate new prediction time instants tnew = np.arange(1958, 2030, 0.01) ym, ysd = reg.predict(df=df, outputs='CO2_pred', smooth=True, tnew=tnew) # Plot output mean and 95% credible intervals sns.set_style('darkgrid') sns.set_context('talk') plt.plot(df.index, df['CO2'], color='darkred', label='data') plt.plot(tnew, C02_mean + ym, color='navy', label='mean') plt.fill_between( tnew,
def test_fit_hmc_m32():
    """HMC sampling with a dense mass matrix on a Matern-3/2 GP."""
    n_cpu = 1
    np.random.seed(1)
    n_data = 50
    time_index = np.linspace(0, 1, n_data)
    observations = (
        np.sin(12 * time_index)
        + 0.66 * np.cos(25 * time_index)
        + np.random.randn(n_data) * 0.1
    )
    df = pd.DataFrame(index=time_index, data=observations, columns=['y'])

    par = [
        dict(name='mscale', value=1.11, bounds=(0, None), prior=Gamma(4, 4)),
        dict(name='lscale', value=0.15, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
        dict(name='sigv', value=0.1, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
    ]
    reg = Regressor(Matern32(par))
    fit = reg.fit(
        df=df,
        outputs='y',
        options={'init': 'fixed', 'n_cpu': n_cpu, 'dense_mass_matrix': True},
    )
    # return df, reg, fit

    # Sampler health checks
    diagnostics = fit.diagnostic
    assert isinstance(diagnostics, pd.DataFrame)
    assert np.all(diagnostics['ebfmi'] > 0.9)
    assert np.all(diagnostics['mean accept_prob'] > 0.7)
    assert np.sum(diagnostics['sum diverging']) == 0
    assert np.sum(diagnostics['sum max_tree_depth']) == 0

    # Convergence checks
    summary = az.summary(fit.posterior, round_to='none')
    assert isinstance(summary, pd.DataFrame)
    assert np.all(summary['r_hat'] < 1.01)
    assert np.all(summary[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    assert summary['mean']['mscale'] == pytest.approx(1.1, rel=5e-2)
    assert summary['mean']['lscale'] == pytest.approx(1.5e-1, rel=5e-2)
    assert summary['mean']['sigv'] == pytest.approx(9.6e-2, rel=5e-2)
    assert summary['mean']['lp_'] == pytest.approx(2.9, rel=5e-2)

    xm, xsd = reg.posterior_state_distribution(
        trace=fit.posterior, df=df, outputs='y', smooth=True, n_cpu=n_cpu
    )
    assert isinstance(xm, np.ndarray)
    assert isinstance(xsd, np.ndarray)
    assert xm.shape == (4000, len(df), reg.ss.nx)
    assert xsd.shape == (4000, len(df), reg.ss.nx)
    state_rmse = np.mean(np.mean((df['y'].values - xm[:, :, 0]) ** 2, axis=1) ** 0.5)
    assert state_rmse == pytest.approx(5.8e-2, rel=5e-2)

    ym, ysd = reg.posterior_predictive(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(ym, np.ndarray)
    assert isinstance(ysd, np.ndarray)
    assert ym.shape == (4000, len(df))
    assert ysd.shape == (4000, len(df))
    predictive_rmse = np.mean(np.mean((df['y'].values - ym) ** 2, axis=1) ** 0.5)
    assert predictive_rmse == pytest.approx(3.7e-2, rel=5e-2)

    pwloglik = reg.pointwise_log_likelihood(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(pwloglik, dict)
    assert pwloglik['log_likelihood'].shape == (4, 1000, len(df))
    assert pwloglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(-1.35, rel=5e-2)
dict(name='Ro', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)), dict(name='Ri', scale=1e-3, bounds=(0, None), prior=Gamma(2, 0.1)), dict(name='Cw', scale=1e7 / sT, bounds=(0, None), prior=Gamma(2, 0.1)), dict(name='Ci', scale=1e6 / sT, bounds=(0, None), prior=Gamma(2, 0.1)), dict(name='sigw_w', scale=1e-2 * sT ** 0.5, bounds=(0, None), prior=Gamma(2, 0.1)), dict(name='sigw_i', value=0, transform='fixed'), dict(name='sigv', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)), dict(name='x0_w', loc=25, scale=7, prior=Normal(0, 1)), dict(name='x0_i', value=y0, transform='fixed'), dict(name='sigx0_w', value=0.1, transform='fixed'), dict(name='sigx0_i', value=0.1, transform='fixed'), ] # Instantiate the model and use the first order hold approximation model = TwTi_RoRi(parameters, hold_order=1) reg = Regressor(model) # Dynamic Hamiltonian Monte Carlo with multinomial sampling fit = reg.fit(df=df, inputs=inputs, outputs='T_int') # Compute the posterior predictive distribution ym = reg.posterior_predictive(trace=fit.posterior, df=df, inputs=inputs)[0] sns.set_style('darkgrid') sns.set_context('talk') percentile_plot( df.index, ym, n=10, percentile_min=0, percentile_max=100,
def test_armadillo_hmc(dense_mass_matrix):
    """Full HMC run on the armadillo dataset with a TwTi_RoRi thermal model."""
    n_cpu = 1
    np.random.seed(4567)

    # Prepare data: drop the trailing sample and rescale time to days.
    df = pd.read_csv('data/armadillo/armadillo_data_H2.csv').set_index('Time')
    df.drop(df.index[-1], axis=0, inplace=True)
    inputs = ['T_ext', 'P_hea']
    y0 = df['T_int'][0]
    sT = 3600.0 * 24.0
    df.index /= sT

    # Parameter settings for second order dynamic thermal model
    parameters = [
        dict(name='Ro', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='Ri', scale=1e-3, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='Cw', scale=1e7 / sT, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='Ci', scale=1e6 / sT, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='sigw_w', scale=1e-2 * sT ** 0.5, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='sigw_i', value=0, transform='fixed'),
        dict(name='sigv', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='x0_w', loc=25, scale=7, prior=Normal(0, 1)),
        dict(name='x0_i', value=y0, transform='fixed'),
        dict(name='sigx0_w', value=0.1, transform='fixed'),
        dict(name='sigx0_i', value=0.1, transform='fixed'),
    ]

    reg = Regressor(TwTi_RoRi(parameters, hold_order=1))
    fit = reg.fit(
        df=df,
        inputs=inputs,
        outputs='T_int',
        options={'n_cpu': n_cpu, 'dense_mass_matrix': dense_mass_matrix},
    )
    ym = reg.posterior_predictive(trace=fit.posterior, df=df, inputs=inputs, n_cpu=n_cpu)[0]
    pwloglik = reg.pointwise_log_likelihood(
        trace=fit.posterior, df=df, inputs=inputs, outputs='T_int', n_cpu=n_cpu
    )

    # Inference diagnosis
    diagnostics = fit.diagnostic
    assert np.all(diagnostics['ebfmi'] > 0.9)
    accept_prob = diagnostics['mean accept_prob']
    assert np.all((accept_prob > 0.7) & (accept_prob < 0.9))
    assert np.sum(diagnostics['sum diverging']) == 0
    assert np.sum(diagnostics['sum max_tree_depth']) == 0

    # Convergence diagnosis
    summary = az.summary(fit.posterior, round_to='none')
    assert np.all(summary['r_hat'] < 1.01)
    assert np.all(summary[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    expected_means = {
        'Ro': 1.778e-02,
        'Ri': 2.001e-03,
        'Cw': 1.714e02,
        'Ci': 1.908e01,
        'sigw_w': 5.503e-01,
        'sigv': 3.470e-02,
        'x0_w': 2.659e01,
        'lp_': -3.012e02,
    }
    for name, expected in expected_means.items():
        assert summary['mean'][name] == pytest.approx(expected, rel=1e-2)

    # Predictions tests
    prediction_rmse = np.mean(np.mean((df['T_int'].values - ym) ** 2, axis=1) ** 0.5)
    assert prediction_rmse == pytest.approx(8.358e-01, rel=1e-2)

    # Point-wise log-likelihood tests
    assert pwloglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(3.274e02, rel=1e-2)
def regressor(matern_statespace):
    """Build a Regressor around the Matern state-space fixture."""
    reg = Regressor(ss=matern_statespace)
    return reg
def regressor_armadillo(statespace_armadillo):
    """Build a Regressor around the armadillo state-space fixture."""
    reg = Regressor(ss=statespace_armadillo)
    return reg