def test_doubleml_exception_confint():
    """Exercise the error paths of ``confint()`` and its final success path.

    The calls below are made in a fixed order on a single ``DoubleMLPLR``
    object: first on the unfitted object, then after ``fit()``, and finally
    after ``bootstrap()``.
    """
    plr = DoubleMLPLR(dml_data, ml_g, ml_m)

    # Invalid arguments passed to confint().
    with pytest.raises(TypeError, match='joint must be True or False. Got 1.'):
        plr.confint(joint=1)

    with pytest.raises(
            TypeError,
            match="The confidence level must be of float type. 5% of type <class 'str'> was passed."):
        plr.confint(level='5%')

    with pytest.raises(ValueError, match=r'The confidence level must be in \(0,1\). 0.0 was passed.'):
        plr.confint(level=0.)

    # Calling confint() on an object that has not been fitted yet.
    with pytest.raises(ValueError, match=r'Apply fit\(\) before confint\(\).'):
        plr.confint()

    joint_pattern = r'Apply fit\(\) & bootstrap\(\) before confint\(joint=True\).'
    with pytest.raises(ValueError, match=joint_pattern):
        plr.confint(joint=True)

    # The same error is expected after fit() as long as bootstrap() has not
    # been applied as well.
    plr.fit()
    with pytest.raises(ValueError, match=joint_pattern):
        plr.confint(joint=True)

    # With both fit() and bootstrap() applied, a DataFrame is expected.
    plr.bootstrap()
    joint_ci = plr.confint(joint=True)
    assert isinstance(joint_ci, pd.DataFrame)
def test_doubleml_exception_no_cross_fit():
    """Expect an ``AssertionError`` when cross-fitting is disabled at construction.

    Only ``apply_cross_fitting=False`` is passed explicitly; every other
    resampling argument is left at its default.
    """
    expected = 'Estimation without cross-fitting not supported for n_folds > 2.'
    with pytest.raises(AssertionError, match=expected):
        DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=False)
def test_doubleml_exception_bootstrap():
    """Exercise the error paths of ``bootstrap()`` before and after ``fit()``."""
    plr = DoubleMLPLR(dml_data, ml_g, ml_m)

    # bootstrap() on an unfitted object.
    with pytest.raises(ValueError, match=r'Apply fit\(\) before bootstrap\(\).'):
        plr.bootstrap()

    plr.fit()

    # Invalid arguments, checked in this order on the fitted object:
    # (expected exception, expected message pattern, keyword arguments).
    invalid_calls = [
        (ValueError,
         'Method must be "Bayes", "normal" or "wild". Got Gaussian.',
         {'method': 'Gaussian'}),
        (TypeError,
         "The number of bootstrap replications must be of int type. 500 of type <class 'str'> was passed.",
         {'n_rep_boot': '500'}),
        (ValueError,
         'The number of bootstrap replications must be positive. 0 was passed.',
         {'n_rep_boot': 0}),
    ]
    for exc_type, pattern, kwargs in invalid_calls:
        with pytest.raises(exc_type, match=pattern):
            plr.bootstrap(**kwargs)
# Module-level fixtures for the exception tests (code below is left untouched).
#
# What the statements on the following line do, in order:
#   * import pandas/numpy, the DoubleML model and data classes, the dataset
#     generators, and the scikit-learn learners;
#   * seed NumPy's global RNG with 3141 (presumably to make the generated data
#     reproducible -- confirm that the generators use the global RNG);
#   * build 10-observation PLR, IRM, IIVM and PLIV (dim_z=1) data sets and a
#     clustered PLIV data set with N=10, M=10;
#   * create three Lasso learners (ml_g, ml_m, ml_r) and a DoubleMLPLR object;
#   * draw a 30-observation IIVM sample as arrays and recode the outcome y in
#     place: entries > 0 become 1, entries < 0 become 0 (entries equal to 0 are
#     not touched by either assignment), then wrap the arrays in DoubleMLData
#     objects without (IRM) and with (IIVM) the instrument z.
#
# NOTE(review): `pytest` is referenced by the decorator below but no
# `import pytest` is visible in this excerpt -- confirm it is imported above.
# NOTE(review): `test_doubleml_exception_data` appears to continue past the end
# of this excerpt; only its first assignment is visible here.
import pandas as pd import numpy as np from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\ make_pliv_multiway_cluster_CKMS2021 from sklearn.linear_model import Lasso, LogisticRegression from sklearn.base import BaseEstimator np.random.seed(3141) dml_data = make_plr_CCDDHNR2018(n_obs=10) ml_g = Lasso() ml_m = Lasso() ml_r = Lasso() dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m) dml_data_irm = make_irm_data(n_obs=10) dml_data_iivm = make_iivm_data(n_obs=10) dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1) dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) (x, y, d, z) = make_iivm_data(n_obs=30, return_type="array") y[y > 0] = 1 y[y < 0] = 0 dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d) dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z) @pytest.mark.ci def test_doubleml_exception_data(): msg = 'The data must be of DoubleMLData type.'
def test_doubleml_warning_crossfitting_onefold():
    """Expect a ``UserWarning`` when cross-fitting is requested with one fold."""
    expected = 'apply_cross_fitting is set to False. Cross-fitting is not supported for n_folds = 1.'
    with pytest.warns(UserWarning, match=expected):
        DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=True, n_folds=1)
def test_doubleml_draw_vs_set():
    """Compare externally set sample splittings with freshly drawn ones.

    For each scenario a new ``DoubleMLPLR`` object is constructed with the
    given resampling arguments. Its ``smpls`` -- and, where listed, the
    progressively unwrapped forms ``smpls[0]`` and ``smpls[0][0]`` -- are
    handed to ``set_sample_splitting`` of one shared object. After every call
    the two objects are compared with ``_assert_resampling_pars``.
    """
    np.random.seed(3141)
    dml_plr_set = DoubleMLPLR(dml_data, ml_g, ml_m, n_folds=7, n_rep=8)

    # (n_folds, n_rep, apply_cross_fitting, number of nesting levels to try)
    # Levels: 1 -> smpls; 2 -> smpls, smpls[0]; 3 -> smpls, smpls[0], smpls[0][0]
    scenarios = [
        (1, 1, False, 3),
        (2, 1, False, 3),
        (2, 1, True, 2),
        (5, 1, True, 2),
        (5, 3, True, 1),
        (2, 4, False, 1),
    ]

    for folds, reps, cross_fit, n_levels in scenarios:
        drawn = DoubleMLPLR(dml_data, ml_g, ml_m,
                            n_folds=folds, n_rep=reps, apply_cross_fitting=cross_fit)
        for depth in range(n_levels):
            # Peel off `depth` outer list layers from the drawn splitting.
            partition = drawn.smpls
            for _ in range(depth):
                partition = partition[0]
            dml_plr_set.set_sample_splitting(partition)
            _assert_resampling_pars(drawn, dml_plr_set)
import pytest
import pandas as pd
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before any data is generated; the statement order
# below is kept as is so that the sequence of random draws does not change.
np.random.seed(3141)

# One 100-observation data set per model class.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# One model object per class, each built with fresh learner instances.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression())


@pytest.mark.ci
@pytest.mark.parametrize(
    'dml_obj, cls',
    [
        (dml_plr, DoubleMLPLR),
        (dml_pliv, DoubleMLPLIV),
        (dml_irm, DoubleMLIRM),
        (dml_iivm, DoubleMLIIVM),
    ],
)
def test_plr_return_types(dml_obj, cls):
    """Check the types returned by ``__str__()`` and ``summary``.

    NOTE(review): ``cls`` is not used by the assertions visible here --
    confirm whether further checks follow elsewhere in the file.
    """
    # TODO: a second test case with multiple treatment variables would be helpful
    text = dml_obj.__str__()
    assert isinstance(text, str)

    summary = dml_obj.summary
    assert isinstance(summary, pd.DataFrame)
# Module-level fixtures and comparison helpers (code below is left untouched).
#
# What the statements on the following line do, in order:
#   * import pytest, numpy, DoubleMLPLR, the PLR data generator and Lasso;
#   * seed NumPy's global RNG with 3141 and generate a 10-observation PLR
#     data set;
#   * create two Lasso learners and a DoubleMLPLR object with n_folds=7,
#     n_rep=8 and draw_sample_splitting=False;
#   * define `_assert_resampling_pars(dml_obj0, dml_obj1)`, which asserts that
#     both objects agree on n_folds, n_rep and apply_cross_fitting and then
#     passes their `smpls` (plus dml_obj0.apply_cross_fitting) to
#     `_assert_smpls_equal`;
#   * begin `_assert_smpls_equal(smpls0, smpls1, apply_cross_fitting=True)`,
#     which first asserts that both sample lists have the same length and then
#     starts iterating over the entries of smpls0.
#
# NOTE(review): the body of the `for` loop in `_assert_smpls_equal` lies past
# the end of this excerpt, so the rest of that helper is not documented here.
import pytest import numpy as np from doubleml import DoubleMLPLR from doubleml.datasets import make_plr_CCDDHNR2018 from sklearn.linear_model import Lasso np.random.seed(3141) dml_data = make_plr_CCDDHNR2018(n_obs=10) ml_g = Lasso() ml_m = Lasso() dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m, n_folds=7, n_rep=8, draw_sample_splitting=False) def _assert_resampling_pars(dml_obj0, dml_obj1): assert dml_obj0.n_folds == dml_obj1.n_folds assert dml_obj0.n_rep == dml_obj1.n_rep assert dml_obj0.apply_cross_fitting == dml_obj1.apply_cross_fitting _assert_smpls_equal(dml_obj0.smpls, dml_obj1.smpls, dml_obj0.apply_cross_fitting) def _assert_smpls_equal(smpls0, smpls1, apply_cross_fitting=True): assert len(smpls0) == len(smpls1) for i_rep, _ in enumerate(smpls0):
import pytest
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before any data is generated; the statement order
# below is kept as is so that the sequence of random draws does not change.
np.random.seed(3141)

# One 100-observation data set per model class.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Model objects built without any optional arguments, so that every
# resampling setting is left at its default.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression())


def _assert_resampling_default_settings(dml_obj):
    """Assert the resampling settings expected of a default-constructed object.

    Checks ``n_folds == 5``, ``n_rep == 1`` and that both
    ``draw_sample_splitting`` and ``apply_cross_fitting`` are truthy.
    """
    folds = dml_obj.n_folds
    assert folds == 5

    repetitions = dml_obj.n_rep
    assert repetitions == 1

    assert dml_obj.draw_sample_splitting
    assert dml_obj.apply_cross_fitting


@pytest.mark.ci
def test_plr_defaults():
    """Check the resampling defaults of the module-level ``dml_plr`` object."""
    _assert_resampling_default_settings(dml_plr)
import pytest
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before any data is generated or any model is fitted;
# the statement order below is kept as is so that the sequence of random draws
# does not change.
np.random.seed(3141)

# One 100-observation data set per model class.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Construct and immediately fit one model per class.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_plr.fit()

dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_pliv.fit()

dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_irm.fit()

dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression())
dml_iivm.fit()

# Models with callable scores: the PLR object's own `_score_elements` method
# is passed as the `score` argument of a second PLR object, which is built
# without drawing a sample splitting.
plr_score = dml_plr._score_elements
dml_plr_callable_score = DoubleMLPLR(
    dml_data_plr,
    Lasso(),
    Lasso(),
    score=plr_score,
    draw_sample_splitting=False,
)
def test_doubleml_exception_p_adjust():
    """Exercise the error paths of ``p_adjust()`` and one successful call.

    The calls are made in a fixed order on a single ``DoubleMLPLR`` object:
    before ``fit()``, after ``fit()``, and after ``bootstrap()``.
    """
    plr = DoubleMLPLR(dml_data, ml_g, ml_m)

    # p_adjust() on an unfitted object.
    with pytest.raises(ValueError, match=r'Apply fit\(\) before p_adjust\(\).'):
        plr.p_adjust()

    # After fit() but before bootstrap(), the 'romano-wolf' method is
    # expected to be rejected.
    plr.fit()
    with pytest.raises(ValueError, match=r'Apply fit\(\) & bootstrap\(\) before p_adjust'):
        plr.p_adjust(method='romano-wolf')

    # Once bootstrap() has been applied, the same call returns a DataFrame.
    plr.bootstrap()
    adjusted = plr.p_adjust(method='romano-wolf')
    assert isinstance(adjusted, pd.DataFrame)

    # A non-string method argument.
    with pytest.raises(
            TypeError,
            match="The p_adjust method must be of str type. 0.05 of type <class 'float'> was passed."):
        plr.p_adjust(method=0.05)
def test_doubleml_exception_smpls():
    """Expect a ``ValueError`` when ``smpls`` is read and no splitting exists.

    The object is built with ``draw_sample_splitting=False`` and no splitting
    is set afterwards.
    """
    unsplit = DoubleMLPLR(dml_data, ml_g, ml_m, draw_sample_splitting=False)

    # Plain first sentence followed by a raw-string part that carries the
    # escaped parentheses of the expected message.
    prefix = 'Sample splitting not specified. '
    hint = r'Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\).'

    with pytest.raises(ValueError, match=prefix + hint):
        _ = unsplit.smpls