def test_doubleml_exception_scores():
    """Check that each model class rejects unsupported ``score`` arguments.

    For DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM and DoubleMLPLIV an unknown
    score string is expected to raise a ``ValueError`` and the integer ``0``
    is expected to raise a ``TypeError``.
    """
    # the TypeError pattern is the same for all four model classes
    wrong_type_msg = 'score should be either a string or a callable. 0 was passed.'

    # partially linear regression
    with pytest.raises(ValueError, match='Invalid score IV. Valid score IV-type or partialling out.'):
        DoubleMLPLR(dml_data, ml_g, ml_m, score='IV')
    with pytest.raises(TypeError, match=wrong_type_msg):
        DoubleMLPLR(dml_data, ml_g, ml_m, score=0)

    # interactive regression
    with pytest.raises(ValueError, match='Invalid score IV. Valid score ATE or ATTE.'):
        DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), score='IV')
    with pytest.raises(TypeError, match=wrong_type_msg):
        DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), score=0)

    # interactive IV regression
    with pytest.raises(ValueError, match='Invalid score ATE. Valid score LATE.'):
        DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), score='ATE')
    with pytest.raises(TypeError, match=wrong_type_msg):
        DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), score=0)

    # partially linear IV regression
    with pytest.raises(ValueError, match='Invalid score IV. Valid score partialling out.'):
        DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), score='IV')
    with pytest.raises(TypeError, match=wrong_type_msg):
        DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), score=0)
def test_doubleml_exception_data():
    """Check that the model constructors reject unsuitable data objects.

    Covers a plain ``pd.DataFrame`` passed instead of a data object, data with
    instruments handed to DoubleMLPLR / DoubleMLIRM, and IRM / IIVM treatment
    or instrument columns that are either multiplied by two or specified as
    two columns.
    """
    with pytest.raises(TypeError, match='The data must be of DoubleMLData type.'):
        DoubleMLPLR(pd.DataFrame(), ml_g, ml_m)

    # data objects with instruments passed to the model classes without instruments
    plr_with_iv_msg = (
        r'Incompatible data. Z1 have been set as instrumental variable\(s\). '
        'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.'
    )
    with pytest.raises(ValueError, match=plr_with_iv_msg):
        DoubleMLPLR(dml_data_pliv, ml_g, ml_m)

    irm_with_iv_msg = (
        r'Incompatible data. z have been set as instrumental variable\(s\). '
        'To fit an interactive IV regression model use DoubleMLIIVM instead of DoubleMLIRM.'
    )
    with pytest.raises(ValueError, match=irm_with_iv_msg):
        DoubleMLIRM(dml_data_iivm, Lasso(), LogisticRegression())

    # IRM: treatment column multiplied by two, then two treatment columns
    irm_treatment_msg = (
        'Incompatible data. To fit an IRM model with DML exactly one binary variable with values 0 and 1 '
        'needs to be specified as treatment variable.')
    irm_scaled = dml_data_irm.data.copy()
    irm_scaled['d'] = irm_scaled['d'] * 2
    with pytest.raises(ValueError, match=irm_treatment_msg):
        DoubleMLIRM(DoubleMLData(irm_scaled, 'y', 'd'), Lasso(), LogisticRegression())
    irm_frame = dml_data_irm.data.copy()
    with pytest.raises(ValueError, match=irm_treatment_msg):
        DoubleMLIRM(DoubleMLData(irm_frame, 'y', ['d', 'X1']), Lasso(), LogisticRegression())

    # IIVM: treatment column multiplied by two, then two treatment columns
    iivm_treatment_msg = (
        'Incompatible data. To fit an IIVM model with DML exactly one binary variable with values 0 and 1 '
        'needs to be specified as treatment variable.')
    iivm_scaled_d = dml_data_iivm.data.copy()
    iivm_scaled_d['d'] = iivm_scaled_d['d'] * 2
    with pytest.raises(ValueError, match=iivm_treatment_msg):
        DoubleMLIIVM(DoubleMLData(iivm_scaled_d, 'y', 'd', z_cols='z'),
                     Lasso(), LogisticRegression(), LogisticRegression())
    iivm_frame = dml_data_iivm.data.copy()
    with pytest.raises(ValueError, match=iivm_treatment_msg):
        DoubleMLIIVM(DoubleMLData(iivm_frame, 'y', ['d', 'X1'], z_cols='z'),
                     Lasso(), LogisticRegression(), LogisticRegression())

    # IIVM: instrument column multiplied by two, then two instrument columns
    iivm_instrument_msg = (
        'Incompatible data. To fit an IIVM model with DML exactly one binary variable with values 0 and 1 '
        'needs to be specified as instrumental variable.')
    iivm_scaled_z = dml_data_iivm.data.copy()
    iivm_scaled_z['z'] = iivm_scaled_z['z'] * 2
    with pytest.raises(ValueError, match=iivm_instrument_msg):
        DoubleMLIIVM(DoubleMLData(iivm_scaled_z, 'y', 'd', z_cols='z'),
                     Lasso(), LogisticRegression(), LogisticRegression())
    iivm_frame = dml_data_iivm.data.copy()
    with pytest.raises(ValueError, match=iivm_instrument_msg):
        DoubleMLIIVM(DoubleMLData(iivm_frame, 'y', 'd', z_cols=['z', 'X1']),
                     Lasso(), LogisticRegression(), LogisticRegression())
def test_doubleml_exception_and_warning_learner():
    """Check the ``TypeError`` raised for learners lacking a prediction method."""
    # Only the exception type is checked here; the message pattern is currently disabled:
    # err_msg_prefix + r'_DummyNoClassifier\(\) has no method .predict\(\).'
    with pytest.raises(TypeError):
        DoubleMLPLR(dml_data, _DummyNoClassifier(), Lasso())

    # a Lasso instance passed as ml_m of the IRM model
    no_predict_proba_msg = 'Invalid learner provided for ml_m: ' + r'Lasso\(\) has no method .predict_proba\(\).'
    with pytest.raises(TypeError, match=no_predict_proba_msg):
        DoubleMLIRM(dml_data_irm, Lasso(), Lasso())
def test_doubleml_exception_trimming_rule():
    """Check that IRM and IIVM raise ``ValueError`` for ``trimming_rule='discard'``."""
    invalid_rule_msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'

    with pytest.raises(ValueError, match=invalid_rule_msg):
        DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule='discard')

    with pytest.raises(ValueError, match=invalid_rule_msg):
        DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
                     trimming_rule='discard')
def test_doubleml_exception_learner():
    """Check the errors and warnings issued for unsuitable learner arguments.

    Exercises learners given as a class instead of an instance, learners
    lacking required methods, learners of an unexpected estimator type, and a
    classifier whose ``_estimator_type`` has been cleared.
    """
    ml_g_error = 'Invalid learner provided for ml_g: '
    ml_g_warning = 'Learner provided for ml_g is probably invalid: '

    # a learner class instead of a learner instance
    with pytest.raises(TypeError, match=ml_g_error + 'provide an instance of a learner instead of a class.'):
        DoubleMLPLR(dml_data, Lasso, ml_m)
    # a bare BaseEstimator instance
    with pytest.raises(TypeError, match=ml_g_error + r'BaseEstimator\(\) has no method .fit\(\).'):
        DoubleMLPLR(dml_data, BaseEstimator(), ml_m)
    # For the two dummy learners below only the exception type is checked; the disabled patterns are
    # ml_g_error + r'_DummyNoSetParams\(\) has no method .set_params\(\).'
    with pytest.raises(TypeError):
        DoubleMLPLR(dml_data, _DummyNoSetParams(), ml_m)
    # ml_g_error + r'_DummyNoGetParams\(\) has no method .get_params\(\).'
    with pytest.raises(TypeError):
        DoubleMLPLR(dml_data, _DummyNoGetParams(), ml_m)

    # Only the warning category is checked here; the disabled pattern is
    # 'Learner provided for ml_m is probably invalid: ' + r'_DummyNoClassifier\(\) is \(probably\) no classifier.'
    with pytest.warns(UserWarning):
        DoubleMLIRM(dml_data_irm, Lasso(), _DummyNoClassifier())

    # ToDo: Currently for ml_g (and others) we only check whether the learner can be identified as regressor. However,
    # we do not check whether it can instead be identified as classifier, which could be used to throw an error.
    with pytest.warns(UserWarning, match=ml_g_warning + r'LogisticRegression\(\) is \(probably\) no regressor.'):
        DoubleMLPLR(dml_data, LogisticRegression(), Lasso())

    # we allow classifiers for ml_m in PLR, but only for binary treatment variables
    non_binary_msg = (
        r'The ml_m learner LogisticRegression\(\) was identified as classifier '
        'but at least one treatment variable is not binary with values 0 and 1.'
    )
    with pytest.raises(ValueError, match=non_binary_msg):
        DoubleMLPLR(dml_data, Lasso(), LogisticRegression())

    # construct a classifier which is not identifiable as classifier via is_classifier by sklearn
    # it then predicts labels and therefore an exception will be thrown
    hidden_clf = LogisticRegression()
    hidden_clf._estimator_type = None
    unknown_type_msg = (
        r'Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) neither a regressor '
        'nor a classifier. Method predict is used for prediction.')
    with pytest.warns(UserWarning, match=unknown_type_msg):
        plr_hidden_clf = DoubleMLPLR(dml_data_irm, Lasso(), hidden_clf)

    # the label predictions are only detected once the model is fitted
    binary_pred_msg = (
        r'For the binary treatment variable d, predictions obtained with the ml_m learner LogisticRegression\(\) '
        'are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not '
        'labels are predicted.')
    with pytest.raises(ValueError, match=binary_pred_msg):
        plr_hidden_clf.fit()
Exemplo n.º 6
0
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Module-level fixtures shared by the tests below.
# Seed NumPy's global RNG before any data is generated; presumably this makes
# the generated data sets reproducible across runs (the statement order matters).
np.random.seed(3141)
# One data object per model class, each requested with n_obs=100
# (the PLIV generator is additionally called with dim_z=1).
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# One model object per class, constructed with otherwise default arguments.
# None of them is fitted in this section.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())


@pytest.mark.ci
@pytest.mark.parametrize(
    'dml_obj, cls',
    [
        (dml_plr, DoubleMLPLR),
        (dml_pliv, DoubleMLPLIV),
        (dml_irm, DoubleMLIRM),
        (dml_iivm, DoubleMLIIVM),
    ],
)
def test_plr_return_types(dml_obj, cls):
    """Check the return types of the string form, summary and resampling methods.

    ``draw_sample_splitting`` and ``set_sample_splitting`` are expected to
    return an instance of the model's own class ``cls``.
    """
    # ToDo: A second test case with multiple treatment variables would be helpful
    as_text = dml_obj.__str__()
    assert isinstance(as_text, str)

    summary_table = dml_obj.summary
    assert isinstance(summary_table, pd.DataFrame)

    after_draw = dml_obj.draw_sample_splitting()
    assert isinstance(after_draw, cls)

    after_set = dml_obj.set_sample_splitting(dml_obj.smpls)
    assert isinstance(after_set, cls)
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Module-level fixtures for the default-settings tests below.
# Seed NumPy's global RNG before any data is generated; presumably this makes
# the generated data sets reproducible across runs (the statement order matters).
np.random.seed(3141)
# One data object per model class, each requested with n_obs=100
# (the PLIV generator is additionally called with dim_z=1).
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Models are built without any optional arguments so that their attributes
# reflect the constructor defaults. None of them is fitted in this section.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())


def _assert_resampling_default_settings(dml_obj):
    """Assert that ``dml_obj`` carries the default resampling configuration.

    Parameters
    ----------
    dml_obj :
        Model object exposing ``n_folds``, ``n_rep``, ``smpls`` and
        ``apply_cross_fitting``.

    Raises
    ------
    AssertionError
        If the number of folds is not 5, the number of repetitions is not 1,
        the stored sample splitting does not match these numbers, or
        cross-fitting is not enabled.
    """
    assert dml_obj.n_folds == 5
    assert dml_obj.n_rep == 1
    # ``draw_sample_splitting`` is a method on the model object, so asserting the
    # attribute itself is always truthy and verifies nothing. Inspect the stored
    # partition instead: it is expected to hold one entry per repetition, each
    # containing one (train, test) pair per fold.
    smpls = dml_obj.smpls
    assert len(smpls) == dml_obj.n_rep
    assert all(len(rep_smpls) == dml_obj.n_folds for rep_smpls in smpls)
    assert dml_obj.apply_cross_fitting


@pytest.mark.ci
def test_plr_defaults():
    """Check the default resampling settings and default score of the PLR model."""
    expected_score = 'partialling out'
    _assert_resampling_default_settings(dml_plr)
    assert dml_plr.score == expected_score
from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Module-level fixtures: fitted reference models plus a PLR model driven by a
# callable score.
# Seed NumPy's global RNG before any data is generated; presumably this makes
# the generated data sets reproducible across runs (the statement order matters).
np.random.seed(3141)
# One data object per model class, each requested with n_obs=100
# (the PLIV generator is additionally called with dim_z=1).
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Construct each model with default arguments and fit it immediately.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_plr.fit()
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_pliv.fit()
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_irm.fit()
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())
dml_iivm.fit()

# fit models with callable scores
# The bound method ``_score_elements`` of the fitted PLR model is passed as the
# user-supplied ``score`` callable of a second PLR model.
plr_score = dml_plr._score_elements
# Sample splitting is not drawn at construction; instead the splits of
# ``dml_plr`` are set explicitly, so both models use the same partition
# (presumably to make their results comparable -- confirm against the tests
# that consume these objects).
dml_plr_callable_score = DoubleMLPLR(dml_data_plr,
                                     Lasso(),
                                     Lasso(),
                                     score=plr_score,
                                     draw_sample_splitting=False)
dml_plr_callable_score.set_sample_splitting(dml_plr.smpls)
dml_plr_callable_score.fit(store_predictions=True)