Exemplo n.º 1
0
def test_make_iivm_data_return_types():
    """Check that ``make_iivm_data`` honours each requested ``return_type``.

    The ``'DoubleMLData'``, ``'DataFrame'`` and ``'array'`` variants are
    requested in turn and the type of the result is asserted.  An unsupported
    value (``'matrix'``) must raise ``ValueError`` with a message matching
    ``msg_inv_return_type``.
    """
    np.random.seed(3141)

    dml_obj = make_iivm_data(n_obs=100, return_type='DoubleMLData')
    assert isinstance(dml_obj, DoubleMLData)

    frame = make_iivm_data(n_obs=100, return_type='DataFrame')
    assert isinstance(frame, pd.DataFrame)

    # The array variant has to unpack into exactly four components.
    x, y, d, z = make_iivm_data(n_obs=100, return_type='array')
    for component in (x, y, d, z):
        assert isinstance(component, np.ndarray)

    with pytest.raises(ValueError, match=msg_inv_return_type):
        make_iivm_data(n_obs=100, return_type='matrix')
Exemplo n.º 2
0
def generate_data_iivm(request):
    """Generate one IIVM data set for the size pair held in ``request.param``.

    ``request.param`` is indexed as ``(n, p)``; both values are forwarded
    positionally to ``make_iivm_data`` together with fixed ``theta`` and
    ``gamma_z`` values.  The global NumPy RNG is seeded first so the
    generated data are reproducible.

    Returns whatever ``make_iivm_data`` produces for
    ``return_type=pd.DataFrame``.
    """
    dims = request.param
    np.random.seed(1111)

    # simulation settings
    n_rows, n_covariates = dims[0], dims[1]
    effect = 0.5
    instrument_strength = 0.4

    return make_iivm_data(n_rows, n_covariates, effect, instrument_strength,
                          return_type=pd.DataFrame)
Exemplo n.º 3
0
def generate_data_iivm(request):
    """Generate ``n_datasets`` IIVM data sets for the sizes in ``request.param``.

    ``request.param`` is indexed as ``(N, p)``.  The global NumPy RNG is
    seeded once up front, then ``make_iivm_data`` is called ``n_datasets``
    times (``n_datasets`` is defined outside this function) with the same
    arguments, so the draws differ only through the advancing RNG state.

    Returns a list with one ``make_iivm_data`` result per call.
    """
    dims = request.param
    np.random.seed(1111)

    # simulation settings
    n_rows, n_covariates = dims[0], dims[1]
    effect = 0.5
    instrument_strength = 0.4

    return [
        make_iivm_data(n_rows, n_covariates, effect, instrument_strength,
                       return_type=pd.DataFrame)
        for _ in range(n_datasets)
    ]
from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\
    make_pliv_multiway_cluster_CKMS2021

from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.base import BaseEstimator

# Seed NumPy's global RNG before any of the data sets below are generated;
# the generator calls run in this order after the seed.
np.random.seed(3141)
# Small generated data set (n_obs=10) plus Lasso learners, combined into a
# DoubleMLPLR object at module level.
dml_data = make_plr_CCDDHNR2018(n_obs=10)
ml_g = Lasso()
ml_m = Lasso()
# NOTE(review): ml_r is not used in the visible part of this file --
# presumably needed by tests further down; confirm.
ml_r = Lasso()
dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m)

# Additional generated data sets, one per generator function.
dml_data_irm = make_irm_data(n_obs=10)
dml_data_iivm = make_iivm_data(n_obs=10)
dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1)
dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
# Array-form IIVM data whose outcome is binarized in place: positive values
# become 1 and negative values become 0 (exact zeros are left unchanged).
(x, y, d, z) = make_iivm_data(n_obs=30, return_type="array")
y[y > 0] = 1
y[y < 0] = 0
# The same binarized arrays wrapped twice: without the fourth array z, and
# with it.
dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d)
dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z)


@pytest.mark.ci
def test_doubleml_exception_data():
    """Passing a plain ``pd.DataFrame`` as data must raise ``TypeError``.

    ``DoubleMLPLR`` is constructed with an empty ``pd.DataFrame`` instead of a
    ``DoubleMLData`` object, and the error message is matched against the
    expected text.
    """
    expected_message = 'The data must be of DoubleMLData type.'
    with pytest.raises(TypeError, match=expected_message):
        DoubleMLPLR(pd.DataFrame(), ml_g, ml_m)
Exemplo n.º 5
0
import pytest
import pandas as pd
import numpy as np

from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data

from sklearn.linear_model import Lasso, LogisticRegression

# Seed NumPy's global RNG before the data sets are generated; the generator
# calls below run in this order after the seed.
np.random.seed(3141)
# One generated data set (n_obs=100) per model class used below.
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
dml_data_irm = make_irm_data(n_obs=100)
dml_data_iivm = make_iivm_data(n_obs=100)

# Module-level model objects passed to the parametrized test below.
# Each gets fresh learner instances; fit() is not called at this point.
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(),
                        LogisticRegression())


@pytest.mark.ci
@pytest.mark.parametrize('dml_obj, cls', [(dml_plr, DoubleMLPLR),
                                          (dml_pliv, DoubleMLPLIV),
                                          (dml_irm, DoubleMLIRM),
                                          (dml_iivm, DoubleMLIIVM)])
def test_plr_return_types(dml_obj, cls):
    """Check the types of ``__str__()`` and ``summary`` for each model object.

    Runs once per ``(dml_obj, cls)`` pair from the parametrization above.

    NOTE(review): ``cls`` is not used by the assertions visible here --
    confirm whether further checks that use it follow.
    """
    # ToDo: A second test case with multiple treatment variables would be helpful
    # The string representation must be a plain str.
    assert isinstance(dml_obj.__str__(), str)
    # The summary property must be a pandas DataFrame.
    assert isinstance(dml_obj.summary, pd.DataFrame)