Exemplo n.º 1
0
def test_make_pliv_CHS2015_return_types():
    np.random.seed(3141)
    res = make_pliv_CHS2015(n_obs=100, return_type='DoubleMLData')
    assert isinstance(res, DoubleMLData)
    res = make_pliv_CHS2015(n_obs=100, return_type='DataFrame')
    assert isinstance(res, pd.DataFrame)
    x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type='array')
    assert isinstance(x, np.ndarray)
    assert isinstance(y, np.ndarray)
    assert isinstance(d, np.ndarray)
    assert isinstance(z, np.ndarray)
    with pytest.raises(ValueError, match=msg_inv_return_type):
        _ = make_pliv_CHS2015(n_obs=100, return_type='matrix')
def test_pliv_callable_not_implemented():
    np.random.seed(3141)
    dml_data_pliv_2z = make_pliv_CHS2015(n_obs=100, dim_z=2)
    pliv_score = dml_pliv._score_elements

    dml_pliv_callable_score = DoubleMLPLIV._partialX(dml_data_pliv_2z,
                                                     Lasso(),
                                                     Lasso(),
                                                     Lasso(),
                                                     score=pliv_score)
    msg = 'Callable score not implemented for DoubleMLPLIV.partialX with several instruments.'
    with pytest.raises(NotImplementedError, match=msg):
        dml_pliv_callable_score.fit()

    dml_pliv_callable_score = DoubleMLPLIV._partialZ(dml_data_pliv_2z,
                                                     Lasso(),
                                                     score=pliv_score)
    msg = 'Callable score not implemented for DoubleMLPLIV.partialZ.'
    with pytest.raises(NotImplementedError, match=msg):
        dml_pliv_callable_score.fit()

    dml_pliv_callable_score = DoubleMLPLIV._partialXZ(dml_data_pliv_2z,
                                                      Lasso(),
                                                      Lasso(),
                                                      Lasso(),
                                                      score=pliv_score)
    msg = 'Callable score not implemented for DoubleMLPLIV.partialXZ.'
    with pytest.raises(NotImplementedError, match=msg):
        dml_pliv_callable_score.fit()
Exemplo n.º 3
0
def test_obj_vs_from_arrays():
    np.random.seed(3141)
    dml_data = make_plr_CCDDHNR2018(n_obs=100)
    dml_data_from_array = DoubleMLData.from_arrays(
        dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col],
        dml_data.data[dml_data.d_cols])
    assert dml_data_from_array.data.equals(dml_data.data)

    dml_data = _make_pliv_data(n_obs=100)
    dml_data_from_array = DoubleMLData.from_arrays(
        dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col],
        dml_data.data[dml_data.d_cols], dml_data.data[dml_data.z_cols])
    assert dml_data_from_array.data.equals(dml_data.data)

    dml_data = make_pliv_CHS2015(n_obs=100, dim_z=5)
    dml_data_from_array = DoubleMLData.from_arrays(
        dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col],
        dml_data.data[dml_data.d_cols], dml_data.data[dml_data.z_cols])
    assert np.array_equal(dml_data_from_array.data,
                          dml_data.data)  # z_cols name differ

    dml_data = make_plr_CCDDHNR2018(n_obs=100)
    df = dml_data.data.copy().iloc[:, :10]
    df.columns = [f'X{i+1}' for i in np.arange(7)] + ['y', 'd1', 'd2']
    dml_data = DoubleMLData(df, 'y', ['d1', 'd2'],
                            [f'X{i+1}' for i in np.arange(7)])
    dml_data_from_array = DoubleMLData.from_arrays(
        dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col],
        dml_data.data[dml_data.d_cols])
    assert np.array_equal(dml_data_from_array.data, dml_data.data)
Exemplo n.º 4
0
def test_obj_vs_from_arrays():
    np.random.seed(3141)
    dml_data = make_plr_CCDDHNR2018(n_obs=100)
    dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
                                                   dml_data.data[dml_data.y_col],
                                                   dml_data.data[dml_data.d_cols])
    assert dml_data_from_array.data.equals(dml_data.data)

    dml_data = _make_pliv_data(n_obs=100)
    dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
                                                   dml_data.data[dml_data.y_col],
                                                   dml_data.data[dml_data.d_cols],
                                                   dml_data.data[dml_data.z_cols])
    assert dml_data_from_array.data.equals(dml_data.data)

    dml_data = make_pliv_CHS2015(n_obs=100, dim_z=5)
    dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
                                                   dml_data.data[dml_data.y_col],
                                                   dml_data.data[dml_data.d_cols],
                                                   dml_data.data[dml_data.z_cols])
    assert np.array_equal(dml_data_from_array.data, dml_data.data)  # z_cols name differ

    dml_data = make_plr_CCDDHNR2018(n_obs=100)
    df = dml_data.data.copy().iloc[:, :10]
    df.columns = [f'X{i+1}' for i in np.arange(7)] + ['y', 'd1', 'd2']
    dml_data = DoubleMLData(df, 'y', ['d1', 'd2'], [f'X{i+1}' for i in np.arange(7)])
    dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
                                                   dml_data.data[dml_data.y_col],
                                                   dml_data.data[dml_data.d_cols])
    assert np.array_equal(dml_data_from_array.data, dml_data.data)

    dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
    dml_data_from_array = DoubleMLClusterData.from_arrays(dml_data.data[dml_data.x_cols],
                                                          dml_data.data[dml_data.y_col],
                                                          dml_data.data[dml_data.d_cols],
                                                          dml_data.data[dml_data.cluster_cols],
                                                          dml_data.data[dml_data.z_cols])
    df = dml_data.data.copy()
    df.rename(columns={'cluster_var_i': 'cluster_var1',
                       'cluster_var_j': 'cluster_var2',
                       'Y': 'y', 'D': 'd', 'Z': 'z'},
              inplace=True)
    assert dml_data_from_array.data.equals(df)

    # with a single cluster variable
    dml_data_from_array = DoubleMLClusterData.from_arrays(dml_data.data[dml_data.x_cols],
                                                          dml_data.data[dml_data.y_col],
                                                          dml_data.data[dml_data.d_cols],
                                                          dml_data.data[dml_data.cluster_cols[1]],
                                                          dml_data.data[dml_data.z_cols])
    df = dml_data.data.copy().drop(columns='cluster_var_i')
    df.rename(columns={'cluster_var_j': 'cluster_var',
                       'Y': 'y', 'D': 'd', 'Z': 'z'},
              inplace=True)
    assert dml_data_from_array.data.equals(df)
Exemplo n.º 5
0
def generate_data_pliv_partialX(request):
    n_p = request.param
    np.random.seed(1111)
    # setting parameters
    n = n_p
    theta = 1.

    # generating data
    data = make_pliv_CHS2015(n, alpha=theta, dim_z=5)

    return data
Exemplo n.º 6
0
def generate_data_iv(request):
    n_p = request.param
    np.random.seed(1111)
    # setting parameters
    n = n_p[0]
    p = n_p[1]
    theta = 0.5

    # generating data
    data = make_pliv_CHS2015(n_obs=n, dim_x=p, alpha=theta, dim_z=1, return_type=pd.DataFrame)

    return data
Exemplo n.º 7
0
def generate_data_pliv_partialX(request):
    N_p = request.param
    np.random.seed(1111)
    # setting parameters
    N = N_p
    theta = 1.

    # generating data
    datasets = []
    for i in range(n_datasets):
        data = make_pliv_CHS2015(N, alpha=theta, dim_z=5)
        datasets.append(data)

    return datasets
Exemplo n.º 8
0
def generate_data_iv(request):
    N_p = request.param
    np.random.seed(1111)
    # setting parameters
    N = N_p[0]
    p = N_p[1]
    theta = 0.5

    # generating data
    datasets = []
    for i in range(n_datasets):
        data = make_pliv_CHS2015(n_obs=N,
                                 dim_x=p,
                                 alpha=theta,
                                 dim_z=1,
                                 return_type=pd.DataFrame)
        datasets.append(data)

    return datasets
from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\
    make_pliv_multiway_cluster_CKMS2021

from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.base import BaseEstimator

np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=10)
ml_g = Lasso()
ml_m = Lasso()
ml_r = Lasso()
dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m)

dml_data_irm = make_irm_data(n_obs=10)
dml_data_iivm = make_iivm_data(n_obs=10)
dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1)
dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
(x, y, d, z) = make_iivm_data(n_obs=30, return_type="array")
y[y > 0] = 1
y[y < 0] = 0
dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d)
dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z)


@pytest.mark.ci
def test_doubleml_exception_data():
    msg = 'The data must be of DoubleMLData type.'
    with pytest.raises(TypeError, match=msg):
        _ = DoubleMLPLR(pd.DataFrame(), ml_g, ml_m)

    # PLR with IV