Exemplo n.º 1
0
def test_enable_ipw_without_known_propensity_parallel_runner():
    """Smoke-test CausalLift with IPW enabled and the ParallelRunner.

    Generates a synthetic dataset, estimates CATE with the two-model
    approach, and asserts that all outputs are pandas DataFrames.
    """
    seed = 0

    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )

    # FIX: pass random_state=seed so the split is reproducible, consistent
    # with the other tests in this file (which all seed the split).
    train_df, test_df = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["Treatment"]
    )

    cl = CausalLift(
        train_df, test_df, enable_ipw=True, verbose=3, runner="ParallelRunner"
    )
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)
Exemplo n.º 2
0
def test_enable_ipw_with_known_propensity():
    """IPW path where the Propensity column is supplied by the caller.

    Builds a synthetic dataset, attaches a random Propensity column to both
    splits, and asserts that CausalLift returns pandas DataFrames.
    """
    seed = 0

    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5,
             10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )

    train_df, test_df = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["Treatment"]
    )

    test_random_propensity = True

    if test_random_propensity:
        import random

        def _with_random_propensity(frame):
            # Attach an externally supplied (random) propensity score so
            # CausalLift uses it instead of fitting its own propensity model.
            frame = frame.copy()
            frame.loc[:, "Propensity"] = [
                random.random() for _ in range(frame.shape[0])
            ]
            return frame

        train_df = _with_random_propensity(train_df)
        test_df = _with_random_propensity(test_df)

    cl = CausalLift(train_df, test_df, enable_ipw=True, verbose=3)
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)
Exemplo n.º 3
0
from causallift import CausalLift
from causallift import generate_data

from sklearn.model_selection import train_test_split

seed = 0

# Synthetic dataset: 1000 samples, 3 features, discrete outcome.
# FIX: removed the redundant "\" line continuation (it is unnecessary inside
# parentheses) and grouped imports at the top of the script.
df = generate_data(
    N=1000,
    n_features=3,
    beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
    error_std=0.1,
    tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
    tau_std=0.1,
    discrete_outcome=True,
    seed=seed,
    feature_effect=0,  # Effect of beta on treated outcome
    propensity_coef=[0, -1, 1, -1],  # Effect of [intercept and features] on propensity log-odds for treatment
    index_name='index')

train_df, test_df = train_test_split(df,
                                     test_size=0.2,
                                     random_state=seed,
                                     stratify=df['Treatment'])

# IPW disabled: treat the data as if it came from an A/B test.
cl = CausalLift(train_df, test_df, enable_ipw=False, random_state=0, verbose=3)
train_df, test_df = cl.estimate_cate_by_2_models()
estimated_effect_df = cl.estimate_recommendation_impact()
Exemplo n.º 4
0
def _generate_skopt_split(seed):
    """Build the synthetic dataset used by test_skopt and return the
    stratified 80/20 train/test split."""
    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5,
             10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )
    return train_test_split(df,
                            test_size=0.2,
                            random_state=seed,
                            stratify=df["Treatment"])


def test_skopt():
    """Exercise CausalLift with skopt.BayesSearchCV two ways:

    1. declaratively, via the ``uplift_model_params`` dict of strings, and
    2. by passing a pre-constructed BayesSearchCV instance.

    Both runs must produce pandas DataFrames.
    """
    seed = 0

    train_df, test_df = _generate_skopt_split(seed)

    # Constant XGBoost parameters shared by both runs.
    xgb_const_params = dict(
        booster="gbtree",
        silent=True,
        objective="binary:logistic",
        base_score=0.5,
        eval_metric="auc",
        n_jobs=-1,
        seed=seed,
    )

    # Run 1: CausalLift builds the BayesSearchCV internally from this spec.
    uplift_model_params = dict(
        search_cv="skopt.BayesSearchCV",
        estimator="xgboost.XGBClassifier",
        const_params=xgb_const_params,
        search_spaces=dict(n_estimators=Integer(100, 200)),
        random_state=seed,
        scoring="roc_auc",
        cv=3,
        n_jobs=-1,
        n_iter=10,
        verbose=1,
        refit=True,
    )

    cl = CausalLift(
        train_df,
        test_df,
        enable_ipw=True,
        verbose=3,
        uplift_model_params=uplift_model_params,
    )
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)

    # Run 2: the same search, but with an externally constructed model.
    seed = 0

    train_df, test_df = _generate_skopt_split(seed)

    estimator = XGBClassifier(**xgb_const_params)

    model = BayesSearchCV(
        estimator=estimator,
        search_spaces=dict(n_estimators=Integer(100, 200)),
        random_state=seed,
        scoring="roc_auc",
        cv=3,
        n_jobs=-1,
        n_iter=10,
        verbose=1,
        refit=True,
    )

    cl_ext = CausalLift(train_df,
                        test_df,
                        enable_ipw=True,
                        verbose=3,
                        uplift_model_params=model)
    train_df_ext, test_df_ext = cl_ext.estimate_cate_by_2_models()
    estimated_effect_df_ext = cl_ext.estimate_recommendation_impact()
    assert isinstance(train_df_ext, pd.DataFrame)
    assert isinstance(test_df_ext, pd.DataFrame)
    assert isinstance(estimated_effect_df_ext, pd.DataFrame)
Exemplo n.º 5
0
def causal_lift_model(cm_df, decoder, treatment_name, use_multi=True, seed=5):
    """Run CausalLift uplift modelling on the crime dataframe.

    Args:
        cm_df: Input DataFrame. Must contain ``treatment_name``,
            'Description', and the top-10 feature columns selected in
            ``_run_causal_lift``.
        decoder: Mapping from encoded 'Description' values to readable
            labels (used only for printing in the binary-outcome loop).
        treatment_name: Column whose below-or-equal-average value defines
            the binary 'Treatment' indicator.
        use_multi: If True, use the raw 'Description' column as a
            multi-class 'Outcome'; otherwise loop over each class, treating
            it as a binary outcome.
        seed: Random state for the train/test split.
    """
    # Need to drop null for computations - could also impute these.
    # BUG FIX: the original assigned the dropna()'d column back into the
    # frame, which does NOT remove rows (the frame length was unchanged and
    # the reindexed values landed on the wrong rows). Drop the rows instead.
    original_len = len(cm_df)
    print('Original data length {}'.format(original_len))
    cm_df = cm_df.dropna(subset=[treatment_name]).reset_index(drop=True)
    clean_len = len(cm_df)
    print('Clean data length {}'.format(clean_len))
    # NOTE(review): ratio is relative to the cleaned length, as in the
    # original code; it was previously computed but never reported.
    ratio_dropped = (original_len - clean_len) / clean_len
    print('Ratio dropped {}'.format(ratio_dropped))

    # Separate treatment into two categories (below/equal vs. above average).
    treatment_mean = cm_df[treatment_name].mean()
    cm_df['Treatment'] = np.where(cm_df[treatment_name] <= treatment_mean,
                                  1, 0)

    if use_multi:
        # Use multiple classes as target: the raw description is the outcome.
        cm_df['Outcome'] = cm_df['Description']
        _run_causal_lift(cm_df, seed)
    else:
        # Test each outcome class as its own binary target.
        outcome_names = set(cm_df['Description'].values)
        print(outcome_names)

        for outcome in outcome_names:
            cm_df_copy = cm_df.copy()
            print('\n{}'.format(decoder[outcome]))
            cm_df_copy['Outcome'] = np.where(
                cm_df_copy['Description'] == outcome, 1, 0)
            print(cm_df_copy['Outcome'])
            _run_causal_lift(cm_df_copy, seed)


def _run_causal_lift(frame, seed):
    """Select the top-10 feature columns, split 80/20 stratified on
    'Treatment', and fit CausalLift with inverse probability weighting."""
    frame = frame[[
        'Treatment', 'Outcome', 'Weapon_FIREARM', 'Weapon_HANDS',
        'Weapon_KNIFE', 'Weapon_NONE', 'Weapon_OTHER', 'Neighborhood',
        'Premise', 'Month', 'Hour', 'Outside'
    ]]
    train_df, test_df = train_test_split(frame,
                                         test_size=0.2,
                                         random_state=seed,
                                         stratify=frame['Treatment'])

    print(
        '\n[Estimate propensity scores for Inverse Probability Weighting.]'
    )
    CausalLift(train_df, test_df, enable_ipw=True, verbose=3)