def test_enable_ipw_without_known_propensity_parallel_runner():
    """End-to-end run with IPW (propensity estimated internally) on ParallelRunner.

    Checks that CATE estimation and impact estimation both return DataFrames.
    """
    seed = 0
    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )
    # FIX: random_state was missing here (every sibling test passes it), which
    # made this split — and therefore the whole test — non-reproducible.
    train_df, test_df = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["Treatment"]
    )
    cl = CausalLift(
        train_df, test_df, enable_ipw=True, verbose=3, runner="ParallelRunner"
    )
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)
def test_enable_ipw_with_known_propensity():
    """End-to-end run with IPW where the caller supplies a "Propensity" column.

    Random propensity scores are injected into both splits to exercise the
    known-propensity code path.
    """
    seed = 0
    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )
    train_df, test_df = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["Treatment"]
    )
    test_random_propensity = True
    if test_random_propensity:
        import random

        # FIX: seed the RNG — the original drew unseeded random propensities,
        # making the test non-deterministic between runs.
        random.seed(seed)
        train_df = train_df.copy()
        train_df.loc[:, "Propensity"] = [
            random.random() for _ in range(train_df.shape[0])
        ]
        test_df = test_df.copy()
        test_df.loc[:, "Propensity"] = [
            random.random() for _ in range(test_df.shape[0])
        ]
    cl = CausalLift(train_df, test_df, enable_ipw=True, verbose=3)
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)
# Demo script: run CausalLift on synthetic data without IPW.
# Imports hoisted to the top and grouped (third-party / local); the stray
# backslash line-continuation inside the generate_data(...) call was removed —
# it is redundant inside parentheses.
from sklearn.model_selection import train_test_split

from causallift import CausalLift, generate_data

seed = 0

df = generate_data(
    N=1000,
    n_features=3,
    beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
    error_std=0.1,
    tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
    tau_std=0.1,
    discrete_outcome=True,
    seed=seed,
    feature_effect=0,  # Effect of beta on treated outcome
    propensity_coef=[0, -1, 1, -1],  # Effect of [intercept and features] on propensity log-odds for treatment
    index_name='index',
)
train_df, test_df = train_test_split(
    df, test_size=0.2, random_state=seed, stratify=df['Treatment']
)
cl = CausalLift(train_df, test_df, enable_ipw=False, random_state=0, verbose=3)
train_df, test_df = cl.estimate_cate_by_2_models()
estimated_effect_df = cl.estimate_recommendation_impact()
def _generate_split(seed):
    """Generate the synthetic uplift dataset and return a stratified train/test split.

    Extracted helper: the original test duplicated this generate_data +
    train_test_split sequence verbatim for both scenarios.
    """
    df = generate_data(
        N=1000,
        n_features=3,
        beta=[0, -2, 3, -5],  # Effect of [intercept and features] on outcome
        error_std=0.1,
        tau=[1, -5, -5, 10],  # Effect of [intercept and features] on treated outcome
        tau_std=0.1,
        discrete_outcome=True,
        seed=seed,
        feature_effect=0,  # Effect of beta on treated outcome
        propensity_coef=[
            0,
            -1,
            1,
            -1,
        ],  # Effect of [intercept and features] on propensity log-odds for treatment
        index_name="index",
    )
    return train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["Treatment"]
    )


def test_skopt():
    """Run CausalLift with skopt.BayesSearchCV two ways:

    1. configured declaratively via a uplift_model_params dict, and
    2. passing a pre-built BayesSearchCV instance directly.
    """
    # Scenario 1: search CV described by a params dict.
    seed = 0
    train_df, test_df = _generate_split(seed)
    uplift_model_params = dict(
        search_cv="skopt.BayesSearchCV",
        estimator="xgboost.XGBClassifier",
        const_params=dict(
            booster="gbtree",
            silent=True,
            objective="binary:logistic",
            base_score=0.5,
            eval_metric="auc",
            n_jobs=-1,
            seed=seed,
        ),
        search_spaces=dict(n_estimators=Integer(100, 200)),
        random_state=seed,
        scoring="roc_auc",
        cv=3,
        n_jobs=-1,
        n_iter=10,
        verbose=1,
        refit=True,
    )
    cl = CausalLift(
        train_df,
        test_df,
        enable_ipw=True,
        verbose=3,
        uplift_model_params=uplift_model_params,
    )
    train_df, test_df = cl.estimate_cate_by_2_models()
    estimated_effect_df = cl.estimate_recommendation_impact()
    assert isinstance(train_df, pd.DataFrame)
    assert isinstance(test_df, pd.DataFrame)
    assert isinstance(estimated_effect_df, pd.DataFrame)

    # Scenario 2: a ready-made search CV object is passed as the model params.
    seed = 0
    train_df, test_df = _generate_split(seed)
    estimator = XGBClassifier(
        booster="gbtree",
        silent=True,
        objective="binary:logistic",
        base_score=0.5,
        eval_metric="auc",
        n_jobs=-1,
        seed=seed,
    )
    model = BayesSearchCV(
        estimator=estimator,
        search_spaces=dict(n_estimators=Integer(100, 200)),
        random_state=seed,
        scoring="roc_auc",
        cv=3,
        n_jobs=-1,
        n_iter=10,
        verbose=1,
        refit=True,
    )
    cl_ext = CausalLift(
        train_df, test_df, enable_ipw=True, verbose=3, uplift_model_params=model
    )
    train_df_ext, test_df_ext = cl_ext.estimate_cate_by_2_models()
    estimated_effect_df_ext = cl_ext.estimate_recommendation_impact()
    assert isinstance(train_df_ext, pd.DataFrame)
    assert isinstance(test_df_ext, pd.DataFrame)
    assert isinstance(estimated_effect_df_ext, pd.DataFrame)
def causal_lift_model(cm_df, decoder, treatment_name, use_multi=True, seed=5):
    """Run CausalLift uplift modeling on the crime DataFrame.

    Binarizes ``treatment_name`` at its mean into a ``Treatment`` column, then
    either uses the multi-class ``Description`` column directly as the outcome
    (``use_multi=True``) or loops over each distinct ``Description`` value,
    treating it as a one-vs-rest binary outcome.

    Args:
        cm_df: Input DataFrame; must contain ``treatment_name``,
            ``Description``, and the feature columns selected below.
        decoder: Mapping from encoded ``Description`` values to readable labels
            (used only for printing in the binary-outcome loop).
        treatment_name: Name of the column to binarize into the treatment flag.
        use_multi: If True, use the raw ``Description`` as the outcome.
        seed: ``random_state`` for the train/test split.
    """
    # Need to drop null for computations - could also impute these
    original_len = len(cm_df)
    print('Original data length {}'.format(original_len))
    # BUG FIX: the original assigned the dropped-and-reindexed *Series* back
    # into the column (cm_df[treatment_name] = cm_df[treatment_name].dropna()
    # .reset_index(drop=True)), which misaligns values against the frame's
    # index and leaves the row count unchanged — no rows were ever dropped.
    # Drop the null rows from the frame itself instead.
    cm_df = cm_df.dropna(subset=[treatment_name]).reset_index(drop=True)
    clean_len = len(cm_df)
    print('Clean data length {}'.format(clean_len))
    # NOTE(review): computed but never used; also divides by clean_len where
    # original_len looks intended — confirm before relying on it.
    ratio_dropped = (original_len - clean_len) / clean_len

    # Separate treatment into two categories (at or below vs. above the mean).
    # Renamed from `treatment_below_avg`: the variable holds the mean itself.
    treatment_mean = cm_df[treatment_name].mean()
    cm_df['Treatment'] = np.where(cm_df[treatment_name] <= treatment_mean, 1, 0)

    # Use the top 10 features (deduplicated: this list appeared twice verbatim)
    feature_columns = [
        'Treatment', 'Outcome', 'Weapon_FIREARM', 'Weapon_HANDS',
        'Weapon_KNIFE', 'Weapon_NONE', 'Weapon_OTHER', 'Neighborhood',
        'Premise', 'Month', 'Hour', 'Outside'
    ]

    if use_multi:
        # Use multiple classes as target
        cm_df['Outcome'] = cm_df['Description']
        cm_df = cm_df[feature_columns]
        train_df, test_df = train_test_split(
            cm_df, test_size=0.2, random_state=seed,
            stratify=cm_df['Treatment'])
        print(
            '\n[Estimate propensity scores for Inverse Probability Weighting.]'
        )
        CausalLift(train_df, test_df, enable_ipw=True, verbose=3)
    else:
        # Use each class as a binary outcome: loop over every distinct
        # Description value, testing it one-vs-rest.
        outcome_names = set(cm_df['Description'].values)
        print(outcome_names)
        for outcome in outcome_names:
            cm_df_copy = cm_df.copy()
            print('\n{}'.format(decoder[outcome]))
            cm_df_copy['Outcome'] = np.where(
                cm_df_copy['Description'] == outcome, 1, 0)
            print(cm_df_copy['Outcome'])
            cm_df_copy = cm_df_copy[feature_columns]
            train_df, test_df = train_test_split(
                cm_df_copy, test_size=0.2, random_state=seed,
                stratify=cm_df_copy['Treatment'])
            print(
                '\n[Estimate propensity scores for Inverse Probability Weighting.]'
            )
            CausalLift(train_df, test_df, enable_ipw=True, verbose=3)