def test_BaseXClassifier(generate_classification_data):
    """Smoke-test BaseXClassifier under both constructor forms.

    Fits the meta-learner on a train split, predicts CATE on a held-out
    split, and asserts that ranking by the model's predictions yields a
    higher cumulative gain than random targeting.
    """
    np.random.seed(RANDOM_SEED)
    df, x_names = generate_classification_data()

    # Binarize the treatment indicator: control -> 0, treated -> 1.
    treated_mask = df['treatment_group_key'] != CONTROL_NAME
    df['treatment_group_key'] = np.where(treated_mask, 1, 0)

    # Propensity scores from a logistic regression on the features.
    pmodel = LogisticRegression()
    pmodel.fit(X=df[x_names].values, y=df['treatment_group_key'].values)
    df['propensity_score'] = pmodel.predict_proba(df[x_names].values)[:, 1]

    df_train, df_test = train_test_split(
        df, test_size=0.2, random_state=RANDOM_SEED)

    X_train = df_train[x_names].values
    w_train = df_train['treatment_group_key'].values
    y_train = df_train[CONVERSION].values
    X_test = df_test[x_names].values
    p_test = df_test['propensity_score'].values

    # Form 1: all four component learners specified explicitly.
    model = BaseXClassifier(
        control_outcome_learner=XGBClassifier(),
        control_effect_learner=XGBRegressor(),
        treatment_outcome_learner=XGBClassifier(),
        treatment_effect_learner=XGBRegressor())
    model.fit(X=X_train, treatment=w_train, y=y_train)
    tau_pred = model.predict(X=X_test, p=p_test)

    # Form 2: one shared outcome learner and one shared effect learner.
    # Its predictions overwrite the previous ones and are the values
    # evaluated below.
    model = BaseXClassifier(
        outcome_learner=XGBClassifier(),
        effect_learner=XGBRegressor())
    model.fit(X=X_train, treatment=w_train, y=y_train)
    tau_pred = model.predict(X=X_test, p=p_test)

    # Assemble the evaluation frame expected by get_cumgain.
    auuc_metrics = pd.DataFrame({
        'tau_pred': tau_pred.flatten(),
        'W': df_test['treatment_group_key'].values,
        CONVERSION: df_test[CONVERSION].values,
        'treatment_effect_col': df_test['treatment_effect'].values})
    cumgain = get_cumgain(
        auuc_metrics,
        outcome_col=CONVERSION,
        treatment_col='W',
        treatment_effect_col='treatment_effect_col')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting.
    assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
def test_BaseXClassifier(generate_classification_data):
    """Smoke-test BaseXClassifier built from a base effect learner plus
    per-group outcome classifiers, checking cumulative gain beats random.

    NOTE(review): this redefines ``test_BaseXClassifier`` declared earlier
    in the file, shadowing it — pytest collects only this version. Consider
    renaming one of the two so both run.
    """
    np.random.seed(RANDOM_SEED)
    df, x_names = generate_classification_data()

    # Binarize the treatment indicator: control -> 0, treated -> 1.
    df['treatment_group_key'] = np.where(
        df['treatment_group_key'] == CONTROL_NAME, 0, 1)

    # Fit a logistic regression to produce propensity scores.
    pmodel = LogisticRegression()
    pmodel.fit(X=df[x_names].values, y=df['treatment_group_key'].values)
    df['propensity_score'] = pmodel.predict_proba(df[x_names].values)[:, 1]

    df_train, df_test = train_test_split(
        df, test_size=0.2, random_state=RANDOM_SEED)

    # Shared effect learner plus explicit per-group outcome classifiers.
    uplift = BaseXClassifier(
        learner=XGBRegressor(),
        control_outcome_learner=XGBClassifier(),
        treatment_outcome_learner=XGBClassifier())
    uplift.fit(X=df_train[x_names].values,
               treatment=df_train['treatment_group_key'].values,
               y=df_train[CONVERSION].values)
    y_pred = uplift.predict(X=df_test[x_names].values,
                            p=df_test['propensity_score'].values)

    # Stack predictions with treatment flag and outcome for evaluation.
    eval_matrix = np.c_[y_pred,
                        df_test['treatment_group_key'].values,
                        df_test[CONVERSION].values]
    auuc_metrics = pd.DataFrame(eval_matrix,
                                columns=['y_pred', 'W', CONVERSION])
    cumgain = get_cumgain(auuc_metrics,
                          outcome_col=CONVERSION,
                          treatment_col='W',
                          steps=20)

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting.
    assert cumgain['y_pred'].sum() > cumgain['Random'].sum()