Exemplo n.º 1
0
def test_BaseXClassifier(generate_classification_data):
    """Exercise BaseXClassifier with both of its learner configurations.

    Two configurations are fitted on the same train split and used to predict
    on the same test split:

    * all four learners given explicitly (control/treatment outcome and
      effect learners);
    * the two-learner shorthand (``outcome_learner`` / ``effect_learner``).

    The four-learner prediction is sanity-checked for having one row per test
    sample.  The two-learner prediction is scored with ``get_cumgain`` and its
    summed cumulative gain must exceed that of the ``'Random'`` column.

    Args:
        generate_classification_data: fixture; calling it returns the data
            frame and the list of feature column names.
    """
    np.random.seed(RANDOM_SEED)

    df, x_names = generate_classification_data()

    # Recode the treatment column as a binary flag: control -> 0, else -> 1.
    df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME, 0, 1)

    # Propensity scores come from a logistic regression fitted on the whole
    # frame, i.e. before the train/test split below.
    features = df[x_names].values
    propensity_model = LogisticRegression()
    propensity_model.fit(X=features, y=df['treatment_group_key'].values)
    df['propensity_score'] = propensity_model.predict_proba(features)[:, 1]

    df_train, df_test = train_test_split(df,
                                         test_size=0.2,
                                         random_state=RANDOM_SEED)

    def _fit_and_predict(model):
        # Fit ``model`` on the train split and return its test-split prediction.
        model.fit(X=df_train[x_names].values,
                  treatment=df_train['treatment_group_key'].values,
                  y=df_train[CONVERSION].values)
        return model.predict(X=df_test[x_names].values,
                             p=df_test['propensity_score'].values)

    # specify all 4 learners
    tau_pred_four = _fit_and_predict(
        BaseXClassifier(control_outcome_learner=XGBClassifier(),
                        control_effect_learner=XGBRegressor(),
                        treatment_outcome_learner=XGBClassifier(),
                        treatment_effect_learner=XGBRegressor()))

    # Previously this result was overwritten unchecked; at least verify that
    # the model produced one prediction row per test sample.
    assert len(tau_pred_four) == len(df_test)

    # specify 2 learners
    tau_pred = _fit_and_predict(
        BaseXClassifier(outcome_learner=XGBClassifier(),
                        effect_learner=XGBRegressor()))

    # calculate metrics
    auuc_metrics = pd.DataFrame({'tau_pred': tau_pred.flatten(),
                                 'W': df_test['treatment_group_key'].values,
                                 CONVERSION: df_test[CONVERSION].values,
                                 'treatment_effect_col': df_test['treatment_effect'].values})

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col=CONVERSION,
                          treatment_col='W',
                          treatment_effect_col='treatment_effect_col')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
Exemplo n.º 2
0
def test_BaseXClassifier(generate_classification_data):
    """Check that BaseXClassifier's predictions out-gain random targeting.

    A BaseXClassifier built from one XGBRegressor ``learner`` and two
    XGBClassifier outcome learners is fitted on 80% of the generated data.
    Its prediction on the remaining 20% is passed to ``get_cumgain`` and the
    summed ``'y_pred'`` gain must exceed the summed ``'Random'`` gain.

    Args:
        generate_classification_data: fixture; calling it returns the data
            frame and the list of feature column names.
    """
    np.random.seed(RANDOM_SEED)

    data, feature_cols = generate_classification_data()

    # Binary-encode the treatment column: control rows -> 0, all others -> 1.
    is_control = data['treatment_group_key'] == CONTROL_NAME
    data['treatment_group_key'] = np.where(is_control, 0, 1)

    # Logistic-regression propensity scores, fitted on the full frame
    # (before the split) and stored as the probability of class 1.
    all_features = data[feature_cols].values
    propensity_model = LogisticRegression()
    propensity_model.fit(X=all_features, y=data['treatment_group_key'].values)
    class_probabilities = propensity_model.predict_proba(all_features)
    data['propensity_score'] = class_probabilities[:, 1]

    train_part, holdout_part = train_test_split(
        data, test_size=0.2, random_state=RANDOM_SEED)

    uplift_model = BaseXClassifier(
        learner=XGBRegressor(),
        control_outcome_learner=XGBClassifier(),
        treatment_outcome_learner=XGBClassifier(),
    )
    uplift_model.fit(
        X=train_part[feature_cols].values,
        treatment=train_part['treatment_group_key'].values,
        y=train_part[CONVERSION].values,
    )

    predicted = uplift_model.predict(
        X=holdout_part[feature_cols].values,
        p=holdout_part['propensity_score'].values,
    )

    # Side-by-side columns: prediction, treatment flag, observed outcome.
    stacked = np.c_[predicted,
                    holdout_part['treatment_group_key'].values,
                    holdout_part[CONVERSION].values]
    auuc_frame = pd.DataFrame(stacked, columns=['y_pred', 'W', CONVERSION])

    gains = get_cumgain(auuc_frame,
                        outcome_col=CONVERSION,
                        treatment_col='W',
                        steps=20)

    # The model-driven ordering must accumulate more gain in total than the
    # random-targeting baseline.
    model_total = gains['y_pred'].sum()
    random_total = gains['Random'].sum()
    assert model_total > random_total