def test_conditional_probability_alive_returns_1_if_no_repeat_purchases(self, cdnow_customers):
    """A customer with zero repeat purchases must have P(alive) exactly 1.0."""
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    # frequency=0, recency=1, T=1
    p_alive = fitter.conditional_probability_alive(0, 1, 1)
    assert p_alive == 1.0
def test_customer_lifetime_value_with_bgf(self):
    """GammaGammaFitter.customer_lifetime_value must agree with utils.customer_lifetime_value.

    The Gamma-Gamma parameters are set by hand (no fit), while the BG/NBD
    transaction model is fitted on the monetary-value CDNOW data.
    """
    # NOTE(review): `cdnow_customers_with_monetary_value` is not a parameter of
    # this test, so it is presumably a module-level name -- confirm.
    data = cdnow_customers_with_monetary_value
    frequency = data['frequency']
    recency = data['recency']
    age = data['T']
    monetary_value = data['monetary_value']

    ggf = estimation.GammaGammaFitter()
    ggf.params_ = OrderedDict({'p': 6.25, 'q': 3.74, 'v': 15.44})

    bgf = estimation.BetaGeoFitter()
    bgf.fit(frequency, recency, age, iterative_fitting=3)

    # CLV computed through the fitter's own convenience method ...
    clv_from_fitter = ggf.customer_lifetime_value(
        bgf, frequency, recency, age, monetary_value)

    # ... and through the utility function fed with the expected average profit.
    expected_profit = ggf.conditional_expected_average_profit(
        frequency, monetary_value)
    clv_from_utils = utils.customer_lifetime_value(
        bgf, frequency, recency, age, expected_profit)

    npt.assert_equal(clv_from_fitter.values, clv_from_utils.values)
def test_probability_of_n_purchases_up_to_time_same_as_R_BTYD(self):
    """Compare probability_of_n_purchases_up_to_time against reference values.

    See https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
    """
    from collections import OrderedDict

    bgf = estimation.BetaGeoFitter()
    bgf.params_ = OrderedDict({
        'r': 0.243,
        'alpha': 4.414,
        'a': 0.793,
        'b': 2.426,
    })

    # (t, n, expected): probability of exactly n repeat transactions in (0, t].
    #   - 10 repeat transactions in (0, 2]
    #   - no repeat transactions in (0, 39]
    # NOTE(review): the absolute tolerance (1e-4) is far larger than the first
    # expected value, so that check only bounds the result from above.
    scalar_cases = (
        (2, 10, 1.07869e-07),
        (39, 0, 0.5737864),
    )
    for t, n, expected in scalar_cases:
        actual = bgf.probability_of_n_purchases_up_to_time(t, n)
        assert abs(expected - actual) < 10e-5

    # PMF over n = 11..20 at t = 30
    expected_pmf = np.array([
        0.0019995214, 0.0015170236, 0.0011633150, 0.0009003148, 0.0007023638,
        0.0005517902, 0.0004361913, 0.0003467171, 0.0002769613, 0.0002222260,
    ])
    actual_pmf = np.array([
        bgf.probability_of_n_purchases_up_to_time(30, n)
        for n in range(11, 21)
    ])
    npt.assert_array_almost_equal(expected_pmf, actual_pmf, decimal=5)
def test_save_load_bgnbd_no_data(self, cdnow_customers): bgf = estimation.BetaGeoFitter(penalizer_coef=0.0) bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T']) bgf.save_model(PATH_SAVE_BGNBD_MODEL, save_data=False) bgf_new = estimation.BetaGeoFitter() bgf_new.load_model(PATH_SAVE_BGNBD_MODEL) assert bgf_new.__dict__['penalizer_coef'] == bgf.__dict__['penalizer_coef'] assert bgf_new.__dict__['_scale'] == bgf.__dict__['_scale'] assert bgf_new.__dict__['params_'] == bgf.__dict__['params_'] assert bgf_new.__dict__['_negative_log_likelihood_'] == bgf.__dict__['_negative_log_likelihood_'] assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5) assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1) assert isinstance(bgf_new.__dict__['data'], list) # remove saved model os.remove(PATH_SAVE_BGNBD_MODEL)
def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
    """P(alive) must lie in [0, 1] over a coarse grid of (frequency, recency, T)."""
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    for frequency in range(0, 100, 10):
        for recency in range(0, 100, 10):
            # T starts at the recency value, so only T >= recency is probed.
            for age in range(recency, 100, 10):
                p_alive = fitter.conditional_probability_alive(frequency, recency, age)
                assert 0 <= p_alive <= 1.0
def test_expectation_returns_same_value_Hardie_excel_sheet(self, cdnow_customers):
    """Expected purchases up to several times must match the reference values to 3 decimals."""
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        tol=1e-6,
    )

    # (time, expected number of purchases up to that time)
    reference = [
        (0.1429, 0.0078),
        (1.0, 0.0532),
        (3.00, 0.1506),
        (31.8571, 1.0405),
        (32.00, 1.0437),
        (78.00, 1.8576),
    ]
    times = np.array([t for t, _ in reference])
    expected = np.array([value for _, value in reference])

    actual = fitter.expected_number_of_purchases_up_to_time(times)
    npt.assert_array_almost_equal(actual, expected, decimal=3)
def test_save_load_bgnbd(self, cdnow_customers):
    """Test saving and loading model for BG/NBD.

    A fitted model is saved (data included), reloaded into a fresh fitter,
    and the fitted attributes, stored data and predictions are compared.
    """
    bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
    bgf.save_model(PATH_SAVE_BGNBD_MODEL)

    bgf_new = estimation.BetaGeoFitter()
    try:
        bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
    finally:
        # Remove the saved model even when loading fails, so a failing test
        # does not leave the file behind for later tests.
        os.remove(PATH_SAVE_BGNBD_MODEL)

    for attr in ('penalizer_coef', '_scale', 'params_', '_negative_log_likelihood_'):
        assert bgf_new.__dict__[attr] == bgf.__dict__[attr]
    # Element-wise comparison reduced over both axes.
    assert (bgf_new.__dict__['data'] == bgf.__dict__['data']).all().all()
    assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5)
    assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1)
def test_save_load_bgnbd_no_data_replace_with_empty_str(self, cdnow_customers):
    """Test saving and loading model for BG/NBD without data with replaced value empty str.

    The model is saved with ``save_data=False`` and ``values_to_save=['']``;
    after reloading, ``data`` must be the empty-string placeholder while the
    fitted attributes and predictions are unchanged.
    """
    bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
    bgf.save_model(PATH_SAVE_BGNBD_MODEL, save_data=False, values_to_save=[''])

    bgf_new = estimation.BetaGeoFitter()
    try:
        bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
    finally:
        # Remove the saved model even when loading fails, so a failing test
        # does not leave the file behind for later tests.
        os.remove(PATH_SAVE_BGNBD_MODEL)

    for attr in ('penalizer_coef', '_scale', 'params_', '_negative_log_likelihood_'):
        assert bgf_new.__dict__[attr] == bgf.__dict__[attr]
    assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5)
    assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1)

    # Compare by value: `is ''` depends on string interning and triggers a
    # SyntaxWarning on modern Python.
    assert bgf_new.__dict__['data'] == ''
def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
    """Fitted (r, alpha, a, b) must match the reference values to 3 decimals."""
    bgf = estimation.BetaGeoFitter()
    # Use the 'frequency'/'recency' column names that every other test
    # reading this fixture uses (this test previously used 'x'/'t_x').
    bgf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    expected = np.array([0.243, 4.414, 0.793, 2.426])
    actual = np.array(bgf._unload_params('r', 'alpha', 'a', 'b'))
    npt.assert_array_almost_equal(expected, actual, decimal=3)
def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
    """Every fitted parameter must get strictly smaller as penalizer_coef grows.

    Three fits are compared: no penalizer argument, ``penalizer_coef=0.1``
    and ``penalizer_coef=10``.
    """
    # Use the 'frequency'/'recency' column names that every other test
    # reading this fixture uses (this test previously used 'x'/'t_x').
    frequency = cdnow_customers['frequency']
    recency = cdnow_customers['recency']
    age = cdnow_customers['T']

    bgf_no_penalizer = estimation.BetaGeoFitter()
    bgf_no_penalizer.fit(frequency, recency, age)
    params_1 = np.array(list(bgf_no_penalizer.params_.values()))

    bgf_with_penalizer = estimation.BetaGeoFitter(penalizer_coef=0.1)
    bgf_with_penalizer.fit(frequency, recency, age)
    params_2 = np.array(list(bgf_with_penalizer.params_.values()))
    assert np.all(params_2 < params_1)

    bgf_with_more_penalizer = estimation.BetaGeoFitter(penalizer_coef=10)
    bgf_with_more_penalizer.fit(frequency, recency, age)
    params_3 = np.array(list(bgf_with_more_penalizer.params_.values()))
    assert np.all(params_3 < params_2)
def test_fit_with_index(self, cdnow_customers):
    """``fit(index=...)`` must set the index of the fitter's stored data.

    With a custom (reversed) index the stored data index equals it; with
    ``index=None`` the stored data index must not equal that custom index.
    """
    custom_index = range(len(cdnow_customers), 0, -1)

    bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        index=custom_index,
    )
    # Assert truthiness directly instead of comparing with `== True`.
    assert (bgf.data.index == custom_index).all()

    bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        index=None,
    )
    assert not (bgf.data.index == custom_index).all()
def test_conditional_probability_alive_matrix(self, cdnow_customers):
    """Each matrix cell must equal conditional_probability_alive for its (x, t_x).

    Rows are indexed by recency and columns by frequency; T is the largest
    ``T`` in the fitted data, truncated to an int.
    """
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    matrix = fitter.conditional_probability_alive_matrix()
    horizon = int(fitter.data['T'].max())

    assert matrix[0][0] == 1

    for recency in range(matrix.shape[0]):
        row = matrix[recency]
        for frequency in range(matrix.shape[1]):
            expected = fitter.conditional_probability_alive(frequency, recency, horizon)
            assert row[frequency] == expected
def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self, cdnow_customers):
    """Conditional expected purchases for one customer must be within 0.001 of 1.226."""
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    # Arguments in call order: t, x, t_x, T
    horizon, frequency, recency, age = 39, 2, 30.43, 38.86
    reference = 1.226

    result = fitter.conditional_expected_number_of_purchases_up_to_time(
        horizon, frequency, recency, age)
    assert abs(reference - result) < 0.001
def test_scaling_inputs_gives_same_or_similar_results(self, cdnow_customers):
    """Scaling recency and T by a constant must not change P(alive) noticeably.

    One model is fitted on the raw data and one on data whose ``recency``
    and ``T`` are multiplied by ``scale``; the second model must report an
    internal ``_scale`` below 1 and agree on P(alive) for correspondingly
    scaled inputs.

    ``cdnow_customers`` is requested as a fixture, like every other test
    that reads this dataset.
    """
    frequency = cdnow_customers['frequency']
    recency = cdnow_customers['recency']
    age = cdnow_customers['T']

    bgf = estimation.BetaGeoFitter()
    bgf.fit(frequency, recency, age)

    scale = 10
    bgf_with_large_inputs = estimation.BetaGeoFitter()
    bgf_with_large_inputs.fit(frequency, scale * recency, scale * age, iterative_fitting=2)
    assert bgf_with_large_inputs._scale < 1.

    # (frequency, recency, T) probes, evaluated unscaled and scaled.
    for x, t_x, T in ((1, 1, 2), (1, 2, 10)):
        scaled = bgf_with_large_inputs.conditional_probability_alive(x, scale * t_x, scale * T)
        unscaled = bgf.conditional_probability_alive(x, t_x, T)
        assert abs(scaled - unscaled) < 10e-5
def test_using_weights_col_gives_correct_results(self, cdnow_customers):
    """Fitting on aggregated rows with weights must match fitting on the raw rows.

    Identical (frequency, recency, T) rows are collapsed into one row whose
    weight is the number of collapsed rows; the fitted parameters must agree
    with the unweighted fit to 4 decimals.
    """
    # Give each customer unit weight, then sum the weights per distinct row.
    weighted = (
        cdnow_customers
        .assign(weights=1.0)
        .groupby(['frequency', 'recency', 'T'])['weights']
        .sum()
        .reset_index()
    )
    # The aggregation must actually have merged some rows.
    assert (weighted['weights'] > 1).any()

    bgf_weights = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf_weights.fit(
        weighted['frequency'],
        weighted['recency'],
        weighted['T'],
        weights=weighted['weights'],
    )

    bgf_no_weights = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf_no_weights.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    param_names = ('r', 'alpha', 'a', 'b')
    unweighted_params = np.array(bgf_no_weights._unload_params(*param_names))
    weighted_params = np.array(bgf_weights._unload_params(*param_names))
    npt.assert_almost_equal(unweighted_params, weighted_params, decimal=4)
def test_no_runtime_warnings_high_frequency(self, cdnow_customers):
    """Fitting and scoring a very high frequency must not trip numpy floating-point errors.

    All numpy floating-point errors are turned into exceptions while the
    model is fitted and P(alive) is computed for ``frequency=1000``; the
    resulting probability must be exactly 0.
    """
    # np.errstate restores the previous error settings on exit, even when the
    # body raises, so a failure here cannot leak 'raise' mode into other tests.
    with np.errstate(all='raise'):
        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
            index=None,
        )
        p_alive = bgf.conditional_probability_alive(frequency=1000, recency=10, T=100)

    assert p_alive == 0.