def test_prob_alive_is_close_to_Hardie_paper_table_6(self, donations):
    """Table 6: P(Alive in 2002) as a Function of Recency and Frequency.

    Fits the model on the ``donations`` fixture, stores the conditional
    probability of being alive on the fitter's data frame, and compares
    three (frequency, recency) cells with the expected values to two
    decimal places.
    """
    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
    )
    bbtf.data['prob_alive'] = bbtf.conditional_probability_alive(
        1, donations['frequency'], donations['recency'], donations['periods'])

    def prob_alive_for(frequency, recency):
        """Return the scalar prob_alive of the single matching row."""
        mask = (bbtf.data['frequency'] == frequency) & (bbtf.data['recency'] == recency)
        # Extract an explicit scalar instead of assigning a one-row Series
        # into a NumPy element (implicit Series -> float conversion is
        # deprecated). ``item()`` raises ValueError unless exactly one row
        # matched, so an ambiguous or empty selection still fails loudly.
        return bbtf.data.loc[mask, 'prob_alive'].values.item()

    # Expected probabilities for last year 1995-0 repeat, 1999-2 repeat, 2001-6 repeat
    expected = np.array([0.11, 0.59, 0.93])
    prob_list = np.array([
        prob_alive_for(0, 0),
        prob_alive_for(2, 4),
        prob_alive_for(6, 6),
    ])
    npt.assert_array_almost_equal(expected, prob_list, decimal=2)
def test_fit_with_index(self, donations):
    """Check that ``fit`` applies a caller-supplied index to ``data``.

    First fits with an explicit descending index and asserts the fitted
    data carries it; then fits with ``index=None`` and asserts the fitted
    data's index does not equal that descending index.
    """
    bbtf = estimation.BetaGeoBetaBinomFitter()
    index = range(len(donations), 0, -1)
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
        index=index,
    )
    # Assert truthiness directly rather than comparing with ``== True``.
    assert (bbtf.data.index == index).all()

    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
        index=None,
    )
    assert not (bbtf.data.index == index).all()
def test_params_out_is_close_to_Hardie_paper(self):
    """Fitted alpha, beta, gamma, delta must match the reference values.

    The comparison is made to two decimal places after fitting on the
    ``donations`` data.
    """
    fit_columns = ('frequency', 'recency', 'n', 'n_custs')
    fitter = estimation.BetaGeoBetaBinomFitter()
    fitter.fit(*[donations[column] for column in fit_columns])

    reference_params = np.array([1.204, 0.750, 0.657, 2.783])
    fitted_params = np.array(
        fitter._unload_params('alpha', 'beta', 'gamma', 'delta'))
    npt.assert_array_almost_equal(reference_params, fitted_params, decimal=2)
def test_expected_purchases_in_n_periods_returns_same_value_as_Hardie_excel_sheet(self):
    """Total expected from Hardie's In-Sample Fit sheet.

    After fitting on the ``donations`` data, the rows labelled 0 and 6 of
    the expected-transactions output for 6 periods are compared with the
    spreadsheet values, to zero decimal places.
    """
    fit_columns = ('frequency', 'recency', 'n', 'n_custs')
    fitter = estimation.BetaGeoBetaBinomFitter()
    fitter.fit(*[donations[column] for column in fit_columns])

    # Cells C18 and C24
    sheet_values = np.array([3454.9, 1253.1])
    in_sample = fitter.expected_number_of_transactions_in_first_n_periods(6)
    model_values = in_sample.loc[[0, 6]].values.flatten()
    npt.assert_almost_equal(sheet_values, model_values, decimal=0)
def test_params_same_from_sim_data(self, bbgb_params):
    """Parameters fitted on simulated data must be close to the inputs.

    Simulates 100000 customers over 6 periods from ``bbgb_params``,
    collapses them into counts per (frequency, recency, n_periods), fits
    the model on those weighted rows and compares the fitted parameters
    with the values of ``bbgb_params`` (atol=0.1, rtol=1e-2).
    """
    simulated = beta_geometric_beta_binom_model(N=6, size=100000, **bbgb_params)
    fitter = estimation.BetaGeoBetaBinomFitter()

    # One row per distinct pattern; the customer count becomes the weight.
    pattern_counts = (
        simulated
        .groupby(['frequency', 'recency', 'n_periods'])['customer_id']
        .count()
        .reset_index()
        .rename(columns={'customer_id': 'weights'})
    )
    fitter.fit(
        pattern_counts['frequency'],
        pattern_counts['recency'],
        pattern_counts['n_periods'],
        pattern_counts['weights'],
    )

    true_params = np.asarray(list(bbgb_params.values())).astype(float)
    fitted_params = np.asarray(
        fitter._unload_params('alpha', 'beta', 'gamma', 'delta')).astype(float)
    npt.assert_allclose(true_params, fitted_params, atol=0.1, rtol=1e-2)
def test_fit_with_and_without_weights(self, donations):
    """Fitting unweighted, exploded data must match fitting with weights.

    Each row of ``donations`` is repeated ``weights`` times to build a
    one-row-per-observation data set. A fitter trained on that data set
    without weights must yield the same alpha, beta, gamma, delta (to four
    decimal places) as a fitter trained on the compact, weighted data.
    """
    columns = ['frequency', 'recency', 'periods']

    # Expand in one vectorised step: repeat every row position ``weights``
    # times and select positionally. This replaces a per-row
    # ``DataFrame.append`` loop, which was removed in pandas 2.0 and copied
    # the growing frame on every iteration.
    row_positions = np.repeat(
        np.arange(len(donations)), donations['weights'].values)
    exploded_dataset = (
        donations[columns]
        .iloc[row_positions]
        .reset_index(drop=True)
        .astype(np.int64)
    )
    assert exploded_dataset.shape[0] == donations['weights'].sum()

    bbtf_noweights = estimation.BetaGeoBetaBinomFitter()
    bbtf_noweights.fit(
        exploded_dataset['frequency'],
        exploded_dataset['recency'],
        exploded_dataset['periods'],
    )

    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
    )

    npt.assert_array_almost_equal(
        np.array(
            bbtf_noweights._unload_params('alpha', 'beta', 'gamma', 'delta')),
        np.array(bbtf._unload_params('alpha', 'beta', 'gamma', 'delta')),
        decimal=4)
def test_params_same_from_sim_data(self):
    """Parameters fitted on simulated data must be within 0.1 of the inputs.

    Simulates 100000 customers over 6 periods from ``bbgb_params``,
    collapses them into counts per (frequency, recency, n), fits the model
    on those weighted rows and checks every fitted parameter against the
    corresponding value of ``bbgb_params``.
    """
    sim_data = beta_geometric_beta_binom_model(N=6, size=100000, **bbgb_params)
    bbtf = estimation.BetaGeoBetaBinomFitter()
    grouped_data = sim_data.groupby(['frequency', 'recency', 'n'])['customer_id'].count()
    grouped_data = grouped_data.reset_index().rename(
        columns={'customer_id': 'n_custs'})
    bbtf.fit(
        grouped_data['frequency'],
        grouped_data['recency'],
        grouped_data['n'],
        grouped_data['n_custs'],
    )

    true_params = np.array(list(bbgb_params.values()))
    fitted_params = np.array(
        bbtf._unload_params('alpha', 'beta', 'gamma', 'delta'))
    # Compare the absolute difference: a signed difference would let any
    # over-estimate (negative difference) pass regardless of its size.
    assert (np.abs(true_params - fitted_params) < 0.1).all()
def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self):
    """
    Total from Hardie's Conditional Expectations (II) sheet.
    http://brucehardie.com/notes/010/BGBB_2011-01-20_XLSX.zip

    The conditional expected purchases up to time 5 are multiplied by the
    ``n_custs`` column of ``donations``, summed, and compared with the
    spreadsheet total to zero decimal places.
    """
    fit_columns = ('frequency', 'recency', 'n', 'n_custs')
    fitter = estimation.BetaGeoBetaBinomFitter()
    fitter.fit(*[donations[column] for column in fit_columns])

    per_row_expectation = fitter.conditional_expected_number_of_purchases_up_to_time(5)
    weighted_purchases = per_row_expectation * donations['n_custs']

    # Sum of column F Exp Tot
    sheet_total = 12884.2
    npt.assert_almost_equal(sheet_total, weighted_purchases.sum(), decimal=0)