def test_fit_with_and_without_weights(self, cdnow_customers):
    """Check that a weighted fit on aggregated rows matches the unweighted fit.

    The fixture rows are grouped by (frequency, recency, T) and the group
    sizes are passed to ``fit`` as weights. The resulting r, alpha, s and
    beta must agree to two decimals with those obtained from the raw rows.
    """
    param_names = ('r', 'alpha', 's', 'beta')

    # Collapse identical (frequency, recency, T) rows into one row plus a
    # count column named 'weights'.
    aggregated = (
        cdnow_customers.copy()
        .groupby(['frequency', 'recency', 'T'])
        .size()
        .reset_index()
        .rename(columns={0: 'weights'})
    )

    unweighted_fitter = estimation.ParetoNBDFitter()
    unweighted_fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    weighted_fitter = estimation.ParetoNBDFitter()
    weighted_fitter.fit(
        aggregated['frequency'],
        aggregated['recency'],
        aggregated['T'],
        aggregated['weights'],
    )

    unweighted_params = np.array(unweighted_fitter._unload_params(*param_names))
    weighted_params = np.array(weighted_fitter._unload_params(*param_names))
    npt.assert_array_almost_equal(unweighted_params, weighted_params, decimal=2)
def test_fit_with_index(self, cdnow_customers):
    """Check that ``fit`` applies a caller-supplied index to ``ptf.data``.

    With an explicit ``index`` the fitted data's index must equal it
    element-wise. With ``index=None`` the fitted data's index must not
    match that same (reversed) index everywhere.
    """
    # A descending index, so it cannot coincide with a default ascending one.
    index = range(len(cdnow_customers), 0, -1)

    ptf = estimation.ParetoNBDFitter()
    ptf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        index=index,
    )
    assert (ptf.data.index == index).all()

    ptf = estimation.ParetoNBDFitter()
    ptf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        index=None,
    )
    assert not (ptf.data.index == index).all()
def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
    """Check that conditional_probability_alive stays within [0, 1].

    The fitted model is evaluated on a float grid: frequency and recency
    run over 0, 10, ..., 90 and the age runs from the recency up to 90.
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    for frequency in np.arange(0, 100, 10.):
        for recency in np.arange(0, 100, 10.):
            # The age never goes below the recency.
            for age in np.arange(recency, 100, 10.):
                p_alive = fitter.conditional_probability_alive(frequency, recency, age)
                assert 0.0 <= p_alive <= 1.0
def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
    """Check that conditional_probability_alive stays within [0, 1].

    The fitted model is evaluated on an integer grid: the first two
    arguments run over 0, 10, ..., 90 and the third runs from the second
    argument's value up to 90.
    """
    # NOTE(review): another test with this exact name is defined earlier in
    # the file; the later definition shadows the earlier one, so only one
    # of them is collected. Consider renaming one of the two.
    ptf = estimation.ParetoNBDFitter()
    # The dataset comes from the ``cdnow_customers`` fixture, which must be
    # requested in the signature for the name to be defined here.
    ptf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    for i in range(0, 100, 10):
        for j in range(0, 100, 10):
            for k in range(j, 100, 10):
                assert 0 <= ptf.conditional_probability_alive(i, j, k) <= 1.0
def test_conditional_probability_alive_matrix(self, cdnow_customers):
    """Check each matrix entry against the scalar probability-alive call.

    Entry ``[t_x][x]`` of ``conditional_probability_alive_matrix()`` must be
    exactly equal to ``conditional_probability_alive(x, t_x, max_T)``, where
    ``max_T`` is the largest ``T`` in the fitted data truncated to an int.
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    alive_matrix = fitter.conditional_probability_alive_matrix()
    oldest_age = int(fitter.data['T'].max())

    for row in range(alive_matrix.shape[0]):
        for col in range(alive_matrix.shape[1]):
            from_matrix = alive_matrix[row][col]
            from_scalar_call = fitter.conditional_probability_alive(col, row, oldest_age)
            assert from_matrix == from_scalar_call
def test_expectation_returns_same_value_as_R_BTYD(self, cdnow_customers):
    """Compare expected purchase counts for t = 0..9 with reference values.

    Reference values from
    https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        tol=1e-6,
    )

    reference = np.array([
        0.00000000, 0.05077821, 0.09916088, 0.14542507, 0.18979930,
        0.23247466, 0.27361274, 0.31335159, 0.35181024, 0.38909211,
    ])
    computed = fitter.expected_number_of_purchases_up_to_time(range(10))

    npt.assert_allclose(reference, computed, atol=0.01)
def test_overflow_error(self):
    """Check that ``_log_A_0`` returns finite negative values for large inputs.

    Every element of the result must be strictly negative, not infinite
    and not null for the frequencies of 400-500 used here.
    """
    fitter = estimation.ParetoNBDFitter()
    model_params = np.array([10.465, 7.98565181e-03, 3.0516, 2.820])
    frequencies = np.array([400., 500., 500.])
    recencies = np.array([5., 1., 4.])
    ages = np.array([6., 37., 37.])

    log_a0_values = fitter._log_A_0(model_params, frequencies, recencies, ages)
    for value in log_a0_values:
        assert value < 0 and not np.isinf(value) and not pd.isnull(value)
def test_conditional_probability_alive_overflow_error(self):
    """Check conditional_probability_alive stays a valid probability.

    With parameters set directly (no fit) and frequencies of 400-500, every
    returned value must lie in [0, 1] and be neither infinite nor null.
    """
    fitter = estimation.ParetoNBDFitter()
    param_names = ['r', 'alpha', 's', 'beta']
    param_values = [10.465, 7.98565181e-03, 3.0516, 2.820]
    fitter.params_ = OrderedDict(zip(param_names, param_values))

    frequencies = np.array([400., 500., 500.])
    recencies = np.array([5., 1., 4.])
    ages = np.array([6., 37., 37.])

    probabilities = fitter.conditional_probability_alive(frequencies, recencies, ages)
    for prob in probabilities:
        assert 0 <= prob <= 1 and not np.isinf(prob) and not pd.isnull(prob)
def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
    """Check the fitted r, alpha, s, beta against fixed reference values.

    The fit is run with ``iterative_fitting=3`` and the parameters must
    match the reference to three decimal places.
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        iterative_fitting=3,
    )

    reference = np.array([0.553, 10.578, 0.606, 11.669])
    fitted = np.array(fitter._unload_params('r', 'alpha', 's', 'beta'))

    npt.assert_array_almost_equal(reference, fitted, decimal=3)
def test_conditional_expectation_returns_same_value_as_R_BTYD(self, cdnow_customers):
    """Compare one conditional expectation with a reference value.

    Reference value from
    https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    # Arguments are passed positionally in the order (t, x, t_x, T).
    horizon, purchases, last_purchase, age = 52, 26.00, 30.86, 31
    reference = 25.46

    computed = fitter.conditional_expected_number_of_purchases_up_to_time(
        horizon, purchases, last_purchase, age)

    assert abs(reference - computed) < 0.01
def test_conditional_probability_alive(self, cdnow_customers):
    """Compare one probability-alive value with a published target.

    Target taken from page 8,
    https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf

    The parameters are assigned directly; no fit is run.
    """
    fitter = estimation.ParetoNBDFitter()
    names = ['r', 'alpha', 's', 'beta']
    values = [0.5534, 10.5802, 0.6061, 11.6562]
    fitter.params_ = OrderedDict(zip(names, values))

    target = 0.9979
    computed = fitter.conditional_probability_alive(26.00, 30.86, 31.00)

    assert abs(computed - target) < 0.001
def test_conditional_expectation_underflow(self):
    """Check that a one-step change in input barely changes the expectation.

    Two calls to conditional_expected_number_of_purchases_up_to_time() that
    differ only in the second argument (132 vs 133) must return values
    within 0.05 of each other. If the function does not guard against
    numeric underflow, this small change in input produces an underflow
    error instead.
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.params_ = OrderedDict(
        {'alpha': 10.58, 'beta': 11.67, 'r': 0.55, 's': 0.61})

    # Neighbouring inputs; reference outputs recorded in the trailing comments.
    lower = fitter.conditional_expected_number_of_purchases_up_to_time(
        10, 132, 200, 200)  # 6.2060517889632418
    upper = fitter.conditional_expected_number_of_purchases_up_to_time(
        10, 133, 200, 200)  # 6.2528722475748113

    assert abs(lower - upper) < 0.05
def test_conditional_probability_of_n_purchases_up_to_time_is_between_0_and_1(
        self, cdnow_customers):
    """Check the n-purchase probability lies in [0, 1] on random inputs.

    Due to the large parameter space, only a random subset is evaluated:
    five draws per argument, redrawn inside each enclosing loop iteration.
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    for frequency in np.random.choice(100, 5):
        for recency in np.random.choice(100, 5):
            # Offsetting by the recency keeps the age at or above it.
            for age in recency + np.random.choice(100, 5):
                for horizon in np.random.choice(100, 5):
                    for n_purchases in np.random.choice(10, 5):
                        prob = fitter.conditional_probability_of_n_purchases_up_to_time(
                            n_purchases, horizon, frequency, recency, age)
                        assert 0.0 <= prob <= 1.0
def test_Ex_estimation_and_error(self, cdnow_customers):
    """Check expected purchases and their error estimate after three fits.

    The same fitter is refit on a small hand-written dataset, on a
    de-duplicated version of it, and on the ``cdnow_customers`` fixture.
    After each fit the expected number of purchases up to ``t`` and its
    error (computed with the fixed matrix ``C``) are checked.
    """
    ptf = estimation.ParetoNBDFitter()

    # Shared by all three evaluations below.
    t = 100
    C = [[0.02, 0, 0, 0],
         [0, 1.0, 0, 0],
         [0, 0, 0.03, 0.0],
         [0, 0, 0, 2.0]]

    # 1) Small dataset containing repeated rows.
    frequency = [0, 0, 0, 1, 1, 0, 2, 5, 6, 6, 0, 10]
    recency = [0, 0, 0, 1, 10, 0, 8, 8, 9, 9, 0, 10]
    T = [10] * len(frequency)
    ptf.fit(frequency, recency, T, initial_params=[0.5, 2, 0.5, 0.5])
    Ex = ptf.expected_number_of_purchases_up_to_time(t)
    Ex_err = ptf.expected_number_of_purchases_up_to_time_error(t, C)
    assert 25 > Ex > 15
    assert Ex_err > 0

    # 2) The same rows with duplicates removed (no weights are passed).
    compressed_frequency = [0, 1, 1, 2, 5, 6, 10]
    compressed_recency = [0, 1, 10, 8, 8, 9, 10]
    compressed_T = [10, 10, 10, 10, 10, 10, 10]
    ptf.fit(compressed_frequency, compressed_recency, compressed_T,
            initial_params=[0.5, 2, 0.5, 0.5])
    Ex2 = ptf.expected_number_of_purchases_up_to_time(t)
    Ex2_err = ptf.expected_number_of_purchases_up_to_time_error(t, C)
    # Assert on the values from THIS fit; previously the stale Ex / Ex_err
    # from the first fit were re-checked and Ex2 / Ex2_err were never used.
    # Only positivity is asserted because the 15-25 range was established
    # for the first dataset, not for the compressed one.
    assert Ex2 > 0
    assert Ex2_err > 0

    # 3) The fixture dataset, requested through the test signature.
    ptf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )
    Ex = ptf.expected_number_of_purchases_up_to_time(t)
    Ex_err = ptf.expected_number_of_purchases_up_to_time_error(t, C)
    assert Ex > 0
    assert Ex_err > 0
def test_conditional_probability_of_n_purchases_up_to_time_adds_up_to_1(
        self, cdnow_customers):
    """Check the n-purchase probabilities sum to 1 over n = 0..19.

    Due to the large parameter space, only a random subset is evaluated.
    The limits are also kept small so that the number of values of n for
    which the probability needs to be calculated stays at a sane level.
    """
    fitter = estimation.ParetoNBDFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    for frequency in np.random.choice(10, 5):
        for recency in np.random.choice(9, 5):
            # Ages are drawn from [recency, 10) so they never precede it.
            for age in np.random.choice(np.arange(recency, 10, 1), 5):
                for horizon in 1 + np.random.choice(9, 5):
                    per_n_probabilities = [
                        fitter.conditional_probability_of_n_purchases_up_to_time(
                            n, horizon, frequency, recency, age)
                        for n in np.arange(0, 20, 1)
                    ]
                    total = np.sum(per_n_probabilities)
                    npt.assert_almost_equal(total, 1.0, decimal=2)