Пример #1
0
    def test_prob_alive_is_close_to_Hardie_paper_table_6(self, donations):
        """Table 6: P(Alive in 2002) as a Function of Recency and Frequency.

        Fits the model on the ``donations`` fixture, stores the result of
        ``conditional_probability_alive`` (called with ``1`` as its first
        argument) in a ``prob_alive`` column of the fitted data, and compares
        three (frequency, recency) cells against reference values to two
        decimal places.
        """
        bbtf = estimation.BetaGeoBetaBinomFitter()
        bbtf.fit(
            donations['frequency'],
            donations['recency'],
            donations['periods'],
            donations['weights'],
        )

        bbtf.data['prob_alive'] = bbtf.conditional_probability_alive(
            1, donations['frequency'], donations['recency'],
            donations['periods'])

        def prob_alive_for(frequency, recency):
            """Return the single ``prob_alive`` value for one data cell."""
            cell = bbtf.data[(bbtf.data['frequency'] == frequency)
                             & (bbtf.data['recency'] == recency)]
            # ``.item()`` extracts the scalar explicitly instead of relying on
            # NumPy's implicit (deprecated) size-1 array -> scalar conversion,
            # and raises ValueError unless exactly one row matched.
            return cell['prob_alive'].item()

        # Expected probabilities for last year 1995-0 repeat, 1999-2 repeat, 2001-6 repeat
        expected = np.array([0.11, 0.59, 0.93])
        prob_list = np.array([
            prob_alive_for(0, 0),
            prob_alive_for(2, 4),
            prob_alive_for(6, 6),
        ])
        npt.assert_array_almost_equal(expected, prob_list, decimal=2)
Пример #2
0
    def test_fit_with_index(self, donations):
        """Check that ``fit`` honours an explicit ``index`` argument.

        A reversed range (``len(donations)`` down to 1) is passed as the
        index and must show up unchanged on ``bbtf.data.index``. When
        ``index=None`` is passed instead, the resulting index must differ
        from that reversed range in at least one position.
        """
        index = range(len(donations), 0, -1)

        bbtf = estimation.BetaGeoBetaBinomFitter()
        bbtf.fit(donations['frequency'],
                 donations['recency'],
                 donations['periods'],
                 donations['weights'],
                 index=index)
        # Assert truthiness directly rather than comparing to True.
        assert (bbtf.data.index == index).all()

        bbtf = estimation.BetaGeoBetaBinomFitter()
        bbtf.fit(donations['frequency'],
                 donations['recency'],
                 donations['periods'],
                 donations['weights'],
                 index=None)
        assert not (bbtf.data.index == index).all()
Пример #3
0
    def test_params_out_is_close_to_Hardie_paper(self):
        """Fitted alpha, beta, gamma, delta match reference values to 2 decimals.

        Uses the ``donations`` data set that is in scope outside this method.
        """
        fitter = estimation.BetaGeoBetaBinomFitter()
        fitter.fit(
            donations['frequency'],
            donations['recency'],
            donations['n'],
            donations['n_custs'],
        )

        param_names = ('alpha', 'beta', 'gamma', 'delta')
        fitted = np.array(fitter._unload_params(*param_names))
        reference = np.array([1.204, 0.750, 0.657, 2.783])

        npt.assert_array_almost_equal(reference, fitted, decimal=2)
Пример #4
0
    def test_expected_purchases_in_n_periods_returns_same_value_as_Hardie_excel_sheet(self):
        """Total expected from Hardie's In-Sample Fit sheet.

        Compares the rows labelled 0 and 6 of the 6-period expectation table
        against the spreadsheet values, rounded to whole numbers.
        """
        model = estimation.BetaGeoBetaBinomFitter()
        model.fit(
            donations['frequency'],
            donations['recency'],
            donations['n'],
            donations['n_custs'],
        )

        expected = np.array([3454.9, 1253.1])  # Cells C18 and C24

        in_sample = model.expected_number_of_transactions_in_first_n_periods(6)
        selected_rows = in_sample.loc[[0, 6]]
        estimated = selected_rows.values.flatten()

        npt.assert_almost_equal(expected, estimated, decimal=0)
Пример #5
0
    def test_params_same_from_sim_data(self, bbgb_params):
        """Fitting simulated data recovers the parameters used to simulate it.

        Simulates 100000 customers with ``bbgb_params``, collapses them into
        (frequency, recency, n_periods) groups weighted by customer count,
        fits the model and checks the fitted alpha/beta/gamma/delta against
        the values of ``bbgb_params`` (taken in dict order) with
        ``atol=0.1`` and ``rtol=1e-2``.
        """
        simulated = beta_geometric_beta_binom_model(N=6, size=100000, **bbgb_params)
        fitter = estimation.BetaGeoBetaBinomFitter()

        group_keys = ['frequency', 'recency', 'n_periods']
        customer_counts = simulated.groupby(group_keys)['customer_id'].count()
        grouped = customer_counts.reset_index()
        grouped = grouped.rename(columns={'customer_id': 'weights'})

        fitter.fit(
            grouped['frequency'],
            grouped['recency'],
            grouped['n_periods'],
            grouped['weights'],
        )

        true_params = np.asarray(list(bbgb_params.values())).astype(float)
        fitted_params = np.asarray(
            fitter._unload_params('alpha', 'beta', 'gamma', 'delta')
        ).astype(float)

        npt.assert_allclose(true_params, fitted_params, atol=0.1, rtol=1e-2)
Пример #6
0
    def test_fit_with_and_without_weights(self, donations):
        """Fitting weighted rows equals fitting the same rows repeated.

        Each row of ``donations`` is expanded into ``weights`` identical
        unweighted rows. A model fitted on the expanded data without weights
        must produce the same alpha/beta/gamma/delta (to 4 decimals) as a
        model fitted on the original data with weights.
        """
        columns = ['frequency', 'recency', 'periods']

        # Build one frame per source row and concatenate once.
        # ``DataFrame.append`` was removed in pandas 2.0 and re-copied the
        # accumulated frame on every iteration.
        pieces = [
            pd.DataFrame(
                [[row['frequency'], row['recency'], row['periods']]] *
                row['weights'],
                columns=columns)
            for _, row in donations.iterrows()
        ]
        exploded_dataset = pd.concat(pieces).astype(np.int64)
        assert exploded_dataset.shape[0] == donations['weights'].sum()

        bbtf_noweights = estimation.BetaGeoBetaBinomFitter()
        bbtf_noweights.fit(
            exploded_dataset['frequency'],
            exploded_dataset['recency'],
            exploded_dataset['periods'],
        )

        bbtf = estimation.BetaGeoBetaBinomFitter()
        bbtf.fit(
            donations['frequency'],
            donations['recency'],
            donations['periods'],
            donations['weights'],
        )

        npt.assert_array_almost_equal(
            np.array(
                bbtf_noweights._unload_params('alpha', 'beta', 'gamma',
                                              'delta')),
            np.array(bbtf._unload_params('alpha', 'beta', 'gamma', 'delta')),
            decimal=4)
 def test_params_same_from_sim_data(self):
     """Fitting simulated data recovers the parameters used to simulate it.

     Simulates 100000 customers with the ``bbgb_params`` in scope outside
     this method, groups them by (frequency, recency, n) with a customer
     count per group, fits the model, and requires every fitted parameter
     to be within 0.1 of the corresponding value of ``bbgb_params`` (taken
     in dict order).
     """
     sim_data = beta_geometric_beta_binom_model(N=6,
                                                size=100000,
                                                **bbgb_params)
     bbtf = estimation.BetaGeoBetaBinomFitter()
     grouped_data = sim_data.groupby(['frequency', 'recency',
                                      'n'])['customer_id'].count()
     grouped_data = grouped_data.reset_index().rename(
         columns={'customer_id': 'n_custs'})
     bbtf.fit(grouped_data['frequency'], grouped_data['recency'],
              grouped_data['n'], grouped_data['n_custs'])

     true_params = np.array(list(bbgb_params.values()))
     fitted_params = np.array(
         bbtf._unload_params('alpha', 'beta', 'gamma', 'delta'))
     # Compare the absolute error: a signed difference would let a fitted
     # value that overshoots the true one by any amount pass the check.
     assert (np.abs(true_params - fitted_params) < 0.1).all()
Пример #8
0
    def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self):
        """
        Total from Hardie's Conditional Expectations (II) sheet.

        The fitter's conditional expectation (called with 5) is multiplied by
        the ``n_custs`` column and summed; the total must match the
        spreadsheet value when rounded to a whole number.

        http://brucehardie.com/notes/010/BGBB_2011-01-20_XLSX.zip

        """
        fitter = estimation.BetaGeoBetaBinomFitter()
        fitter.fit(
            donations['frequency'],
            donations['recency'],
            donations['n'],
            donations['n_custs'],
        )

        per_row_expectation = fitter.conditional_expected_number_of_purchases_up_to_time(5)
        weighted_expectation = per_row_expectation * donations['n_custs']
        total_predicted = weighted_expectation.sum()

        expected = 12884.2  # Sum of column F Exp Tot
        npt.assert_almost_equal(expected, total_predicted, decimal=0)