Exemplo n.º 1
0
    def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(
            self):
        """Check that a 30-day forecast agrees for daily and hourly summaries.

        The same transactions are summarised with ``freq='D'`` and
        ``freq='h'``; a single ``ModifiedBetaGeoFitter`` is fitted to each
        summary in turn (daily first), re-seeding NumPy before every fit, and
        the expected purchase counts over 30 days (expressed in the matching
        unit) are compared with ``assert_almost_equal``.
        """
        transactions = load_transaction_data(parse_dates=['date'])
        period_end = max(transactions.date)

        # Build both summaries up front, daily first and hourly second.
        summaries = {
            freq: utils.summary_data_from_transaction_data(
                transactions,
                'id',
                'date',
                observation_period_end=period_end,
                freq=freq)
            for freq in ('D', 'h')
        }

        days = 30
        hours_per_day = 24
        horizons = (('D', days), ('h', days * hours_per_day))

        fitter = estimation.ModifiedBetaGeoFitter()
        forecasts = []
        for freq, horizon in horizons:
            summary = summaries[freq]
            # Same seed before each fit so both fits start from identical state.
            np.random.seed(0)
            fitter.fit(summary['frequency'], summary['recency'], summary['T'])
            forecasts.append(
                fitter.expected_number_of_purchases_up_to_time(horizon))

        from_daily, from_hourly = forecasts
        npt.assert_almost_equal(from_daily, from_hourly)
Exemplo n.º 2
0
    def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
        """Plot calibration vs. holdout purchases by time since last purchase.

        Refits the shared ``bgf`` model on the calibration columns, draws the
        plot on a fresh figure and returns the current figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31')
        calibration_columns = ('frequency_cal', 'recency_cal', 'T_cal')
        bgf.fit(*[cal_holdout[column] for column in calibration_columns])

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind='time_since_last_purchase')
        return plt.gcf()
Exemplo n.º 3
0
    def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
        """Plot calibration vs. holdout purchases by time since last purchase.

        Refits the shared ``bgf`` model on the calibration columns, draws the
        plot on a fresh figure and returns the current figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), "id", "date", "2014-09-01", "2014-12-31"
        )
        frequency = cal_holdout["frequency_cal"]
        recency = cal_holdout["recency_cal"]
        age = cal_holdout["T_cal"]
        bgf.fit(frequency, recency, age)

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind="time_since_last_purchase"
        )
        return plt.gcf()
Exemplo n.º 4
0
    def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
        """Plot calibration vs. holdout purchases by time since last purchase.

        Refits the shared ``bgf`` model on the calibration columns, draws the
        plot on a fresh figure and returns the current figure.
        """
        transactions = load_transaction_data()
        cal_holdout = utils.calibration_and_holdout_data(
            transactions, 'id', 'date', '2014-09-01', '2014-12-31')
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind='time_since_last_purchase')
        return plt.gcf()
Exemplo n.º 5
0
 def test_plot_customer_alive_history(self):
     """Plot the alive-probability history of customer 35 and return the figure.

     Opens a fresh figure, selects the transactions whose ``id`` equals 35,
     hands them to ``plotting.plot_history_alive`` with the shared ``bgf``
     model and 200 days since birth, then returns the current figure.
     """
     plt.figure()
     transaction_data = load_transaction_data()
     # yes I know this is using the wrong data, but I'm testing plotting here.
     id_user = 35  # named ``id_user`` so the ``id`` builtin is not shadowed
     days_since_birth = 200
     # Boolean-mask selection via ``.loc``; the ``.ix`` indexer no longer
     # exists in current pandas releases.
     sp_trans = transaction_data.loc[transaction_data['id'] == id_user]
     plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
     return plt.gcf()
Exemplo n.º 6
0
 def test_plot_customer_alive_history(self):
     """Plot the alive-probability history of customer 35 and return the figure.

     Opens a fresh figure, selects the transactions whose ``id`` equals 35,
     hands them to ``plotting.plot_history_alive`` with the shared ``bgf``
     model and 200 days since birth, then returns the current figure.
     """
     plt.figure()
     transaction_data = load_transaction_data()
     # yes I know this is using the wrong data, but I'm testing plotting here.
     id_user = 35  # named ``id_user`` so the ``id`` builtin is not shadowed
     days_since_birth = 200
     # Boolean-mask selection via ``.loc``; the ``.ix`` indexer no longer
     # exists in current pandas releases.
     sp_trans = transaction_data.loc[transaction_data['id'] == id_user]
     plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
     return plt.gcf()
Exemplo n.º 7
0
    def test_plot_calibration_purchases_vs_holdout_purchases(self):
        """Draw the calibration-vs-holdout purchases plot and show it.

        Refits the shared ``bgf`` model on the calibration columns, plots on a
        fresh figure with the default plot kind and calls ``plt.show()``.
        """
        from matplotlib import pyplot as plt

        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31')
        calibration_columns = ('frequency_cal', 'recency_cal', 'T_cal')
        bgf.fit(*[cal_holdout[column] for column in calibration_columns])

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, cal_holdout)
        plt.show()
Exemplo n.º 8
0
    def test_plot_customer_alive_history(self):
        """Plot the alive-probability history of customer 35 and show it.

        Selects the transactions whose ``id`` equals 35, opens a fresh figure,
        hands the rows to ``plotting.plot_history_alive`` with the shared
        ``bgf`` model and 200 days since birth, then calls ``plt.show()``.
        """
        from matplotlib import pyplot as plt

        transaction_data = load_transaction_data()
        # yes I know this is using the wrong data, but I'm testing plotting here.
        id_user = 35  # named ``id_user`` so the ``id`` builtin is not shadowed
        days_since_birth = 200
        # Boolean-mask selection via ``.loc``; the ``.ix`` indexer no longer
        # exists in current pandas releases.
        sp_trans = transaction_data.loc[transaction_data['id'] == id_user]
        plt.figure()
        plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
        plt.show()
Exemplo n.º 9
0
    def test_plot_customer_alive_history(self):
        """Plot the alive-probability history of customer 35 and show it.

        Selects the transactions whose ``id`` equals 35, opens a fresh figure,
        hands the rows to ``plotting.plot_history_alive`` with the shared
        ``bgf`` model and 200 days since birth, then calls ``plt.show()``.
        """
        from matplotlib import pyplot as plt

        transaction_data = load_transaction_data()
        # yes I know this is using the wrong data, but I'm testing plotting here.
        id_user = 35  # named ``id_user`` so the ``id`` builtin is not shadowed
        days_since_birth = 200
        # Boolean-mask selection via ``.loc``; the ``.ix`` indexer no longer
        # exists in current pandas releases.
        sp_trans = transaction_data.loc[transaction_data['id'] == id_user]
        plt.figure()
        plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
        plt.show()
Exemplo n.º 10
0
    def test_plot_calibration_purchases_vs_holdout_purchases(self):
        """Draw the calibration-vs-holdout purchases plot and show it.

        Refits the shared ``bgf`` model on the calibration columns, plots on a
        fresh figure with the default plot kind and calls ``plt.show()``.
        """
        from matplotlib import pyplot as plt

        transactions = load_transaction_data()
        cal_holdout = utils.calibration_and_holdout_data(
            transactions, 'id', 'date', '2014-09-01', '2014-12-31')

        frequency = cal_holdout['frequency_cal']
        recency = cal_holdout['recency_cal']
        age = cal_holdout['T_cal']
        bgf.fit(frequency, recency, age)

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout)
        plt.show()
Exemplo n.º 11
0
    def test_plot_customer_alive_history(self, bgf):
        """Check the line data, legend, title and axis labels of the alive-history plot.

        ``bgf`` is passed straight to ``plotting.plot_history_alive``
        (presumably a fitted model supplied by a fixture -- confirm against
        the fixture definition). The plot is built from the transactions of
        customer 35 with 200 days since birth, and the returned axes are
        inspected instead of comparing rendered images.
        """
        from datetime import datetime, timedelta

        # Expected x values: one point per day from 2014-06-30 up to, but not
        # including, 2015-01-17 (``np.arange`` excludes the stop value).
        x_expected = np.arange(datetime(2014, 6, 30), datetime(2015, 1, 17),
                               timedelta(days=1))
        # Expected y values of the first plotted line, compared below with an
        # absolute tolerance of 0.01.
        y_expected = [
            1.0, 1.0, 1.0, 0.75, 0.72, 0.69, 0.67, 0.64, 0.62, 0.59, 0.57,
            0.55, 0.81, 0.79, 0.77, 0.75, 0.85, 0.87, 0.85, 0.82, 0.8, 0.89,
            0.87, 0.84, 0.82, 0.79, 0.76, 0.73, 0.69, 0.66, 0.62, 0.59, 0.9,
            0.89, 0.87, 0.85, 0.83, 0.81, 0.91, 0.9, 0.88, 0.87, 0.85, 0.83,
            0.8, 0.78, 0.75, 0.73, 0.7, 0.67, 0.64, 0.61, 0.57, 0.54, 0.51,
            0.48, 0.45, 0.42, 0.39, 0.37, 0.34, 0.32, 0.29, 0.27, 0.25, 0.23,
            0.21, 0.2, 0.18, 0.92, 0.93, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87,
            0.85, 0.84, 0.82, 0.8, 0.79, 0.77, 0.75, 0.73, 0.71, 0.94, 0.93,
            0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.86, 0.85, 0.83, 0.82, 0.8,
            0.78, 0.77, 0.94, 0.93, 0.93, 0.92, 0.91, 0.9, 0.89, 0.94, 0.94,
            0.93, 0.92, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.95, 0.94, 0.94,
            0.95, 0.95, 0.94, 0.93, 0.95, 0.95, 0.94, 0.94, 0.93, 0.92, 0.91,
            0.9, 0.89, 0.88, 0.87, 0.86, 0.96, 0.95, 0.95, 0.94, 0.93, 0.93,
            0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.85, 0.84, 0.83, 0.81, 0.79,
            0.78, 0.76, 0.74, 0.72, 0.7, 0.68, 0.66, 0.63, 0.61, 0.59, 0.56,
            0.54, 0.52, 0.49, 0.47, 0.44, 0.42, 0.4, 0.38, 0.36, 0.34, 0.32,
            0.3, 0.28, 0.26, 0.24, 0.23, 0.21, 0.2, 0.18, 0.17, 0.16, 0.15,
            0.14, 0.13, 0.12, 0.11, 0.1, 0.1, 0.09, 0.08, 0.08, 0.07, 0.07,
            0.06, 0.06, 0.05
        ]
        # Expected legend entries, in order.
        labels = ['P_alive', 'purchases']

        transaction_data = load_transaction_data()
        # yes I know this is using the wrong data, but I'm testing plotting here.
        id_user = 35
        days_since_birth = 200
        # Keep only the rows belonging to the selected customer.
        sp_trans = transaction_data.loc[transaction_data['id'] == id_user]
        ax = plotting.plot_history_alive(bgf, days_since_birth, sp_trans,
                                         'date')

        # Only the first line on the axes is checked against the expected data.
        x, y = ax.lines[0].get_data()
        legend = ax.legend_

        # Each y element is rounded individually before the tolerance check.
        assert_allclose([np.round(e, 5) for e in y], y_expected,
                        atol=0.01)  # y has some weird array shapes
        assert_array_equal(x, x_expected)
        assert_array_equal([e.get_text() for e in legend.get_texts()], labels)
        assert_equal(ax.title.get_text(), "History of P_alive")
        assert_equal(ax.xaxis.get_label().get_text(), "")
        assert_equal(ax.yaxis.get_label().get_text(), "P_alive")
        # Close the figure; not reached if an assertion above fails.
        plt.close()
Exemplo n.º 12
0
    def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
        """Check that a 30-day forecast agrees for daily and hourly summaries.

        The same transactions are summarised with ``freq='D'`` and
        ``freq='h'``; one ``ModifiedBetaGeoFitter`` is fitted to each summary
        in turn (daily first), re-seeding NumPy before every fit, and the
        expected purchase counts over 30 days are compared with
        ``assert_almost_equal``.
        """
        transactions = load_transaction_data(parse_dates=['date'])

        def summarise(freq):
            # Summary of the same transactions at the requested time resolution.
            return utils.summary_data_from_transaction_data(
                transactions, 'id', 'date',
                observation_period_end=max(transactions.date), freq=freq)

        by_day = summarise('D')
        by_hour = summarise('h')

        horizon_days = 30
        hours_per_day = 24
        fitter = estimation.ModifiedBetaGeoFitter()

        def forecast(summary, horizon):
            # Re-seed so both fits start from the same random state.
            np.random.seed(0)
            fitter.fit(summary['frequency'], summary['recency'], summary['T'])
            return fitter.expected_number_of_purchases_up_to_time(horizon)

        from_daily = forecast(by_day, horizon_days)
        from_hourly = forecast(by_hour, horizon_days * hours_per_day)

        npt.assert_almost_equal(from_daily, from_hourly)
Exemplo n.º 13
0
def transaction_data():
    """Return the result of ``load_transaction_data()`` called with no arguments."""
    dataset = load_transaction_data()
    return dataset
Exemplo n.º 14
0
#plt.show()

#from lifetimes.plotting import plot_probability_alive_matrix
#plot_probability_alive_matrix(bgf)


#plt.show()

# Predicted purchases per customer over the next ``t`` periods, using the
# ``bgf`` model and the ``data`` summary frame defined earlier in the script.
t = 1
data['predicted_purchases'] = data.apply(
    lambda r: bgf.conditional_expected_number_of_purchases_up_to_time(
        t, r['frequency'], r['recency'], r['T']),
    axis=1)
# Show the five customers with the highest prediction. ``sort_values``
# replaces ``DataFrame.sort``, which no longer exists in pandas.
print(data.sort_values('predicted_purchases').tail(5))

from lifetimes.datasets import load_transaction_data
from lifetimes.utils import summary_data_from_transaction_data

# Inspect the raw transaction log.
transaction_data = load_transaction_data()
print(transaction_data.head())
print(type(transaction_data))
print(transaction_data.columns)

print(data.columns)
print(data.head())

# Same prediction with a longer horizon; overwrites the column computed above.
t = 10
data['predicted_purchases'] = data.apply(
    lambda r: bgf.conditional_expected_number_of_purchases_up_to_time(
        t, r['frequency'], r['recency'], r['T']),
    axis=1)
print(data)

from pandas import DataFrame

# Small hand-made frame built from a list of dicts.
d = [
    {'id': 1, 'R': 23, 'F': 12, 'M': 12.5},
    {'id': 2, 'R': 43, 'F': 1, 'M': 120.5},
    {'id': 3, 'R': 203, 'F': 2, 'M': 19.5},
]
test = DataFrame(d)
print(test)