Пример #1
0
    def test_plot_cumulative_transactions(self):
        """Test plotting cumulative transactions with CDNOW example.

        Loads the raw CDNOW sample transactions, fits a ``BetaGeoFitter`` on
        their weekly summary, draws the cumulative transactions plot and
        returns the current matplotlib figure.
        """
        # Raw string for the regex separator: '\s' is not a valid escape
        # sequence in a plain string literal and triggers a warning on
        # current Python versions. The runtime value is unchanged.
        transactions = load_dataset(
            'CDNOW_sample.txt', header=None, sep=r'\s+')
        transactions.columns = [
            'id_total', 'id_sample', 'date', 'num_cd_purc', 'total_value'
        ]
        # `t` is passed below both as-is and doubled; with freq='W' it is
        # presumably a number of weeks -- confirm against the plotting API.
        t = 39
        freq = 'W'

        transactions_summary = utils.summary_data_from_transaction_data(
            transactions,
            'id_sample',
            'date',
            datetime_format='%Y%m%d',
            observation_period_end='19970930',
            freq=freq)

        bgf = BetaGeoFitter(penalizer_coef=0.01)
        bgf.fit(transactions_summary['frequency'],
                transactions_summary['recency'], transactions_summary['T'])

        plt.figure()
        plotting.plot_cumulative_transactions(bgf,
                                              transactions,
                                              'date',
                                              'id_sample',
                                              2 * t,
                                              t,
                                              freq=freq,
                                              xlabel='week',
                                              datetime_format='%Y%m%d')
        # NOTE(review): the figure is returned rather than asserted on;
        # presumably an image-comparison decorator outside this view
        # consumes it -- confirm.
        return plt.gcf()
Пример #2
0
    def test_plot_cumulative_transactions(self, cdnow_transactions, bgf_transactions):
        """Draw the cumulative transactions plot for the CDNOW example.

        Opens a new figure, plots onto it using the supplied model and
        transactions, and returns the current matplotlib figure.
        """
        n_periods = 39
        plot_options = {
            'freq': 'W',
            'xlabel': 'week',
            'datetime_format': '%Y%m%d',
        }

        plt.figure()
        plotting.plot_cumulative_transactions(
            bgf_transactions,
            cdnow_transactions,
            'date',
            'id_sample',
            2 * n_periods,
            n_periods,
            **plot_options)
        return plt.gcf()
def evaluation_plots(plot_type):
    """
    Draw one of the model evaluation plots.

    Evaluation Plots:
    - Tracking Cumulative Transactions
    - Tracking Daily Transactions
    - Frequency of Repeated Transactions
    - Calibration vs Holdout.

    The calibration model is loaded from ``models/calibration_model.pkl``;
    only the dataset required by the requested plot is read from
    ``datasets/``.

    Parameters
    ----------
    plot_type : str
        "tracking" - Tracking Cumulative and Tracking Daily Transactions.
        "repeated" - Frequency of Repeated Transactions.
        "calibration_holdout" - Calibration vs Holdout Purchases.

    Returns
    -------
    None
        The plots are drawn through matplotlib; nothing is returned.

    Raises
    ------
    ValueError
        If ``plot_type`` is not one of the values listed above.
    """
    valid_plot_types = ("tracking", "repeated", "calibration_holdout")

    # Reject unknown values before touching the disk: previously a typo in
    # plot_type loaded the model and both CSV files and then silently drew
    # nothing.
    if plot_type not in valid_plot_types:
        raise ValueError(
            "Unknown plot_type {!r}; expected one of: {}.".format(
                plot_type, ", ".join(valid_plot_types)))

    # Loading Calibration Model.
    # NOTE(review): load_model presumably deserializes a pickle file -- only
    # point it at trusted model files; confirm against the library docs.
    cal_bg_nbd = BetaGeoFitter(penalizer_coef=0.0)
    cal_bg_nbd.load_model(path="models/calibration_model.pkl")

    if plot_type == "tracking":
        # Loading Transactions (only needed for the tracking plots).
        transactions = pd.read_csv("datasets/transactions.csv")

        # Both tracking plots share the same inputs. t and t_cal are
        # hard-coded period counts (freq="D"); presumably they match the
        # calibration/holdout split of this dataset -- confirm if the data
        # changes.
        tracking_kwargs = dict(
            model=cal_bg_nbd,
            transactions=transactions,
            datetime_col="order_purchase_timestamp",
            customer_id_col="customer_unique_id",
            t=604,
            t_cal=512,
            freq="D",
        )

        fig = plt.figure(figsize=(20, 4))
        # Cumulative plot in the left subplot, incremental plot in the right.
        plot_cumulative_transactions(ax=fig.add_subplot(121),
                                     **tracking_kwargs)
        plot_incremental_transactions(ax=fig.add_subplot(122),
                                      **tracking_kwargs)

    elif plot_type == "repeated":
        plot_period_transactions(model=cal_bg_nbd)

    elif plot_type == "calibration_holdout":
        # Loading summary_cal_holdout dataset (only needed for this plot).
        summary_cal_holdout = pd.read_csv("datasets/summary_cal_holdout.csv")
        plot_calibration_purchases_vs_holdout_purchases(
            model=cal_bg_nbd, calibration_holdout_matrix=summary_cal_holdout)
Пример #4
0
    def test_plot_cumulative_transactions(self, cdnow_transactions,
                                          bgf_transactions):
        """Test the cumulative transactions plot with the CDNOW example.

        Compares the y-data of the first two plotted lines with reference
        values (1% relative tolerance) and checks the legend entries, the
        title and both axis labels. The current figure is always closed at
        the end, even when an assertion fails.
        """

        # Reference y-values for the first plotted line.
        actual = [
            0, 3, 17, 44, 67, 122, 173, 240, 313, 375, 466, 555, 655, 739, 825,
            901, 970, 1033, 1091, 1159, 1217, 1277, 1325, 1367, 1444, 1528,
            1584, 1632, 1675, 1741, 1813, 1846, 1894, 1954, 2002, 2051, 2094,
            2141, 2195, 2248, 2299, 2344, 2401, 2452, 2523, 2582, 2636, 2685,
            2739, 2805, 2860, 2891, 2933, 2983, 3023, 3057, 3099, 3140, 3184,
            3226, 3283, 3344, 3400, 3456, 3517, 3553, 3592, 3632, 3661, 3699,
            3740, 3770, 3802, 3842, 3887, 3939, 3967, 4001
        ]
        # Reference y-values for the second plotted line.
        predicted = [
            4.089e+00, 1.488e+01, 3.240e+01, 5.716e+01, 8.939e+01, 1.297e+02,
            1.769e+02, 2.310e+02, 2.927e+02, 3.616e+02, 4.369e+02, 5.174e+02,
            5.984e+02, 6.775e+02, 7.549e+02, 8.307e+02, 9.052e+02, 9.784e+02,
            1.050e+03, 1.121e+03, 1.191e+03, 1.260e+03, 1.328e+03, 1.396e+03,
            1.462e+03, 1.528e+03, 1.594e+03, 1.658e+03, 1.722e+03, 1.786e+03,
            1.849e+03, 1.911e+03, 1.973e+03, 2.035e+03, 2.096e+03, 2.156e+03,
            2.216e+03, 2.276e+03, 2.335e+03, 2.394e+03, 2.452e+03, 2.511e+03,
            2.568e+03, 2.626e+03, 2.683e+03, 2.740e+03, 2.797e+03, 2.853e+03,
            2.909e+03, 2.964e+03, 3.020e+03, 3.075e+03, 3.130e+03, 3.185e+03,
            3.239e+03, 3.293e+03, 3.347e+03, 3.401e+03, 3.454e+03, 3.507e+03,
            3.560e+03, 3.613e+03, 3.666e+03, 3.718e+03, 3.771e+03, 3.823e+03,
            3.874e+03, 3.926e+03, 3.978e+03, 4.029e+03, 4.080e+03, 4.131e+03,
            4.182e+03, 4.232e+03, 4.283e+03, 4.333e+03, 4.383e+03, 4.433e+03
        ]
        labels = ['actual', 'predicted']
        t = 39
        freq = 'W'

        try:
            ax = plotting.plot_cumulative_transactions(
                bgf_transactions,
                cdnow_transactions,
                'date',
                'id_sample',
                2 * t,
                t,
                freq=freq,
                xlabel='week',
                datetime_format='%Y%m%d')

            lines = ax.lines
            legend = ax.legend_

            # get_data() returns (x, y); only the y-values are compared.
            actual_y = lines[0].get_data()[1]
            predicted_y = lines[1].get_data()[1]
            assert_allclose(actual, actual_y, rtol=0.01)
            assert_allclose(predicted, predicted_y, rtol=0.01)
            assert_array_equal(
                [e.get_text() for e in legend.get_texts()], labels)
            assert_equal(ax.title.get_text(),
                         "Tracking Cumulative Transactions")
            assert_equal(ax.xaxis.get_label().get_text(), "week")
            assert_equal(ax.yaxis.get_label().get_text(),
                         "Cumulative Transactions")
        finally:
            # Close the current figure even when an assertion fails, so a
            # failing test does not leak an open figure into later tests.
            plt.close()