Пример #1
0
def test_expected_cumulative_transactions_dedups_inside_a_time_period(
        fitted_bg, example_transaction_data):
    """
    Compare weekly and daily 'actual' cumulative transaction counts.

    The same fitted model and transactions are evaluated for 10 periods, once
    with freq='W' and once with freq='D'. The element-wise comparison
    ``weekly 'actual' >= daily 'actual'`` must hold for every entry.
    """
    def cumulative_for(freq):
        # Only the aggregation frequency differs between the two calls.
        return utils.expected_cumulative_transactions(
            fitted_bg, example_transaction_data, 'date', 'id', 10, freq=freq)

    weekly = cumulative_for('W')
    daily = cumulative_for('D')

    weekly_not_below_daily = weekly['actual'] >= daily['actual']
    assert weekly_not_below_daily.all()
Пример #2
0
def test_expected_cumulative_transactions_dedups_inside_a_time_period(
        fitted_bg, example_transaction_data):
    """
    Check that weekly 'actual' cumulative counts never fall below daily ones.

    Both frames are computed from the same fitted model and transactions for
    10 periods; only the ``freq`` argument ("W" versus "D") differs.
    """
    # Positional arguments shared by the weekly and the daily call.
    shared_args = (fitted_bg, example_transaction_data, "date", "id", 10)

    weekly_frame = utils.expected_cumulative_transactions(*shared_args, freq="W")
    daily_frame = utils.expected_cumulative_transactions(*shared_args, freq="D")

    comparison = weekly_frame["actual"] >= daily_frame["actual"]
    assert comparison.all()
Пример #3
0
def df_cum_transactions(cdnow_transactions):
    """
    Build the cumulative-transactions frame used by the plotting tests.

    A ParetoNBDFitter is fitted on the frequency/recency/T summary of
    ``cdnow_transactions`` (daily frequency with freq_multiplier 7, observation
    period ending '19970930'). The result of
    ``utils.expected_cumulative_transactions`` for t = 25 * 7 periods is
    returned, with set_index_date=False.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this snippet.
    """
    date_column, id_column = 'date', 'id_sample'
    date_format = '%Y%m%d'
    period = 'D'
    multiplier = 7

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        id_column,
        date_column,
        datetime_format=date_format,
        freq=period,
        freq_multiplier=multiplier,
        observation_period_end='19970930',
    ).reset_index()

    fitter = ParetoNBDFitter()
    fitter.fit(summary['frequency'], summary['recency'], summary['T'])

    return utils.expected_cumulative_transactions(
        fitter,
        cdnow_transactions,
        date_column,
        id_column,
        25 * 7,
        date_format,
        period,
        set_index_date=False,
        freq_multiplier=multiplier)
Пример #4
0
def plot_incremental_transactions(model, transactions, datetime_col, customer_id_col,
                                  t, t_cal, datetime_format=None, freq='D',
                                  set_index_date=False,
                                  title='Tracking Daily Transactions',
                                  xlabel='day', ylabel='Transactions', **kwargs):
    """
    Plot a figure of the predicted and actual incremental (per-period) transactions of users.

    The incremental values are obtained by differencing the cumulative
    transactions returned by expected_cumulative_transactions.
    Parameters:
        model: A fitted lifetimes model
        transactions: a Pandas DataFrame containing the transactions history of the customer_id
        datetime_col: the column in transactions that denotes the datetime the purchase was made.
        customer_id_col: the column in transactions that denotes the customer_id
        t: the number of time units since the beginning of
            data for which we want to calculate cumulative transactions
        t_cal: position of the red dashed vertical line; used directly as the x coordinate,
            or, when set_index_date is True, as an integer position into the DataFrame index
            (presumably the end of the calibration period - confirm with callers)
        datetime_format: a string that represents the timestamp format. Useful if Pandas can't
            understand the provided format.
        freq: Default 'D' for days, 'W' for weeks, 'M' for months... etc. Full list here:
            http://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects
        set_index_date: when True set date as Pandas DataFrame index, default False - number of time units
        title: figure title
        xlabel: figure xlabel, overwritten with 'date' when set_index_date is True
        ylabel: figure ylabel
        kwargs: passed into the pandas.DataFrame.plot command; an 'ax' entry, if present,
            is removed and used as the axes to draw on.
    Returns:
        the axes the figure was drawn on
    """
    from matplotlib import pyplot as plt

    # Draw on the caller's axes when supplied through kwargs, otherwise on a new subplot.
    ax = kwargs.pop('ax', None)
    if not ax:
        ax = plt.subplot(111)

    cumulative = expected_cumulative_transactions(
        model, transactions, datetime_col, customer_id_col, t,
        datetime_format=datetime_format, freq=freq,
        set_index_date=set_index_date)

    def _change_from_previous(column):
        # Difference to the previous row; the first row has no predecessor.
        return column - column.shift(1)

    incremental = cumulative.apply(_change_from_previous)
    ax = incremental.plot(ax=ax, title=title, **kwargs)

    marker_x = t_cal
    if set_index_date:
        # With a date index the marker must be an index label, not a position.
        marker_x = incremental.index[int(t_cal)]
        xlabel = 'date'

    ax.axvline(x=marker_x, color='r', linestyle='--')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    return ax
Пример #5
0
def test_expected_cumulative_transactions_date_index(cdnow_transactions):
    """
    Check date-indexed cumulative transactions computed with a BetaGeoFitter.

    The model is fitted on the daily (freq_multiplier = 1) summary of the
    cdnow transactions. Cumulative transactions are then requested for
    t = 14 periods with set_index_date=True, and the last 4 rows are compared
    with the reference values:

    dates = ['1997-01-11', '1997-01-12', '1997-01-13', '1997-01-14']
    actual_trans = [11, 12, 15, 19]
    expected_trans = [10.67, 12.67, 14.87, 17.24]

    """
    id_column, date_column = "id_sample", "date"
    date_format = "%Y%m%d"
    period, multiplier = "D", 1

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        id_column,
        date_column,
        datetime_format=date_format,
        freq=period,
        freq_multiplier=multiplier,
        observation_period_end="19970930",
    ).reset_index()

    bgf = BetaGeoFitter()
    bgf.fit(summary["frequency"], summary["recency"], summary["T"])

    cumulative = utils.expected_cumulative_transactions(
        bgf,
        cdnow_transactions,
        date_column,
        id_column,
        14,
        date_format,
        period,
        set_index_date=True,
        freq_multiplier=multiplier,
    )

    reference_dates = ["1997-01-11", "1997-01-12", "1997-01-13", "1997-01-14"]
    reference_actual = [11, 12, 15, 19]
    reference_predicted = [10.67, 12.67, 14.87, 17.24]

    # Only the last four periods are checked against the reference data.
    observed_dates = cumulative.iloc[-4:].index.to_timestamp().astype(str)
    observed_actual = cumulative["actual"].iloc[-4:].values
    observed_predicted = cumulative["predicted"].iloc[-4:].values.round(2)

    assert all(reference_dates == observed_dates)
    assert_allclose(observed_actual, reference_actual)
    assert_allclose(observed_predicted, reference_predicted, atol=1e-2)
Пример #6
0
def df_cum_transactions(cdnow_transactions):
    """
    Return cumulative transactions for the cdnow data from a Pareto/NBD model.

    The transactions are summarised with daily frequency and
    freq_multiplier 7 up to the observation period end "19970930". A
    ParetoNBDFitter is fitted on the resulting frequency, recency and T
    columns, and ``utils.expected_cumulative_transactions`` is evaluated for
    t = 25 * 7 periods with set_index_date=False.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this snippet.
    """
    # Settings shared by the summary step and the cumulative computation.
    settings = dict(
        datetime_format="%Y%m%d",
        freq="D",
        freq_multiplier=7,
    )
    id_col = "id_sample"
    date_col = "date"

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        id_col,
        date_col,
        observation_period_end="19970930",
        **settings
    )
    summary = summary.reset_index()

    pareto_nbd = ParetoNBDFitter()
    pareto_nbd.fit(summary["frequency"], summary["recency"], summary["T"])

    cumulative = utils.expected_cumulative_transactions(
        pareto_nbd,
        cdnow_transactions,
        date_col,
        id_col,
        25 * 7,
        settings["datetime_format"],
        settings["freq"],
        set_index_date=False,
        freq_multiplier=settings["freq_multiplier"],
    )
    return cumulative
Пример #7
0
def plot_cumulative_transactions(model,
                                 transactions,
                                 datetime_col,
                                 customer_id_col,
                                 t,
                                 t_cal,
                                 datetime_format=None,
                                 freq='D',
                                 set_index_date=False,
                                 title='Tracking Cumulative Transactions',
                                 xlabel='day',
                                 ylabel='Cumulative Transactions',
                                 ax=None,
                                 **kwargs):
    """
    Plot the predicted and actual cumulative transactions of users.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in transactions that denotes the datetime the purchase was made.
    customer_id_col: str
        The column in transactions that denotes the customer_id
    t: float
        The number of time units since the beginning of
        data for which we want to calculate cumulative transactions
    t_cal: float
        Position of the red dashed vertical line. Used directly as the x
        coordinate, or, when set_index_date is True, converted to an integer
        position into the DataFrame index. Presumably the end of the
        calibration period - confirm with callers.
    datetime_format: str, optional
        A string that represents the timestamp format. Useful if Pandas
        can't understand the provided format.
    freq: str, optional
        Default 'D' for days, 'W' for weeks, 'M' for months... etc.
        Full list here:
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects
    set_index_date: bool, optional
        When True set date as Pandas DataFrame index, default False - number of time units
    title: str, optional
        Figure title
    xlabel: str, optional
        Figure xlabel; replaced by 'date' when set_index_date is True
    ylabel: str, optional
        Figure ylabel
    ax: matplotlib.AxesSubplot, optional
        User-supplied axes to draw on; a new subplot is created when None
    kwargs
        Passed into the pandas.DataFrame.plot command.

    Returns
    -------
    axes: matplotlib.AxesSubplot

    """
    from matplotlib import pyplot as plt

    target_ax = plt.subplot(111) if ax is None else ax

    cumulative = expected_cumulative_transactions(
        model,
        transactions,
        datetime_col,
        customer_id_col,
        t,
        datetime_format=datetime_format,
        freq=freq,
        set_index_date=set_index_date)

    drawn_ax = cumulative.plot(ax=target_ax, title=title, **kwargs)

    marker_x = t_cal
    if set_index_date:
        # With a date index the marker must be an index label, not a position.
        marker_x = cumulative.index[int(t_cal)]
        xlabel = 'date'

    drawn_ax.axvline(x=marker_x, color='r', linestyle='--')
    drawn_ax.set_xlabel(xlabel)
    drawn_ax.set_ylabel(ylabel)
    return drawn_ax