示例#1
0
    def test_fit_with_and_without_weights(self, cdnow_customers):
        """Fitting raw rows and fitting grouped rows with weights should agree.

        The customer frame is collapsed to one row per distinct
        (frequency, recency, T) triple, with the group size stored in a
        "weights" column. Both fits must yield the same parameters to two
        decimal places.
        """
        weighted = (
            cdnow_customers.copy()
            .groupby(["frequency", "recency", "T"])
            .size()
            .reset_index()
            .rename(columns={0: "weights"})
        )

        unweighted_fitter = lt.ParetoNBDFitter()
        unweighted_fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        weighted_fitter = lt.ParetoNBDFitter()
        weighted_fitter.fit(
            weighted["frequency"],
            weighted["recency"],
            weighted["T"],
            weighted["weights"],
        )

        param_names = ("r", "alpha", "s", "beta")
        npt.assert_array_almost_equal(
            np.array(unweighted_fitter._unload_params(*param_names)),
            np.array(weighted_fitter._unload_params(*param_names)),
            decimal=2,
        )
示例#2
0
    def test_fit_with_index(self, cdnow_customers):
        """An explicit ``index`` is kept on the fitted data; ``index=None`` is not.

        The custom index counts down from ``len(cdnow_customers)`` to 1, so
        it differs from whatever index the fitter assigns when none is given.
        """
        custom_index = range(len(cdnow_customers), 0, -1)

        ptf = lt.ParetoNBDFitter()
        ptf.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            index=custom_index,
        )
        # Assert on truthiness directly rather than comparing to True/False.
        assert (ptf.data.index == custom_index).all()

        ptf = lt.ParetoNBDFitter()
        ptf.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            index=None,
        )
        assert not (ptf.data.index == custom_index).all()
示例#3
0
 def test_conditional_probability_alive(self, cdnow_customers):
     """
     Check P(alive) for one customer against a published reference value.

     Target taken from page 8,
     https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf
     """
     fitter = lt.ParetoNBDFitter()
     # Parameters are set by hand; no fit is run in this test.
     fitter.params_ = pd.Series(
         [0.5534, 10.5802, 0.6061, 11.6562],
         index=["r", "alpha", "s", "beta"],
     )

     target = 0.9979
     probability = fitter.conditional_probability_alive(26.00, 30.86, 31.00)
     assert abs(probability - target) < 0.001
示例#4
0
    def test_conditional_probability_alive_matrix(self, cdnow_customers):
        """Each matrix cell must equal the scalar P(alive) for the same inputs.

        The cell at ``[t_x][x]`` is compared with
        ``conditional_probability_alive(x, t_x, max_t)``, where ``max_t`` is
        the largest ``T`` in the fitted data, truncated to an int.
        """
        fitter = lt.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )
        alive_matrix = fitter.conditional_probability_alive_matrix()
        horizon = int(fitter.data["T"].max())

        n_rows = alive_matrix.shape[0]
        n_cols = alive_matrix.shape[1]
        for recency in range(n_rows):
            for frequency in range(n_cols):
                cell = alive_matrix[recency][frequency]
                scalar = fitter.conditional_probability_alive(
                    frequency, recency, horizon)
                assert cell == scalar
示例#5
0
 def test_conditional_probability_alive_overflow_error(self):
     """P(alive) stays a finite, non-null value in [0, 1] for these hand-set parameters."""
     fitter = lt.ParetoNBDFitter()
     fitter.params_ = pd.Series(
         [10.465, 7.98565181e-03, 3.0516, 2.820],
         index=["r", "alpha", "s", "beta"],
     )
     frequencies = np.array([40.0, 50.0, 50.0])
     recencies = np.array([5.0, 1.0, 4.0])
     ages = np.array([6.0, 37.0, 37.0])

     probabilities = fitter.conditional_probability_alive(
         frequencies, recencies, ages)
     assert all(
         0 <= p <= 1 and not np.isinf(p) and not pd.isnull(p)
         for p in probabilities
     )
示例#6
0
    def test_conditional_probability_alive_is_between_0_and_1(
            self, cdnow_customers):
        """P(alive) must lie in [0, 1] over a grid of frequency/recency/T values.

        The grid steps by 10 from 0 up to (but excluding) 100, and ``T``
        starts at the current recency.
        """
        fitter = lt.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        grid = np.arange(0, 100, 10.0)
        for frequency in grid:
            for recency in grid:
                for age in np.arange(recency, 100, 10.0):
                    probability = fitter.conditional_probability_alive(
                        frequency, recency, age)
                    assert 0.0 <= probability <= 1.0
示例#7
0
 def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
     """Fitted (r, alpha, s, beta) must match the reference values to 2 decimals."""
     reference_params = np.array([0.553, 10.578, 0.606, 11.669])

     fitter = lt.ParetoNBDFitter()
     fitter.fit(
         cdnow_customers["frequency"],
         cdnow_customers["recency"],
         cdnow_customers["T"],
         iterative_fitting=3,
     )
     fitted_params = np.array(
         fitter._unload_params("r", "alpha", "s", "beta"))

     npt.assert_array_almost_equal(
         reference_params, fitted_params, decimal=2)
示例#8
0
    def test_overflow_error(self):
        """``_log_A_0`` must return finite, non-null, negative values for large frequencies."""
        fitter = lt.ParetoNBDFitter()
        params = np.array([10.465, 7.98565181e-03, 3.0516, 2.820])
        frequencies = np.array([400.0, 500.0, 500.0])
        recencies = np.array([5.0, 1.0, 4.0])
        ages = np.array([6.0, 37.0, 37.0])

        log_values = fitter._log_A_0(params, frequencies, recencies, ages)
        assert all(
            value < 0 and not np.isinf(value) and not pd.isnull(value)
            for value in log_values
        )
示例#9
0
 def test_conditional_expectation_returns_same_value_as_R_BTYD(
         self, cdnow_customers):
     """Compare the conditional expected purchase count with a reference value.

     From https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf
     """
     fitter = lt.ParetoNBDFitter()
     fitter.fit(
         cdnow_customers["frequency"],
         cdnow_customers["recency"],
         cdnow_customers["T"],
     )

     horizon = 52
     frequency, recency, age = 26.00, 30.86, 31
     reference = 25.46

     predicted = fitter.conditional_expected_number_of_purchases_up_to_time(
         horizon, frequency, recency, age)
     assert abs(reference - predicted) < 0.01
示例#10
0
    def test_conditional_probability_of_n_purchases_up_to_time_is_between_0_and_1(
            self, cdnow_customers):
        """
        P(n purchases up to time t) must lie in [0, 1].

        Due to the large parameter space we take a random subset.
        """
        fitter = lt.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )
        probability_of = fitter.conditional_probability_of_n_purchases_up_to_time

        # Samples are drawn in the loop headers so each level is re-sampled
        # on every iteration of the enclosing loop.
        for frequency in np.random.choice(100, 5):
            for recency in np.random.choice(100, 5):
                for age in recency + np.random.choice(100, 5):
                    for horizon in np.random.choice(100, 5):
                        for n_purchases in np.random.choice(10, 5):
                            probability = probability_of(
                                n_purchases, horizon, frequency, recency, age)
                            assert 0.0 <= probability <= 1.0
示例#11
0
    def test_conditional_expectation_underflow(self):
        """Guard conditional_expected_number_of_purchases_up_to_time against underflow.

        Two nearly identical inputs are evaluated. For such a small change
        in the input the result shouldn't change dramatically; without a
        guard against numeric underflow, this change in input would result
        in an underflow error.
        """
        fitter = lt.ParetoNBDFitter()
        fitter.params_ = pd.Series(
            {"alpha": 10.58, "beta": 11.67, "r": 0.55, "s": 0.61})
        expected_purchases = (
            fitter.conditional_expected_number_of_purchases_up_to_time)

        # small change in inputs
        lower = expected_purchases(10, 132, 200, 200)  # 6.2060517889632418
        upper = expected_purchases(10, 133, 200, 200)  # 6.2528722475748113
        assert abs(lower - upper) < 0.05
示例#12
0
    def test_expectation_returns_same_value_as_R_BTYD(self, cdnow_customers):
        """Compare expected purchases for t = 0..9 with reference values.

        From https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
        """
        reference = np.array([
            0.00000000, 0.05077821, 0.09916088, 0.14542507, 0.18979930,
            0.23247466, 0.27361274, 0.31335159, 0.35181024, 0.38909211,
        ])

        fitter = lt.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            tol=1e-6,
        )
        predicted = fitter.expected_number_of_purchases_up_to_time(range(10))

        npt.assert_allclose(reference, predicted, atol=0.01)
示例#13
0
    def test_conditional_probability_of_n_purchases_up_to_time_adds_up_to_1(
            self, cdnow_customers):
        """
        The probabilities over n = 0..19 must sum to 1 (to two decimals).

        Due to the large parameter space we take a random subset. We also restrict our limits to keep the number of
        values of n for which the probability needs to be calculated to a sane level.
        """
        fitter = lt.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )
        probability_of = fitter.conditional_probability_of_n_purchases_up_to_time
        purchase_counts = np.arange(0, 20, 1)

        # Samples are drawn in the loop headers so each level is re-sampled
        # on every iteration of the enclosing loop.
        for frequency in np.random.choice(10, 5):
            for recency in np.random.choice(9, 5):
                for age in np.random.choice(np.arange(recency, 10, 1), 5):
                    for horizon in 1 + np.random.choice(9, 5):
                        probabilities = [
                            probability_of(n, horizon, frequency, recency, age)
                            for n in purchase_counts
                        ]
                        npt.assert_almost_equal(
                            np.sum(probabilities), 1.0, decimal=2)
示例#14
0
    def load_data(data, day=t_days, profit=profit_m):

        input_data = pd.read_csv(data)

        input_data = pd.DataFrame(input_data.iloc[:, 1:])

        #Pareto Model

        pareto_model = lifetimes.ParetoNBDFitter(penalizer_coef=0.1)
        pareto_model.fit(input_data["frequency"], input_data["recency"],
                         input_data["T"])
        input_data[
            "p_not_alive"] = 1 - pareto_model.conditional_probability_alive(
                input_data["frequency"], input_data["recency"],
                input_data["T"])
        input_data["p_alive"] = pareto_model.conditional_probability_alive(
            input_data["frequency"], input_data["recency"], input_data["T"])
        t = days
        input_data[
            "predicted_purchases"] = pareto_model.conditional_expected_number_of_purchases_up_to_time(
                t, input_data["frequency"], input_data["recency"],
                input_data["T"])

        #Gamma Gamma Model

        idx = input_data[(input_data["frequency"] <= 0.0)]
        idx = idx.index
        input_data = input_data.drop(idx, axis=0)
        m_idx = input_data[(input_data["monetary_value"] <= 0.0)].index
        input_data = input_data.drop(m_idx, axis=0)

        input_data.reset_index().drop("index", axis=1, inplace=True)

        ggf_model = lifetimes.GammaGammaFitter(penalizer_coef=0.1)

        ggf_model.fit(input_data["frequency"], input_data["monetary_value"])

        input_data[
            "expected_avg_sales_"] = ggf_model.conditional_expected_average_profit(
                input_data["frequency"], input_data["monetary_value"])

        input_data["predicted_clv"] = ggf_model.customer_lifetime_value(
            pareto_model,
            input_data["frequency"],
            input_data["recency"],
            input_data["T"],
            input_data["monetary_value"],
            time=30,
            freq='D',
            discount_rate=0.01)

        input_data["profit_margin"] = input_data["predicted_clv"] * profit

        input_data = input_data.reset_index().drop("index", axis=1)

        #K-Means Model

        col = [
            "predicted_purchases", "expected_avg_sales_", "predicted_clv",
            "profit_margin"
        ]

        new_df = input_data[col]

        k_model = KMeans(n_clusters=4,
                         init="k-means++",
                         n_jobs=-1,
                         max_iter=1000).fit(new_df)

        labels = k_model.labels_

        labels = pd.Series(labels, name="Labels")

        input_data = pd.concat([input_data, labels], axis=1)

        st.write(input_data)

        #adding a count bar chart

        fig = alt.Chart(input_data).mark_bar().encode(y="Labels:N",
                                                      x="count(Labels):Q")

        #adding a annotation to the chart

        text = fig.mark_text(align="left", baseline="middle",
                             dx=3).encode(text="count(Labels):Q")

        chart = (fig + text)

        #showing the chart

        st.altair_chart(chart, use_container_width=True)

        #creating a button to download the result

        st.markdown("""

			### Download Your File Now!!! 

			""")

        text = """\
		There is currently no official way of downloading data from Streamlit as if now. So Please download the data from the below link using **"Save As"**."""

        st.markdown(text)

        download = input_data
        # When no file name is given, pandas returns the CSV as a string, nice.
        csv = download.to_csv(index=False)
        b64 = base64.b64encode(csv.encode()).decode(
        )  # some strings <-> bytes conversions necessary here
        href = f'<a href="data:file/csv;base64,{b64}">Download CSV File</a> (right-click and save as &lt;some_name&gt;.csv)'
        st.markdown(href, unsafe_allow_html=True)
    def load_data(data, day=t_days, profit=profit_m):
        """Score customers from a CSV, cluster them into tiers, and render the results.

        Reads ``data`` with ``pd.read_csv`` and discards its first column,
        fits a Pareto/NBD model and a Gamma-Gamma model, adds the derived
        columns to the frame, clusters the customers into four K-Means
        groups mapped to tier names, and writes the table, a label-count
        chart and a "Download" button to the Streamlit page.

        Parameters
        ----------
        data
            Anything ``pd.read_csv`` accepts. The file must provide the
            "frequency", "recency", "T" and "monetary_value" columns.
        day
            Horizon passed to
            ``conditional_expected_number_of_purchases_up_to_time``.
        profit
            Multiplier applied to "predicted_clv" to get "profit_margin".

        Returns
        -------
        The result of ``DataFrame.to_csv(<file name>)`` when the "Download"
        button is pressed, otherwise ``None``.
        """
        input_data = pd.read_csv(data)

        # Discard the first column of the uploaded file.
        input_data = pd.DataFrame(input_data.iloc[:, 1:])

        # Pareto Model

        pareto_model = lifetimes.ParetoNBDFitter(penalizer_coef=0.1)
        pareto_model.fit(input_data["frequency"], input_data["recency"],
                         input_data["T"])

        # Compute the alive probability once and derive both columns from it.
        p_alive = pareto_model.conditional_probability_alive(
            input_data["frequency"], input_data["recency"], input_data["T"])
        input_data["p_not_alive"] = 1 - p_alive
        input_data["p_alive"] = p_alive

        # Use the ``day`` argument; it was previously ignored in favour of a
        # different name, ``days``.
        input_data["predicted_purchases"] = (
            pareto_model.conditional_expected_number_of_purchases_up_to_time(
                day, input_data["frequency"], input_data["recency"],
                input_data["T"]))

        # Gamma Gamma Model

        # Drop rows whose frequency or monetary value is not positive before
        # fitting. The index is reset once, after all derived columns exist.
        zero_freq_idx = input_data[input_data["frequency"] <= 0.0].index
        input_data = input_data.drop(zero_freq_idx, axis=0)
        zero_value_idx = input_data[input_data["monetary_value"] <= 0.0].index
        input_data = input_data.drop(zero_value_idx, axis=0)

        ggf_model = lifetimes.GammaGammaFitter(penalizer_coef=0.1)
        ggf_model.fit(input_data["frequency"], input_data["monetary_value"])

        input_data["expected_avg_sales_"] = (
            ggf_model.conditional_expected_average_profit(
                input_data["frequency"], input_data["monetary_value"]))

        input_data["predicted_clv"] = ggf_model.customer_lifetime_value(
            pareto_model,
            input_data["frequency"],
            input_data["recency"],
            input_data["T"],
            input_data["monetary_value"],
            time=30,
            freq='D',
            discount_rate=0.01)

        input_data["profit_margin"] = input_data["predicted_clv"] * profit

        # A fresh 0..n-1 index lets the label Series below align row by row.
        input_data = input_data.reset_index().drop("index", axis=1)

        # K-Means Model

        feature_columns = [
            "predicted_purchases", "expected_avg_sales_", "predicted_clv",
            "profit_margin"
        ]
        features = input_data[feature_columns]

        # ``n_jobs`` is intentionally not passed: scikit-learn deprecated it
        # for KMeans in 0.23 and removed it in 1.0.
        k_model = KMeans(n_clusters=4,
                         init="k-means++",
                         max_iter=1000).fit(features)

        labels = pd.Series(k_model.labels_, name="Labels")
        input_data = pd.concat([input_data, labels], axis=1)

        # NOTE(review): this fixed cluster-id -> tier mapping assumes a stable
        # labelling, but no ``random_state`` is set and K-Means cluster ids
        # carry no inherent order -- confirm the tiers match the clusters.
        label_mapper = {0: "Low", 3: "Medium", 1: "High", 2: "V_High"}
        input_data["Labels"] = input_data["Labels"].map(label_mapper)

        # saving the input data in the separate variable

        download = input_data

        st.write(input_data)

        # adding a count bar chart

        bars = alt.Chart(input_data).mark_bar().encode(y="Labels:N",
                                                       x="count(Labels):Q")

        # adding an annotation to the chart

        bar_labels = bars.mark_text(align="left", baseline="middle",
                                    dx=3).encode(text="count(Labels):Q")

        chart = (bars + bar_labels)

        # showing the chart

        st.altair_chart(chart, use_container_width=True)

        # creating a button to download the result

        if st.button("Download"):
            st.write(
                "Successfully Downloaded!!! Please Check Your Default Download Location...:smile:"
            )
            # The CSV is written to a fixed file name relative to the
            # process's working directory.
            return download.to_csv("customer_lifetime_prediction_result.csv")