Пример #1
0
    def test_using_weights_col_gives_correct_results(
            self, cdnow_customers_with_monetary_value):
        """Check that a weighted fit on grouped rows matches the plain fit.

        Rows sharing the same (frequency, monetary_value) pair are collapsed
        into a single row whose weight is the number of rows it replaced; the
        fitted p, q, v must agree with the unweighted fit to 3 decimals.
        """
        repeat_buyers = cdnow_customers_with_monetary_value[
            cdnow_customers_with_monetary_value["frequency"] > 0]

        # Give every row a unit weight, then sum the weights per distinct
        # (frequency, monetary_value) pair.
        grouped = repeat_buyers.copy()
        grouped["weights"] = 1.0
        grouped = (
            grouped.groupby(["frequency", "monetary_value"])["weights"]
            .sum()
            .reset_index()
        )
        # The comparison is only meaningful if some rows were really merged.
        assert (grouped["weights"] > 1).any()

        weighted_fitter = lt.GammaGammaFitter(penalizer_coef=0.0)
        weighted_fitter.fit(
            grouped["frequency"],
            grouped["monetary_value"],
            weights=grouped["weights"],
        )

        plain_fitter = lt.GammaGammaFitter(penalizer_coef=0.0)
        plain_fitter.fit(repeat_buyers["frequency"],
                         repeat_buyers["monetary_value"])

        plain_params = np.array(plain_fitter._unload_params("p", "q", "v"))
        weighted_params = np.array(
            weighted_fitter._unload_params("p", "q", "v"))
        npt.assert_almost_equal(plain_params, weighted_params, decimal=3)
Пример #2
0
    def test_fit_with_index(self, cdnow_customers_with_monetary_value):
        """Check that ``fit`` stores a supplied index and not when given None.

        A descending range is passed as ``index``; the fitted ``data`` must
        carry exactly that index. With ``index=None`` the resulting index must
        not be equal to that range everywhere.
        """
        repeat_buyers = cdnow_customers_with_monetary_value[
            cdnow_customers_with_monetary_value["frequency"] > 0]
        frequency = repeat_buyers["frequency"]
        monetary_value = repeat_buyers["monetary_value"]

        # len, len - 1, ..., 1
        custom_index = range(len(repeat_buyers), 0, -1)

        indexed_fitter = lt.GammaGammaFitter()
        indexed_fitter.fit(frequency, monetary_value, index=custom_index)
        assert (indexed_fitter.data.index == custom_index).all()

        default_fitter = lt.GammaGammaFitter()
        default_fitter.fit(frequency, monetary_value, index=None)
        assert not (default_fitter.data.index == custom_index).all()
Пример #3
0
    def test_customer_lifetime_value_with_bgf(
            self, cdnow_customers_with_monetary_value):
        """Check the fitter's CLV method against ``utils._customer_lifetime_value``.

        The comparison is made twice: once with default arguments and once
        with ``freq="H"``. Both paths must return exactly equal values.
        """
        customers = cdnow_customers_with_monetary_value
        frequency = customers["frequency"]
        recency = customers["recency"]
        age = customers["T"]
        monetary_value = customers["monetary_value"]

        # Parameters are set by hand rather than fitted.
        ggf = lt.GammaGammaFitter()
        ggf.params_ = pd.Series({"p": 6.25, "q": 3.74, "v": 15.44})

        bgf = lt.BetaGeoFitter()
        bgf.fit(frequency, recency, age)

        for extra_kwargs in ({}, {"freq": "H"}):
            clv_from_fitter = ggf.customer_lifetime_value(
                bgf, frequency, recency, age, monetary_value, **extra_kwargs)

            expected_profit = ggf.conditional_expected_average_profit(
                frequency, monetary_value)
            clv_from_utils = utils._customer_lifetime_value(
                bgf, frequency, recency, age, expected_profit, **extra_kwargs)

            npt.assert_equal(clv_from_fitter.values, clv_from_utils.values)
Пример #4
0
 def test_params_out_is_close_to_Hardie_paper(
         self, cdnow_customers_with_monetary_value):
     """Check fitted p, q, v against fixed reference values to 2 decimals.

     Only customers with ``frequency > 0`` are used for the fit. The
     reference values are, per the test name, taken from the Hardie paper.
     """
     has_repeat_purchase = cdnow_customers_with_monetary_value["frequency"] > 0
     repeat_buyers = cdnow_customers_with_monetary_value[has_repeat_purchase]

     fitter = lt.GammaGammaFitter()
     fitter.fit(repeat_buyers["frequency"], repeat_buyers["monetary_value"])

     reference = np.array([6.25, 3.74, 15.44])
     fitted = np.array(fitter._unload_params("p", "q", "v"))
     npt.assert_array_almost_equal(reference, fitted, decimal=2)
Пример #5
0
    def test_conditional_expected_average_profit(
            self, cdnow_customers_with_monetary_value):
        """Check expected average profit for the first ten customers.

        Model parameters are set by hand; the estimates must be within 0.1
        (absolute) of the reference values.
        """
        fitter = lt.GammaGammaFitter()
        fitter.params_ = pd.Series({"p": 6.25, "q": 3.74, "v": 15.44})

        # from Hardie spreadsheet http://brucehardie.com/notes/025/
        reference = np.array([
            24.65, 18.91, 35.17, 35.17, 35.17,
            71.46, 18.91, 35.17, 27.28, 35.17,
        ])

        first_ten = cdnow_customers_with_monetary_value.head(10)
        predicted = fitter.conditional_expected_average_profit(
            first_ten["frequency"], first_ten["monetary_value"])

        npt.assert_allclose(predicted.values, reference, atol=0.1)
Пример #6
0
    The distribution of average transaction values across customers is independent of the transaction process.
'''
# Only customers with repeat purchases (frequency > 0) are used to fit the
# monetary-value model; a frequency of 0 means a one-time customer.
# The model's final assumption (no relationship between frequency and the
# monetary value of transactions) can be validated using Pearson correlation.

# Keep repeat customers with a strictly positive monetary value.
# Both conditions are combined into ONE mask built on `summary`: chaining
# `summary[mask_a][mask_b]` applies a mask indexed like the unfiltered frame
# to an already-filtered frame, which pandas has to reindex (and warns about).
is_repeat_customer = summary['frequency'] > 0
has_positive_value = summary['monetary_value'] > 0
return_customers_summary = summary[is_repeat_customer & has_positive_value]
print(return_customers_summary.shape)
return_customers_summary.head()

# Checking the relationship between frequency and monetary_value
return_customers_summary[['frequency', 'monetary_value']].corr()

# Modeling the monetary value using the Gamma-Gamma model
ggf = lifetimes.GammaGammaFitter(penalizer_coef=0.001)
ggf.fit(return_customers_summary['frequency'],
        return_customers_summary['monetary_value'])

# Summary of the fitted parameters
ggf.summary


# Predict using the model: the expected average profit per transaction for
# each customer.

# conditional_expected_average_profit() computes the conditional expectation
# of the average profit per transaction for a group of one or more customers.
# `.copy()` makes the filtered frame independent of the original, so that the
# new column is written to a real frame rather than to a slice
# (avoids pandas' SettingWithCopyWarning).
summary = summary[summary['monetary_value'] > 0].copy()
summary['exp_avg_sales'] = ggf.conditional_expected_average_profit(
    summary['frequency'], summary['monetary_value'])
Пример #7
0
    def load_data(data, day=t_days, profit=profit_m):

        input_data = pd.read_csv(data)

        input_data = pd.DataFrame(input_data.iloc[:, 1:])

        #Pareto Model

        pareto_model = lifetimes.ParetoNBDFitter(penalizer_coef=0.1)
        pareto_model.fit(input_data["frequency"], input_data["recency"],
                         input_data["T"])
        input_data[
            "p_not_alive"] = 1 - pareto_model.conditional_probability_alive(
                input_data["frequency"], input_data["recency"],
                input_data["T"])
        input_data["p_alive"] = pareto_model.conditional_probability_alive(
            input_data["frequency"], input_data["recency"], input_data["T"])
        t = days
        input_data[
            "predicted_purchases"] = pareto_model.conditional_expected_number_of_purchases_up_to_time(
                t, input_data["frequency"], input_data["recency"],
                input_data["T"])

        #Gamma Gamma Model

        idx = input_data[(input_data["frequency"] <= 0.0)]
        idx = idx.index
        input_data = input_data.drop(idx, axis=0)
        m_idx = input_data[(input_data["monetary_value"] <= 0.0)].index
        input_data = input_data.drop(m_idx, axis=0)

        input_data.reset_index().drop("index", axis=1, inplace=True)

        ggf_model = lifetimes.GammaGammaFitter(penalizer_coef=0.1)

        ggf_model.fit(input_data["frequency"], input_data["monetary_value"])

        input_data[
            "expected_avg_sales_"] = ggf_model.conditional_expected_average_profit(
                input_data["frequency"], input_data["monetary_value"])

        input_data["predicted_clv"] = ggf_model.customer_lifetime_value(
            pareto_model,
            input_data["frequency"],
            input_data["recency"],
            input_data["T"],
            input_data["monetary_value"],
            time=30,
            freq='D',
            discount_rate=0.01)

        input_data["profit_margin"] = input_data["predicted_clv"] * profit

        input_data = input_data.reset_index().drop("index", axis=1)

        #K-Means Model

        col = [
            "predicted_purchases", "expected_avg_sales_", "predicted_clv",
            "profit_margin"
        ]

        new_df = input_data[col]

        k_model = KMeans(n_clusters=4,
                         init="k-means++",
                         n_jobs=-1,
                         max_iter=1000).fit(new_df)

        labels = k_model.labels_

        labels = pd.Series(labels, name="Labels")

        input_data = pd.concat([input_data, labels], axis=1)

        st.write(input_data)

        #adding a count bar chart

        fig = alt.Chart(input_data).mark_bar().encode(y="Labels:N",
                                                      x="count(Labels):Q")

        #adding a annotation to the chart

        text = fig.mark_text(align="left", baseline="middle",
                             dx=3).encode(text="count(Labels):Q")

        chart = (fig + text)

        #showing the chart

        st.altair_chart(chart, use_container_width=True)

        #creating a button to download the result

        st.markdown("""

			### Download Your File Now!!! 

			""")

        text = """\
		There is currently no official way of downloading data from Streamlit as if now. So Please download the data from the below link using **"Save As"**."""

        st.markdown(text)

        download = input_data
        # When no file name is given, pandas returns the CSV as a string, nice.
        csv = download.to_csv(index=False)
        b64 = base64.b64encode(csv.encode()).decode(
        )  # some strings <-> bytes conversions necessary here
        href = f'<a href="data:file/csv;base64,{b64}">Download CSV File</a> (right-click and save as &lt;some_name&gt;.csv)'
        st.markdown(href, unsafe_allow_html=True)
    def load_data(data, day=t_days, profit=profit_m):
        """Fit the CLV models on a customer summary CSV and render the result.

        Steps, in order:
          1. Read ``data`` with ``pd.read_csv`` and discard its first column.
          2. Fit a Pareto/NBD model and add the ``p_not_alive``, ``p_alive``
             and ``predicted_purchases`` columns.
          3. Drop rows whose ``frequency`` or ``monetary_value`` is <= 0, fit
             a Gamma-Gamma model and add ``expected_avg_sales_``,
             ``predicted_clv`` and ``profit_margin``.
          4. Cluster the four predicted columns with KMeans (4 clusters) and
             store a tier name ("Low", "Medium", "High", "V_High") in
             ``Labels``.
          5. Write the table and a per-label count chart to the Streamlit
             page; when the "Download" button is pressed, save the table as
             ``customer_lifetime_prediction_result.csv``.

        Args:
            data: Anything ``pd.read_csv`` accepts. After the first column is
                dropped the frame must contain ``frequency``, ``recency``,
                ``T`` and ``monetary_value``.
            day: Horizon passed to
                ``conditional_expected_number_of_purchases_up_to_time``
                (presumably in days -- confirm against the caller).
            profit: Factor multiplied with ``predicted_clv`` to obtain
                ``profit_margin``.

        Returns:
            None in every case (``to_csv`` with a path returns None).
        """
        input_data = pd.read_csv(data)
        input_data = pd.DataFrame(input_data.iloc[:, 1:])

        # Pareto Model
        pareto_model = lifetimes.ParetoNBDFitter(penalizer_coef=0.1)
        pareto_model.fit(input_data["frequency"], input_data["recency"],
                         input_data["T"])

        # Evaluate the alive probability once and derive both columns from it
        # (column order is kept: p_not_alive first, then p_alive).
        p_alive = pareto_model.conditional_probability_alive(
            input_data["frequency"], input_data["recency"], input_data["T"])
        input_data["p_not_alive"] = 1 - p_alive
        input_data["p_alive"] = p_alive

        # Use the ``day`` argument as the horizon; reading an outer-scope
        # variable here would silently ignore what the caller passed in.
        input_data["predicted_purchases"] = (
            pareto_model.conditional_expected_number_of_purchases_up_to_time(
                day, input_data["frequency"], input_data["recency"],
                input_data["T"]))

        # Gamma Gamma Model
        # Rows with a non-positive frequency or monetary value are removed
        # before fitting.
        zero_freq_rows = input_data[input_data["frequency"] <= 0.0].index
        input_data = input_data.drop(zero_freq_rows, axis=0)
        zero_value_rows = input_data[
            input_data["monetary_value"] <= 0.0].index
        input_data = input_data.drop(zero_value_rows, axis=0)

        ggf_model = lifetimes.GammaGammaFitter(penalizer_coef=0.1)
        ggf_model.fit(input_data["frequency"], input_data["monetary_value"])

        input_data["expected_avg_sales_"] = (
            ggf_model.conditional_expected_average_profit(
                input_data["frequency"], input_data["monetary_value"]))

        # NOTE(review): ``time`` is a fixed 30 and independent of ``day``;
        # confirm the unit lifetimes expects for ``time`` is the intended one.
        input_data["predicted_clv"] = ggf_model.customer_lifetime_value(
            pareto_model,
            input_data["frequency"],
            input_data["recency"],
            input_data["T"],
            input_data["monetary_value"],
            time=30,
            freq='D',
            discount_rate=0.01)

        input_data["profit_margin"] = input_data["predicted_clv"] * profit

        # Renumber rows 0..n-1 so the label Series below (which has a default
        # index) lines up row-for-row in ``pd.concat``.
        input_data = input_data.reset_index(drop=True)

        # K-Means Model
        feature_columns = [
            "predicted_purchases", "expected_avg_sales_", "predicted_clv",
            "profit_margin"
        ]

        # ``n_jobs`` is not passed: current scikit-learn releases no longer
        # accept it for KMeans.
        k_model = KMeans(n_clusters=4,
                         init="k-means++",
                         max_iter=1000).fit(input_data[feature_columns])

        labels = pd.Series(k_model.labels_, name="Labels")
        input_data = pd.concat([input_data, labels], axis=1)

        # KMeans numbers its clusters arbitrarily, so a fixed id -> name table
        # would attach the tier names to different clusters from run to run.
        # Instead the clusters are ranked by their mean predicted CLV and the
        # names are handed out in ascending order.
        tier_names = ["Low", "Medium", "High", "V_High"]
        clusters_by_clv = (input_data.groupby("Labels")["predicted_clv"]
                           .mean()
                           .sort_values()
                           .index)
        label_mapper = dict(zip(clusters_by_clv, tier_names))
        input_data["Labels"] = input_data["Labels"].map(label_mapper)

        st.write(input_data)

        # Bar chart with the number of customers per label ...
        bars = alt.Chart(input_data).mark_bar().encode(y="Labels:N",
                                                       x="count(Labels):Q")

        # ... annotated with the count next to each bar.
        counts = bars.mark_text(align="left", baseline="middle",
                                dx=3).encode(text="count(Labels):Q")

        st.altair_chart(bars + counts, use_container_width=True)

        # Download button: the CSV is written to a relative path, i.e. into
        # the working directory of the process running this code.
        if st.button("Download"):
            st.write(
                "Successfully Downloaded!!! Please Check Your Default Download Location...:smile:"
            )
            return input_data.to_csv("customer_lifetime_prediction_result.csv")