def test_plot_frequency_recency_matrix(self):
    """Smoke-test the frequency/recency matrix plot, default and capped.

    Draws the matrix for ``bgf`` twice, each in its own figure: once with
    default arguments and once with ``max_recency=100`` and
    ``max_frequency=50``. Both figures are then shown.
    """
    import matplotlib.pyplot as plt

    # First figure: default arguments.
    plt.figure()
    plotting.plot_frequency_recency_matrix(bgf)

    # Second figure: explicit caps on both axes.
    axis_caps = {"max_recency": 100, "max_frequency": 50}
    plt.figure()
    plotting.plot_frequency_recency_matrix(bgf, **axis_caps)

    plt.show()
def test_plot_frequency_recency_matrix(self):
    """Smoke-test the frequency/recency matrix plot, default and capped.

    Draws the matrix for ``BG`` twice, each in its own figure: once with
    default arguments and once with ``max_t=100`` and ``max_x=50``.
    Both figures are then shown.
    """
    import matplotlib.pyplot as plt

    # First figure: default arguments.
    plt.figure()
    plotting.plot_frequency_recency_matrix(BG)

    # Second figure: explicit caps passed through max_t / max_x.
    axis_caps = {"max_t": 100, "max_x": 50}
    plt.figure()
    plotting.plot_frequency_recency_matrix(BG, **axis_caps)

    plt.show()
def test_plot_frequency_recency_matrix_max_frequency_max_recency(self, bgf):
    """Check the plotted matrix when both max_frequency and max_recency are 100.

    Verifies the image array shape, one reference row of values (within an
    absolute tolerance of 0.01), the title and both axis labels, then closes
    the current figure.
    """
    expected_shape = (101, 101)
    probe_row = 95
    # Reference values for row ``probe_row`` of the plotted image.
    expected_row = [
        0.002, 0.008, 0.017, 0.025, 0.034, 0.043, 0.052, 0.060, 0.069, 0.078,
        0.087, 0.096, 0.105, 0.114, 0.123, 0.132, 0.140, 0.149, 0.158, 0.166,
        0.175, 0.184, 0.192, 0.201, 0.209, 0.218, 0.226, 0.235, 0.243, 0.251,
        0.259, 0.267, 0.275, 0.283, 0.291, 0.299, 0.307, 0.314, 0.322, 0.330,
        0.337, 0.344, 0.352, 0.359, 0.366, 0.373, 0.379, 0.386, 0.393, 0.399,
        0.405, 0.411, 0.417, 0.423, 0.429, 0.435, 0.440, 0.445, 0.450, 0.455,
        0.460, 0.465, 0.469, 0.473, 0.477, 0.481, 0.484, 0.488, 0.491, 0.494,
        0.497, 0.499, 0.501, 0.503, 0.505, 0.506, 0.508, 0.509, 0.509, 0.510,
        0.510, 0.510, 0.510, 0.509, 0.508, 0.507, 0.506, 0.504, 0.503, 0.501,
        0.498, 0.496, 0.493, 0.490, 0.486, 0.483, 0.479, 0.475, 0.471, 0.466,
        0.462,
    ]
    expected_title = (
        "Expected Number of Future Purchases for 1 Unit of Time,\n"
        "by Frequency and Recency of a Customer"
    )
    expected_xlabel = "Customer's Historical Frequency"
    expected_ylabel = "Customer's Recency"

    ax = plotting.plot_frequency_recency_matrix(
        bgf, max_frequency=100, max_recency=100)
    heatmap = ax.get_images()[0].get_array()

    assert_array_equal(heatmap.shape, expected_shape)
    # Only a single row is compared, to keep the fixture data short.
    assert_allclose(heatmap[probe_row, :].data, expected_row, atol=0.01)
    assert_equal(ax.title.get_text(), expected_title)
    assert_equal(ax.xaxis.get_label().get_text(), expected_xlabel)
    assert_equal(ax.yaxis.get_label().get_text(), expected_ylabel)
    plt.close()
def test_plot_frequency_recency_matrix_max_frequency(self, bgf):
    """Check the plotted matrix when only max_frequency=100 is passed.

    Verifies the image array shape, one reference row of values (within an
    absolute tolerance of 0.01), the title and both axis labels, then closes
    the current figure.
    """
    expected_shape = (39, 101)
    probe_row = 35
    # Reference values for row ``probe_row`` of the plotted image.
    expected_row = [
        0.005, 0.021, 0.041, 0.061, 0.082, 0.103, 0.125, 0.146, 0.167, 0.188,
        0.208, 0.229, 0.250, 0.270, 0.290, 0.310, 0.330, 0.349, 0.369, 0.388,
        0.406, 0.425, 0.443, 0.460, 0.478, 0.495, 0.511, 0.528, 0.543, 0.559,
        0.573, 0.587, 0.601, 0.614, 0.627, 0.639, 0.650, 0.660, 0.670, 0.679,
        0.688, 0.695, 0.702, 0.708, 0.713, 0.718, 0.721, 0.724, 0.726, 0.727,
        0.727, 0.726, 0.724, 0.721, 0.718, 0.713, 0.708, 0.702, 0.695, 0.687,
        0.679, 0.670, 0.660, 0.649, 0.638, 0.627, 0.615, 0.602, 0.589, 0.575,
        0.562, 0.548, 0.533, 0.519, 0.504, 0.489, 0.475, 0.460, 0.445, 0.430,
        0.416, 0.401, 0.387, 0.372, 0.359, 0.345, 0.331, 0.318, 0.305, 0.293,
        0.280, 0.269, 0.257, 0.246, 0.235, 0.224, 0.214, 0.204, 0.195, 0.186,
        0.177,
    ]
    expected_title = (
        "Expected Number of Future Purchases for 1 Unit of Time,\n"
        "by Frequency and Recency of a Customer"
    )
    expected_xlabel = "Customer's Historical Frequency"
    expected_ylabel = "Customer's Recency"

    ax = plotting.plot_frequency_recency_matrix(bgf, max_frequency=100)
    heatmap = ax.get_images()[0].get_array()

    assert_array_equal(heatmap.shape, expected_shape)
    # Only a single row is compared, to keep the fixture data short.
    assert_allclose(heatmap[probe_row, :].data, expected_row, atol=0.01)
    assert_equal(ax.title.get_text(), expected_title)
    assert_equal(ax.xaxis.get_label().get_text(), expected_xlabel)
    assert_equal(ax.yaxis.get_label().get_text(), expected_ylabel)
    plt.close()
def test_plot_frequency_recency_matrix_max_recency(self, bgf):
    """Check the plotted matrix when only max_recency=100 is passed.

    Verifies the image array shape, one reference column of values (within
    an absolute tolerance of 0.01), the title and both axis labels, then
    closes the current figure.
    """
    expected_shape = (101, 30)
    probe_col = 25
    # Reference values for column ``probe_col``: 69 leading zeros followed by
    # 32 non-zero entries, i.e. one value per row of the 101-row image.
    expected_col = [0] * 69 + [
        0.001, 0.001, 0.002, 0.002, 0.004, 0.005, 0.007, 0.010,
        0.014, 0.018, 0.024, 0.032, 0.041, 0.052, 0.065, 0.080,
        0.096, 0.112, 0.129, 0.145, 0.160, 0.174, 0.186, 0.196,
        0.205, 0.212, 0.218, 0.222, 0.226, 0.229, 0.232, 0.233,
    ]
    expected_title = (
        "Expected Number of Future Purchases for 1 Unit of Time,\n"
        "by Frequency and Recency of a Customer"
    )
    expected_xlabel = "Customer's Historical Frequency"
    expected_ylabel = "Customer's Recency"

    ax = plotting.plot_frequency_recency_matrix(bgf, max_recency=100)
    heatmap = ax.get_images()[0].get_array()

    assert_array_equal(heatmap.shape, expected_shape)
    # Only a single column is compared, to keep the fixture data short.
    assert_allclose(heatmap[:, probe_col].data, expected_col, atol=0.01)
    assert_equal(ax.title.get_text(), expected_title)
    assert_equal(ax.xaxis.get_label().get_text(), expected_xlabel)
    assert_equal(ax.yaxis.get_label().get_text(), expected_ylabel)
    plt.close()
def viz_bgf(self, t):
    """Draw four diagnostic plots for ``self.bgf`` and save each one as a PNG.

    Each plot is drawn, written to a fixed file name in the current working
    directory, and the current figure is closed before the next one starts.

    Args:
        t: Forwarded as the ``T`` argument of
            ``plot_frequency_recency_matrix``.
    """
    # (drawing step, output file), executed strictly in this order.
    render_steps = (
        # customer frequency / recency matrix
        (lambda: plot_frequency_recency_matrix(self.bgf, T=t, cmap='coolwarm'),
         'sales_frequency_recency_matrix.png'),
        # customer alive-probability matrix
        (lambda: plot_probability_alive_matrix(self.bgf, cmap='coolwarm'),
         'probability_alive_matrix.png'),
        # expected repeat purchases
        (lambda: plot_expected_repeat_purchases(self.bgf),
         'ProbabilityExpectedRepeatPurchases.png'),
        # expected number of period transactions
        (lambda: plot_period_transactions(self.bgf),
         'period_transactions.png'),
    )
    for draw, png_path in render_steps:
        draw()
        plt.savefig(png_path)
        plt.close()
def test_plot_frequency_recency_matrix(self, bgf):
    """Check the matrix plotted with default arguments.

    Verifies the image array shape, one reference row of values (within an
    absolute tolerance of 0.01), the title and both axis labels, then closes
    the current figure.
    """
    expected_shape = (39, 30)
    probe_row = 29
    # Reference values for row ``probe_row`` of the plotted image.
    expected_row = [
        0.005, 0.020, 0.037, 0.054, 0.070, 0.085, 0.099, 0.110, 0.120, 0.127,
        0.133, 0.136, 0.136, 0.135, 0.131, 0.125, 0.119, 0.111, 0.102, 0.093,
        0.084, 0.075, 0.066, 0.058, 0.050, 0.044, 0.038, 0.032, 0.027, 0.023,
    ]
    expected_title = (
        "Expected Number of Future Purchases for 1 Unit of Time,\n"
        "by Frequency and Recency of a Customer"
    )
    expected_xlabel = "Customer's Historical Frequency"
    expected_ylabel = "Customer's Recency"

    ax = plotting.plot_frequency_recency_matrix(bgf)
    heatmap = ax.get_images()[0].get_array()

    assert_array_equal(heatmap.shape, expected_shape)
    # Only a single row is compared, to keep the fixture data short.
    assert_allclose(heatmap[probe_row, :].data, expected_row, atol=0.01)
    assert_equal(ax.title.get_text(), expected_title)
    assert_equal(ax.xaxis.get_label().get_text(), expected_xlabel)
    assert_equal(ax.yaxis.get_label().get_text(), expected_ylabel)
    plt.close()
# Load the dataset from the CSV file and preview its first rows.
clv = pd.read_csv("C:\\data\\ecommercesales.csv")
clv.head(6)


# #### Step 3: Transform transactional data for CLV analysis

# In[14]:

from lifetimes.utils import summary_data_from_transaction_data

# Summarise the transaction log (dated by 'InvoiceDate', keyed by 'CustID')
# up to the end of the observation period. The result provides the
# 'frequency', 'recency' and 'T' columns used for fitting below.
clvsum = summary_data_from_transaction_data(
    clv,
    'InvoiceDate',
    'CustID',
    observation_period_end='2016-01-01',
)
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(clvsum.head(100))


# In[ ]:

print(clvsum.tail())


# Step 4: Fit data to the Beta-geometric / NBD model

# In[4]:

Betageo = BetaGeoFitter()
Betageo.fit(clvsum['frequency'], clvsum['recency'], clvsum['T'])


# ### Frequency / Recency Matrix

# In[5]:

from lifetimes.plotting import plot_frequency_recency_matrix

plot_frequency_recency_matrix(Betageo)
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Fit BG/NBD and Gamma-Gamma models for one client and publish the output.

    Steps:
      1. Run the templated SQL query against Oracle and lower-case the
         resulting column names.
      2. Build calibration/holdout RFM data and fit a ``BetaGeoFitter`` on the
         calibration columns.
      3. Save four diagnostic charts as SVG files in the working directory.
      4. Build the full RFM summary, fit a ``GammaGammaFitter`` on customers
         with at least one repeat purchase, compute customer lifetime value
         and write the joined table to a CSV file.
      5. Upload the CSV and the four SVG files to the storage bucket under
         ``<src_client_id>/<ds_nodash>/``.

    Args:
        oracle_conn_id: Connection id handed to ``OracleHook``.
        src_client_id: Client identifier; used in local file names and in the
            remote object prefix.
        storage_bucket: Target bucket for the uploads.
        ds: Unused by this function; kept for interface compatibility.
        **context: Must provide ``templates_dict['query']`` (the SQL to run)
            and ``ds_nodash`` (used in file and object names). The query
            result is expected to contain the columns ``src_user_id``,
            ``pickup_date`` and ``price_total`` after lower-casing.

    Raises:
        KeyError: If ``templates_dict`` or its ``query`` entry is missing.
    """
    import datetime

    import matplotlib.pyplot
    # Turn interactive mode off before any plotting happens.
    matplotlib.pyplot.ioff()

    import pandas as pd
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.utils import summary_data_from_transaction_data

    def _save_svg_and_close(ax, path):
        """Write the figure owning *ax* to *path* as SVG, then close it."""
        ax.figure.savefig(path, format='svg')
        # Closing keeps figures from piling up and ensures the next plot
        # starts on a fresh figure rather than reusing this one.
        matplotlib.pyplot.close(ax.figure)

    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    print(src_client_id, context)
    try:
        query = context['templates_dict']['query']
        data = pd.read_sql(query, con=conn)
    finally:
        # The connection is only needed for this one query.
        conn.close()
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values.
    # NOTE(review): the calibration cut-off is hard-coded; confirm it is
    # still the intended date.
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Local chart file names: <ds_nodash><src_client_id>_<chart>.svg
    run_stamp = context.get("ds_nodash")
    chart_stem = run_stamp + str(src_client_id)
    plot_period_transactions_chart = (
        chart_stem + '_plot_period_transactions_chart.svg')
    plot_frequency_recency_chart = (
        chart_stem + '_plot_frequency_recency_matrix.svg')
    plot_probability_chart = (
        chart_stem + '_plot_probability_alive_matrix.svg')
    plot_calibration_vs_holdout_chart = (
        chart_stem + '_plot_calibration_vs_holdout_purchases.svg')

    _save_svg_and_close(
        plot_period_transactions(bgf, max_frequency=30),
        plot_period_transactions_chart)
    _save_svg_and_close(
        plot_frequency_recency_matrix(bgf),
        plot_frequency_recency_chart)
    _save_svg_and_close(
        plot_probability_alive_matrix(bgf),
        plot_probability_chart)
    _save_svg_and_close(
        plot_calibration_purchases_vs_holdout_purchases(
            bgf, training_rfm, n=50),
        plot_calibration_vs_holdout_chart)

    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')

    # The Gamma-Gamma model is fitted on customers with repeat purchases only.
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  # the model used to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)

    full_rfm_with_value = full_rfm.join(clv)
    full_rfm_file = (run_stamp + "-src_client_id-" + str(src_client_id)
                     + '-icabbi-test.csv')
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload every artefact from its OWN local file. Previously each chart
    # object was uploaded with filename=full_rfm_file, so the SVG objects in
    # the bucket actually contained the CSV.
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    remote_prefix = str(src_client_id) + "/" + run_stamp + "/"
    local_artifacts = (
        full_rfm_file,
        plot_period_transactions_chart,
        plot_frequency_recency_chart,
        plot_probability_chart,
        plot_calibration_vs_holdout_chart,
    )
    for local_file in local_artifacts:
        gcs_hook.upload(
            bucket=storage_bucket,
            object=remote_prefix + local_file,
            filename=local_file)
def test_plot_frequency_recency_matrix_max_frequency_max_recency(self):
    """Plot the matrix with both caps set to 100 and return the figure.

    Returns:
        The current matplotlib figure after plotting.
    """
    plt.figure()
    axis_caps = {"max_frequency": 100, "max_recency": 100}
    plotting.plot_frequency_recency_matrix(bgf, **axis_caps)
    current_figure = plt.gcf()
    return current_figure
import lifetimes
from lifetimes import BetaGeoFitter
from lifetimes.plotting import plot_frequency_recency_matrix
from lifetimes.plotting import plot_probability_alive_matrix
import pandas as pd

# Read the input file; it must provide 'frequency', 'recency' and 'T' columns.
data = pd.read_csv('lifetimes')

# Fit the BG/NBD model without regularisation.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(data['frequency'], data['recency'], data['T'])
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(bgf)

plot_frequency_recency_matrix(bgf)
#plot_probability_alive_matrix(bgf)
plot_probability_alive_matrix(model)
display()

# COMMAND ----------

# MAGIC %md In addition to predicting the probability a customer is still alive, we can calculate the number of purchases expected from a customer over a given future time interval, such as over the next 30-days:

# COMMAND ----------

from lifetimes.plotting import plot_frequency_recency_matrix

# Create a 12x8 figure for the matrix plot.
plt.subplots(figsize=(12, 8))

# Expected purchases matrix, plotted with T=30.
plot_frequency_recency_matrix(model, T=30)

display()

# COMMAND ----------

# MAGIC %md As before, we can calculate this probability for each customer based on their current metrics:

# COMMAND ----------

# Per-customer expected purchases up to time 30, stored as a new column.
filtered_pd['purchases_next30days'] = model.conditional_expected_number_of_purchases_up_to_time(
    30,
    filtered_pd['frequency'],
    filtered_pd['recency'],
    filtered_pd['T'],
)

filtered_pd.head(10)
on='customer_id') df_final['wholesaler'] = np.where(df_final['predicted_cltv'] < 1000, 0, 1) df_final['churn_group'] = np.where(df_final['probability_alive'] < .5, 0, 1) df_final # Plots and Validation plot_period_transactions(bgf_mod) cal_hold = calibration_and_holdout_data( df, 'customer_id', 'date', calibration_period_end='2018-12-31', #3 years calibration observation_period_end='2020-12-31', #2 year holdout freq=frq) # plots the efficiacy of the model using the hold-out period plt.rcParams['figure.figsize'] = (20, 10) bgf = BetaGeoFitter() bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal']) plot_calibration_purchases_vs_holdout_purchases(bgf, cal_hold) fig = plt.figure(figsize=(8, 6)) plot_frequency_recency_matrix(bgf_mod) fig = plt.figure(figsize=(8, 6)) plot_probability_alive_matrix(bgf_mod)
fit_method='Nelder-Mead') mbgnbd.summary # In[8]: from lifetimes import BetaGeoFitter bgf = BetaGeoFitter(penalizer_coef=0.0001) bgf.fit(customer_detail['frequency'], customer_detail['recency'], customer_detail['T']) bgf.summary # In[9]: #from lifetimes.plotting import plot_probability_alive_matrix from lifetimes.plotting import plot_frequency_recency_matrix plot_frequency_recency_matrix(mbgnbd) # In[10]: from lifetimes.plotting import plot_period_transactions plot_period_transactions(bgf) # In[11]: t = 90 # days to predict in the future customer_detail[ 'pred_90d_bgf'] = bgf.conditional_expected_number_of_purchases_up_to_time( t, customer_detail['frequency'], customer_detail['recency'], customer_detail['T']) customer_detail.sort_values(by='pred_90d_bgf').tail(5)
#modeldata['frequency'].plot(kind='hist', bins=50)
#print(modeldata['frequency'].describe())

# Share of customers with no repeat order (frequency == 0).
print(sum(modeldata['frequency'] == 0) / float(len(modeldata)))
#dfnew[dfnew.CustomerID == 12346.0]

from lifetimes import BetaGeoFitter

# BG/NBD model; the penalizer coefficient is set to 0.0 here.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(
    modeldata['frequency'],
    modeldata['recency'],
    modeldata['T'],
)
print(bgf)

# Frequency / recency matrix.
from lifetimes.plotting import plot_frequency_recency_matrix
plot_frequency_recency_matrix(bgf)

from lifetimes.plotting import plot_probability_alive_matrix

# Conditional probability of being alive, per customer. The column name says
# "Churn" but the stored value is the alive probability.
modeldata['Churn_probability'] = bgf.conditional_probability_alive(
    modeldata['frequency'],
    modeldata['recency'],
    modeldata['T'],
)

# Alive-probability matrix in a 12x8 figure.
fig = plt.figure(figsize=(12,8))
plot_probability_alive_matrix(bgf)

# Expected number of purchases per customer over the next ``t`` periods.
t = 30  # number of days to predict
modeldata['predicted_num_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t,
    modeldata['frequency'],
    modeldata['recency'],
    modeldata['T'],
)
# Show the five customers with the highest predicted purchase count.
modeldata.sort_values(by='predicted_num_purchases').tail(5)
def visualizeFrequencyRecencyMatrix(betaGeoFitterModel):
    """Plot the frequency/recency matrix for a model and save it as a PNG.

    The image is written to ``FrequencyRecencyMatrixPlot.png`` in the current
    working directory.

    Args:
        betaGeoFitterModel: Model object handed to
            ``plot_frequency_recency_matrix``.
    """
    output_png = "FrequencyRecencyMatrixPlot.png"
    plot_frequency_recency_matrix(betaGeoFitterModel)
    pylab.savefig(output_png)
def test_plot_frequency_recency_matrix(self):
    """Plot the matrix with default arguments and return the figure.

    Returns:
        The current matplotlib figure after plotting.
    """
    plt.figure()
    plotting.plot_frequency_recency_matrix(bgf)
    current_figure = plt.gcf()
    return current_figure
# Preview the monetary values cast to integers.
summary['monetary_value'].astype(int).head()

from lifetimes import BetaGeoFitter

# BG/NBD model; the penalizer coefficient is set to 0.0 here.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(
    summary['frequency'],
    summary['recency'],
    summary['T'],
)
print(bgf)
bgf.summary

from lifetimes.plotting import plot_frequency_recency_matrix
import matplotlib.pyplot as plt

# Square figures for the two matrix plots; both are drawn with an empty title.
plt.rcParams['figure.figsize'] = [10, 10]
plot_frequency_recency_matrix(bgf, title="")

from lifetimes.plotting import plot_probability_alive_matrix
plot_probability_alive_matrix(bgf, title="")

# Expected purchases per customer over the next ``t`` periods.
t = 12
summary['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t,
    summary['frequency'],
    summary['recency'],
    summary['T'],
)
# Show the ten customers with the highest predicted purchase count.
summary.sort_values(by='predicted_purchases').tail(10)

from lifetimes.plotting import plot_period_transactions

# Wide, short figure for the period-transactions chart.
plt.rcParams['figure.figsize'] = [12, 3]
plot_period_transactions(bgf)
#%%
X_test.to_csv('matrix.csv')

#%%
from lifetimes import BetaGeoFitter

# Recency and T are divided by 7 before modelling.
# NOTE(review): presumably a days-to-weeks conversion; confirm the source units.
recency_scaled = X_train['recency_true']/7
age_scaled = X_train['T']/7

# similar API to scikit-learn and lifelines.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(X_train['txn_total'], recency_scaled, age_scaled)
print(bgf)

# Programmatic form of the ``%matplotlib inline`` magic. The bare magic is not
# valid Python syntax, so it would stop this file from being parsed outside
# IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Not running under IPython: there is no inline backend to select.
    pass

from lifetimes.plotting import plot_frequency_recency_matrix
plot_frequency_recency_matrix(bgf)

#%%
from lifetimes.plotting import plot_probability_alive_matrix
f=plot_probability_alive_matrix(bgf)

# Expected purchases per customer over the next ``t`` periods, in the same
# scaled time unit used for fitting.
t=52
X_train['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, X_train['txn_total'], recency_scaled, age_scaled)

#%%
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)
#%%