def calc_clv(clv_recs, end, months=12):
    """Estimate per-player customer lifetime value from transaction records.

    Parameters
    ----------
    clv_recs
        Anything ``pandas.DataFrame`` accepts; must provide the columns
        ``player_id``, ``start_date`` and ``theo_win``.
    end
        Observation-period end, passed through ``parse`` before use.
    months : int
        Prediction horizon handed to ``customer_lifetime_value`` as ``time``.

    Returns
    -------
    pandas.DataFrame
        The lifetimes summary (frequency, recency, T, monetary_value) joined
        with a non-negative ``clv`` column; rows with a NaN CLV are dropped.
    """
    df = pandas.DataFrame(clv_recs)
    # Copy the column subset so the dtype conversion below writes to an
    # independent frame rather than to a slice of ``df``.
    df = df[['player_id', 'start_date', 'theo_win']].copy()
    df['theo_win'] = df['theo_win'].astype(float)
    end_date = parse(end)

    summary = summary_data_from_transaction_data(
        df, 'player_id', 'start_date',
        monetary_value_col='theo_win',
        observation_period_end=end_date)

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # The Gamma-Gamma fitter rejects non-positive monetary values, and
    # customers without a repeat transaction carry monetary_value == 0 in the
    # summary, so fit it on returning customers with positive value only.
    returning = summary[(summary['frequency'] > 0)
                        & (summary['monetary_value'] > 0)]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning['frequency'], returning['monetary_value'])

    # CLV is still predicted for every customer in the summary.
    ggf_clv = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        summary['frequency'],
        summary['recency'],
        summary['T'],
        summary['monetary_value'],
        time=months,
        discount_rate=0.0,
    )

    clv_df = pandas.DataFrame(ggf_clv).dropna()
    # Clamp negative predictions to zero.
    clv_df.loc[clv_df['clv'] < 0, 'clv'] = 0.0

    return summary.merge(clv_df, left_index=True, right_index=True,
                         how='inner')
def clv(pareto, mbg, summary):
    """Predict 12-month CLV with two different transaction models.

    A single Gamma-Gamma spend model is fitted on customers with at least one
    repeat purchase, then combined in turn with ``pareto`` and ``mbg`` to
    predict CLV for every row of ``summary``.

    Returns
    -------
    tuple
        ``(pred_clv_pareto, pred_clv_mbg)``.
    """
    repeat_buyers = summary[summary['frequency'] > 0]

    spend_model = GammaGammaFitter(penalizer_coef=0.0)
    spend_model.fit(
        frequency=repeat_buyers['frequency'],
        monetary_value=repeat_buyers['monetary_value'],
    )

    def _predict_with(purchase_model):
        # Same inputs for both models; only the transaction model differs.
        return spend_model.customer_lifetime_value(
            transaction_prediction_model=purchase_model,
            frequency=summary['frequency'],
            recency=summary['recency'],
            T=summary['T'],
            monetary_value=summary['monetary_value'],
            time=12,
            freq="D",
        )

    pareto_clv = _predict_with(pareto)
    mbg_clv = _predict_with(mbg)
    return pareto_clv, mbg_clv
def create_cltv_pred(dataframe, w=4, m=1):
    """Fit BG/NBD and Gamma-Gamma models and append CLTV predictions.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``frequency``, ``recency_weekly``, ``T_weekly`` and
        ``monetary_avg``. The caller's frame is not modified.
    w : int
        Horizon, in weeks, for the BG/NBD expected-sales prediction.
    m : int
        Horizon, in months, for the Gamma-Gamma CLTV prediction.

    Returns
    -------
    pandas.DataFrame
        Rows with ``monetary_avg > 0`` plus the columns
        ``exp_sales_{w}_week``, ``expected_average_profit``,
        ``cltv_p_{m}_month``, ``cltv_p_score`` (scaled to 1-100) and
        ``cltv_p_segment`` (terciles labelled C/B/A). The columns
        ``recency``, ``frequency`` and ``monetary`` are removed.
    """
    # Work on an explicit copy: the columns added below must not be written
    # onto a slice of the caller's frame.
    dataframe = dataframe[dataframe["monetary_avg"] > 0].copy()
    dataframe["frequency"] = dataframe["frequency"].astype(int)

    # BG/NBD: expected number of transactions in the next ``w`` weeks.
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(dataframe['frequency'],
            dataframe['recency_weekly'],
            dataframe['T_weekly'])
    dataframe[f'exp_sales_{w}_week'] = bgf.predict(
        w,
        dataframe['frequency'],
        dataframe['recency_weekly'],
        dataframe['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(dataframe['frequency'], dataframe['monetary_avg'])
    dataframe["expected_average_profit"] = (
        ggf.conditional_expected_average_profit(
            dataframe['frequency'], dataframe['monetary_avg']))

    # CLTV prediction combining both models.
    cltv = ggf.customer_lifetime_value(
        bgf,
        dataframe['frequency'],
        dataframe['recency_weekly'],
        dataframe['T_weekly'],
        dataframe['monetary_avg'],
        time=m,
        freq="W",
        discount_rate=0.01)
    dataframe[f'cltv_p_{m}_month'] = cltv

    scaler = MinMaxScaler(feature_range=(1, 100))
    dataframe['cltv_p_score'] = scaler.fit_transform(
        dataframe[[f'cltv_p_{m}_month']])

    # Tercile segments; 'A' is the highest-scoring third.
    dataframe['cltv_p_segment'] = pd.qcut(dataframe['cltv_p_score'], 3,
                                          labels=['C', 'B', 'A'])

    kept_columns = dataframe.columns[
        ~dataframe.columns.isin(['recency', 'frequency', 'monetary'])]
    return dataframe[kept_columns]
class transactionMonetary(object):
    """Build a monetary transaction summary and derive per-customer CLV.

    ``summaryOutput`` reads ``self.summary_monetary`` (set by
    ``summary_trans_create``) and ``self.ggf`` (set by ``fit_ggf``), so both
    must have been called first.
    """

    def summary_trans_create(self, df, observation_period_end='2017-02-08'):
        '''
        Subset df on sales data, return trans summary with monetary spend.

        Keeps rows with ``OrderType == 1`` (via ``subset_data``) and a
        positive ``OrderTotal``, stores the lifetimes summary on
        ``self.summary_monetary`` and returns the customers with at least one
        repeat purchase (also stored on ``self.return_customers``).

        ``observation_period_end`` defaults to the previously hard-coded
        date, so existing callers are unaffected.
        '''
        sales = subset_data(df, 'OrderType', 1)
        sales = sales[sales.OrderTotal > 0]
        transaction_data_monetary = sales[
            ['OrderDate', 'CustomerNo', 'OrderTotal']]
        self.summary_monetary = summary_data_from_transaction_data(
            transaction_data_monetary, 'CustomerNo', 'OrderDate',
            'OrderTotal', observation_period_end=observation_period_end)
        # keep customers with more than one spend
        self.return_customers = self.summary_monetary[
            self.summary_monetary['frequency'] > 0]
        return self.return_customers

    def fit_ggf(self):
        '''Fit the Gamma-Gamma spend model on the returning customers.'''
        self.ggf = GammaGammaFitter(penalizer_coef=0)
        self.ggf.fit(self.return_customers['frequency'],
                     self.return_customers['monetary_value'])

    def summaryOutput(self, discount_rate=0.12, months=12):
        '''
        Fit beta geometric model to calculate CLV, and use GG model to
        calculate expected profit per customer.

        Write out CLV and profits to csv, print out averages to screen.

        discount_rate -- annual discount rate.
        months        -- CLV prediction horizon in months.
        '''
        summary = self.summary_monetary
        beta_model = BetaGeoFitter()

        # calculate average transaction value
        summary['avg_transaction_value'] = (
            self.ggf.conditional_expected_average_profit(
                summary['frequency'], summary['monetary_value']))

        # fit beta geo model
        beta_model.fit(summary['frequency'], summary['recency'],
                       summary['T'])

        # customer_lifetime_value discounts once per month, so it needs the
        # monthly rate: annual / 12. The previous ``/ months / 30`` produced
        # a rate about 30x too small and tied it to the prediction horizon.
        monthly_rate = discount_rate / 12
        summary['clv'] = self.ggf.customer_lifetime_value(
            beta_model,  # predicts the number of future transactions
            summary['frequency'],
            summary['recency'],
            summary['T'],
            summary['monetary_value'],
            time=months,  # months
            discount_rate=monthly_rate,  # monthly discount rate
        )

        # print customer data with calculations
        # NOTE(review): index=False leaves the frame's index out of the CSV;
        # presumably that index is CustomerNo -- confirm whether the file
        # is meant to carry the customer identifier.
        summary.to_csv("CLV_AVG_transactionValue_perCustomer.csv",
                       index=False)

        # print summary stats
        print("Expected conditional average profit: {}, Average profit: {}".format(
            self.ggf.conditional_expected_average_profit(
                summary['frequency'], summary['monetary_value']).mean(),
            summary[summary['frequency'] > 0]['monetary_value'].mean()))
def gg_model(rfmmod, bgf, p, f):
    """Fit a Gamma-Gamma model and predict CLTV for returning customers.

    Parameters
    ----------
    rfmmod : pandas.DataFrame
        Needs ``frequency``, ``recency``, ``T`` and ``monetary_value``.
        Not modified.
    bgf
        Fitted transaction model passed to ``customer_lifetime_value``.
    p : float
        Penalizer coefficient for the Gamma-Gamma fitter.
    f : str
        Time unit of ``T``, forwarded as ``freq``.

    Returns
    -------
    pandas.DataFrame
        Customers with positive frequency and monetary value, with added
        ``predicted_cltv`` and ``exp_profit`` columns, sorted by
        ``predicted_cltv`` descending and rounded to 3 decimals.
    """
    keep = (rfmmod['frequency'] > 0) & (rfmmod['monetary_value'] > 0)
    # Explicit copy: new columns are added below and must not be written
    # onto a slice of the caller's frame.
    ret_cust = rfmmod[keep].copy()

    ggf = GammaGammaFitter(penalizer_coef=p)
    ggf.fit(ret_cust['frequency'], ret_cust['monetary_value'])

    ret_cust['predicted_cltv'] = ggf.customer_lifetime_value(
        bgf,
        ret_cust['frequency'],
        ret_cust['recency'],
        ret_cust['T'],
        ret_cust['monetary_value'],
        time=12,  # months
        freq=f,
        discount_rate=0.01)
    ret_cust['exp_profit'] = ggf.conditional_expected_average_profit(
        ret_cust['frequency'], ret_cust['monetary_value'])

    return ret_cust.sort_values('predicted_cltv', ascending=False).round(3)
# dtype: float64

# Expected average profit per customer from the fitted Gamma-Gamma model.
rfm_cltv["expected_average_profit"] = ggf.conditional_expected_average_profit(
    frequency=rfm_cltv["Frequency"],
    monetary_value=rfm_cltv["monetary_avg"],
)
rfm_cltv.sort_values("expected_average_profit", ascending=False).head(20)

# ##################################################################
# CLTV PREDICTION by combining BG/NBD and GAMMA-GAMMA MODEL
# ##################################################################

cltv = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,
    frequency=rfm_cltv["Frequency"],
    recency=rfm_cltv["Recency_weekly"],
    T=rfm_cltv["T_weekly"],
    monetary_value=rfm_cltv["monetary_avg"],
    time=3,  # for 3 months
    freq="W",  # weekly frequency
    discount_rate=0.01,
)
cltv.shape  # (4338,)

# Turn the index into a regular column before ranking.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(10)

# ⭐ 10 Most Valuable Customers!
#       Customer ID         clv
# 2678        16000 11794.07113
# 2087        15195  4641.70350
# 715         13298  1140.24615
# 2011        15098   988.92748
rfm.sort_values("expected_average_profit", ascending=False).head(20)

# finding the average number of sales in past months?

##############################################################
# 4. Calculating CLTV with the BG-NBD and GG models.
##############################################################

# TASK - 1
# 6-month CLTV prediction for the 2010-2011 UK customers
cltv = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,
    frequency=rfm['frequency'],
    recency=rfm['recency_weekly_p'],
    T=rfm['tenure_weekly_p'],
    monetary_value=rfm['monetary_avg'],
    time=6,
    freq="W",
    discount_rate=0.01,
)

cltv.head()
cltv.shape

# Expose the index as a column so the result can be merged on "Customer ID".
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(50)

rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.sort_values(by="clv", ascending=False).head(7)
rfm_cltv_final[rfm_cltv_final["Customer ID"] == 12748.00000]

# TASK - 2
# 1-month and 12-month CLTV prediction for the 2010-2011 UK customers
# - Analyse the 10 customers with the highest 1-month CLTV and the 10 with
#   the highest 12-month CLTV.
# - Is there a difference? If so, what could be the reason?
def generate_clv_table(data, clv_prediction_time=None, model_penalizer=None):
    """Build a per-client CLV table from an activity stream.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``client_id``, ``activity_date``, ``Searches`` and ``Revenue``.
        ``activity_date`` must support ``.max().date()``.
    clv_prediction_time : int, optional
        CLV horizon in months; defaults to 12.
    model_penalizer : float, optional
        Penalizer coefficient for the BG/NBD model; defaults to 0.

    Returns
    -------
    pandas.DataFrame
        Indexed by client with the columns ``frequency``, ``recency``,
        ``customer_age``, ``avg_session_value``,
        ``predicted_searches_14_days``, ``alive_probability``,
        ``predicted_clv_12_months``, ``historical_searches``,
        ``historical_clv``, ``total_clv``, ``days_since_last_active``,
        ``user_status`` and ``calc_date``.
    """
    # set default values if they are not stated
    if clv_prediction_time is None:
        clv_prediction_time = 12
    if model_penalizer is None:
        model_penalizer = 0

    # Remove non search sessions
    data = data[data['Searches'] > 0]
    max_date = data['activity_date'].max()

    # Aggregate the activity stream into frequency / recency / T /
    # monetary_value per client.
    summary = summary_data_from_transaction_data(
        data, 'client_id', 'activity_date', 'Revenue',
        observation_period_end=max_date)

    # Building the Model using BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=model_penalizer)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Expected purchases per client over the next t days
    t = 14
    summary['predicted_searches'] = (
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']))

    # Conditional Alive Probability
    summary['alive_prob'] = summary.apply(
        lambda row: calc_alive_prob(row, bgf), axis=1)
    summary['alive_prob'] = summary['alive_prob'].astype(float)

    # There cannot be non-positive values in the monetary_value or
    # frequency vector
    summary_with_value_and_returns = summary[
        (summary['monetary_value'] > 0) & (summary['frequency'] > 0)]

    # frequency, recency and T are columns of the same frame, so they are
    # empty exactly when the frame has no rows.
    no_returning_rows = len(summary_with_value_and_returns) == 0
    if no_returning_rows:
        logger.debug(data['client_id'])

    # Setting up Gamma Gamma model
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary_with_value_and_returns['frequency'],
            summary_with_value_and_returns['monetary_value'])

    # Refit the BG/NBD model on the returning customers when there are any.
    # The previous guard was ``not (<generator>)``, which is always False,
    # so this refit never ran.
    if not no_returning_rows:
        bgf.fit(summary_with_value_and_returns['frequency'],
                summary_with_value_and_returns['recency'],
                summary_with_value_and_returns['T'])

    # Getting Customer lifetime value using the Gamma Gamma output
    customer_predicted_value = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['recency'],
        summary_with_value_and_returns['T'],
        summary_with_value_and_returns['monetary_value'],
        time=clv_prediction_time,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    )

    # Converting to dataframe, keyed by client_id
    df_cpv = pd.DataFrame({
        'client_id': customer_predicted_value.index,
        'pred_values': customer_predicted_value.values
    }).set_index('client_id')

    # Merge with original summary
    df_merged = pd.merge(summary, df_cpv,
                         left_index=True, right_index=True, how='outer')

    # Historical CLV. Columns are selected with a list: tuple-style
    # selection on a groupby is not supported by current pandas.
    data_hist = data.groupby(['client_id'])[['Searches', 'Revenue']].apply(
        lambda x: x.astype(float).sum())

    # Merge with original summary
    df_final = pd.merge(df_merged, data_hist,
                        left_index=True, right_index=True, how='outer')

    # Prevent NaN on the pred_values column for non-returning clients
    df_final.loc[df_final['frequency'] == 0, 'pred_values'] = 0.0

    # Create column that combines historical and predicted customer value
    df_final['total_clv'] = df_final['pred_values'] + df_final['Revenue']

    # Number of days since the client was last active
    df_final['last_active'] = df_final['T'] - df_final['recency']

    # Label users inactive over 14 days as "Expired" ELSE "Active"
    df_final['user_status'] = np.where(df_final['last_active'] > 14,
                                       'Expired', 'Active')

    # Set calc_date to max submission date
    df_final['calc_date'] = max_date.date()

    # Rename columns positionally. The CLV column keeps its "12_months"
    # name whatever clv_prediction_time was requested.
    df_final.columns = [
        'frequency', 'recency', 'customer_age', 'avg_session_value',
        'predicted_searches_14_days', 'alive_probability',
        'predicted_clv_12_months', 'historical_searches', 'historical_clv',
        'total_clv', 'days_since_last_active', 'user_status', 'calc_date'
    ]

    # Prevent non returning customers from having 100% alive probability
    df_final.loc[df_final['frequency'] == 0, 'alive_probability'] = 0.0

    return df_final
ggf.conditional_expected_average_profit(
    frequency=df["FREQUENCY"],
    monetary_value=df["MONETARY_VALUE"],
).head(10)

# Compare the model's expected profit with the observed average among
# customers that have at least one repeat purchase.
print("Expected conditional average profit: %s, Average profit: %s" % (
    ggf.conditional_expected_average_profit(
        frequency=df["FREQUENCY"],
        monetary_value=df["MONETARY_VALUE"],
    ).mean(),
    df.loc[df["FREQUENCY"] > 0, "MONETARY_VALUE"].mean(),
))

bgf.fit(frequency=df["FREQUENCY"], recency=df["RECENCY"], T=df["T"])

# NOTE(review): the original annotated ``time=1`` as "year" and the discount
# rate as "campaignly ~ 20% annually"; confirm both against the units the
# library expects for ``time`` and ``discount_rate``.
pred = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,  # predicts future transaction counts
    frequency=df["FREQUENCY"],
    recency=df["RECENCY"],
    T=df["T"],
    monetary_value=df["MONETARY_VALUE"],
    time=1,
    discount_rate=0.02,
)

# Quick look at the prediction distribution.
pred.head(10)
pred.tail(10)
pred.mean()
pred.median()

# Reference statistics of the input data.
df["MONETARY_VALUE"].mean()
df["T"].mean()
df.loc[df["T"] < 14, "T"].count()
df.loc[df["T"] > 13, "T"].count()
# Keep only customers whose average monetary value is positive.
positive_value_mask = returning_customers_summary['monetary_value'] > 0
returning_customers_summary = returning_customers_summary[positive_value_mask]
del positive_value_mask

returning_customers_summary['predicted_avg_sales'] = (
    ggf.conditional_expected_average_profit(
        frequency=returning_customers_summary['frequency'],
        monetary_value=returning_customers_summary['monetary_value'],
    )
)

# Sanity check: the expected average should be close to the actual average.
print(f"Expected Average sales: {returning_customers_summary['predicted_avg_sales'].mean()}")
print(f"Actual Average sales: {returning_customers_summary['monetary_value'].mean()}")
# The values seem to be fine

# calculating CLV for 1 month
returning_customers_summary['Predicted_CLV'] = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,
    frequency=returning_customers_summary['frequency'],
    recency=returning_customers_summary['recency'],
    T=returning_customers_summary['T'],
    monetary_value=returning_customers_summary['monetary_value'],
    time=1,  # lifetime in months
    freq='D',  # frequency in which data is present (T)
    discount_rate=0.01,  # discount rate
)

# calculate CLV manual
#returning_customers_summary['manual_predict_clv']= returning_customers_summary['predicted_num_purchases'] * returning_customers_summary['predicted_avg_sales']

# calculate CLV profit
profit_margin = 0.05
returning_customers_summary['profit_CLV'] = (
    returning_customers_summary['Predicted_CLV'] * profit_margin
)

############ THE END OF CLV ###################
def create_cltv_p(dataframe, today_date=None):
    """Build a 6-month CLTV prediction table per customer.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Transactions with ``Customer ID``, ``InvoiceDate``, ``Invoice`` and
        ``TotalPrice``.
    today_date : datetime, optional
        Analysis date used to compute the customer age ``T``. Defaults to
        2011-12-11, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per customer with more than one invoice and a positive
        average order value. ``cltv_p`` is min-max scaled to 1-100 and
        ``cltv_p_segment`` holds its terciles labelled C/B/A.
    """
    if today_date is None:
        today_date = dt.datetime(2011, 12, 11)

    # Per-customer recency (first to last purchase), age T, invoice count
    # and total spend.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,  # "recency_cltv_p"
            lambda date: (today_date - date.min()).days,  # "T"
        ],
        'Invoice': lambda num: num.nunique(),  # "frequency"
        'TotalPrice': lambda TotalPrice: TotalPrice.sum(),  # "monetary"
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ["recency_cltv_p", "T", "frequency", "monetary"]

    # Simplified monetary_avg (the Gamma-Gamma model works on the average
    # value per transaction).
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for the BG/NBD model.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Monetary avg must be positive and customers need more than one
    # invoice. Copy so the columns added below are written to an independent
    # frame rather than to a filtered slice.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD model: expected number of sales.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm["frequency"], rfm["recency_weekly_cltv_p"], rfm["T_weekly"])
    rfm["exp_sales_1_month"] = bgf.predict(
        4, rfm["frequency"], rfm["recency_weekly_cltv_p"], rfm["T_weekly"])
    rfm["exp_sales_3_month"] = bgf.predict(
        12, rfm["frequency"], rfm["recency_weekly_cltv_p"], rfm["T_weekly"])

    # Gamma-Gamma model: expected average profit.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm["frequency"], rfm["monetary_avg"])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm["frequency"], rfm["monetary_avg"])

    # CLTV prediction for 6 months.
    cltv = ggf.customer_lifetime_value(
        bgf,
        rfm["frequency"],
        rfm["recency_weekly_cltv_p"],
        rfm["T_weekly"],
        rfm["monetary_avg"],
        time=6,
        freq="W",
        discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # Scale CLTV to the 1-100 range.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments; 'A' is the highest third.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["recency_cltv_p", "T", "monetary_avg",
               "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
def create_cltv_p(dataframe):
    """Build a 6-month CLTV prediction table per customer.

    ``dataframe`` needs ``Customer ID``, ``InvoiceDate``, ``Invoice`` and
    ``TotalPrice``. Returns one row per customer with a positive
    ``monetary_avg``; ``cltv_p`` is min-max scaled to 1-100 and
    ``cltv_p_segment`` holds its terciles labelled C/B/A.
    """
    today_date = dt.datetime(2011, 12, 11)

    # BG/NBD recency is the customer's age at their last purchase, i.e. the
    # time between first and last purchase -- not the time elapsed since the
    # last purchase, which is the RFM definition used here before.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,  # recency
            lambda date: (today_date - date.min()).days,  # T
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum(),
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency', 'T', 'frequency', 'monetary']

    # monetary_avg: mean TotalPrice per invoice, averaged per customer.
    temp_df = dataframe.groupby(["Customer ID", "Invoice"]).agg(
        {"TotalPrice": ["mean"]})
    temp_df = temp_df.reset_index()
    temp_df.columns = temp_df.columns.droplevel(0)
    temp_df.columns = ["Customer ID", "Invoice", "total_price_mean"]
    temp_df2 = temp_df.groupby(["Customer ID"], as_index=False).agg(
        {"total_price_mean": ["mean"]})
    temp_df2.columns = temp_df2.columns.droplevel(0)
    temp_df2.columns = ["Customer ID", "monetary_avg"]

    rfm = rfm.merge(temp_df2, how="left", on="Customer ID")
    rfm.set_index("Customer ID", inplace=True)
    rfm.index = rfm.index.astype(int)

    # Weekly recency and weekly T for BG/NBD.
    rfm["recency_weekly"] = rfm["recency"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Monetary average must be positive. Copy so the columns added below
    # are written to an independent frame rather than to a filtered slice.
    rfm = rfm[rfm["monetary_avg"] > 0].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: expected number of sales over 4 and 12 weeks.
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(rfm['frequency'], rfm['recency_weekly'], rfm['T_weekly'])
    rfm["exp_sales_1_month"] = bgf.predict(
        4, rfm['frequency'], rfm['recency_weekly'], rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(
        12, rfm['frequency'], rfm['recency_weekly'], rfm['T_weekly'])

    # Gamma-Gamma: expected average profit.
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV.
    cltv = ggf.customer_lifetime_value(
        bgf,
        rfm['frequency'],
        rfm['recency_weekly'],
        rfm['T_weekly'],
        rfm['monetary_avg'],
        time=6,
        freq="W",
        discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # Scale CLTV to the 1-100 range.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments; 'A' is the highest third.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["monetary_avg", "T", "recency_weekly", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
# NOTE(review): the next line is the tail of a call that begins before this
# fragment; it is left exactly as found.
rfm['monetary_avg']).sort_values(ascending=False).head(10)

# Store the Gamma-Gamma expected average profit per customer.
rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
    rfm['frequency'], rfm['monetary_avg'])

rfm.sort_values("expected_average_profit", ascending=False).head(20)

##############################################################
# 4. Calculating CLTV with the BG-NBD and GG models.
##############################################################

cltv = ggf.customer_lifetime_value(
    bgf,
    rfm['frequency'],
    rfm['recency_weekly_p'],
    rfm['T_weekly'],
    rfm['monetary_avg'],
    time=3,  # 3 months
    freq="W",  # time unit of T.
    discount_rate=0.01)

cltv.head()

cltv.shape

# Turn the index into a column so the result can be merged on "Customer ID".
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(50)
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.head()

# What comes next?
# A time-based benchmark using the holdout method is needed.
def create_cltv_p(dataframe):
    """Return a per-customer table of expected sales and scaled 6-month CLTV.

    Customers are kept only when their average order value is positive and
    they have more than one invoice. ``cltv_p`` is scaled to 1-100 and split
    into terciles labelled C/B/A.
    """
    analysis_date = dt.datetime(2011, 12, 11)

    def _first_to_last_days(dates):
        # Customer-specific recency: days between first and last purchase.
        return (dates.max() - dates.min()).days

    def _first_to_analysis_days(dates):
        # Customer age: days between first purchase and the analysis date.
        return (analysis_date - dates.min()).days

    aggregations = {
        'InvoiceDate': [_first_to_last_days, _first_to_analysis_days],
        'Invoice': lambda invoices: invoices.nunique(),
        'TotalPrice': lambda prices: prices.sum(),
    }
    rfm = dataframe.groupby('Customer ID').agg(aggregations)
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified average order value.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly units for the BG/NBD model.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Sanity filters: positive average value, more than one invoice.
    has_positive_value = rfm["monetary_avg"] > 0
    is_repeat_buyer = rfm['frequency'] > 1
    rfm = rfm[has_positive_value & is_repeat_buyer]
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: expected number of sales.
    purchase_model = BetaGeoFitter(penalizer_coef=0.01)
    purchase_model.fit(rfm['frequency'],
                       rfm['recency_weekly_cltv_p'],
                       rfm['T_weekly'])
    one_month_sales = purchase_model.predict(
        4, rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])
    rfm["exp_sales_1_month"] = one_month_sales
    three_month_sales = purchase_model.predict(
        12, rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])
    rfm["exp_sales_3_month"] = three_month_sales

    # Gamma-Gamma: expected average profit.
    spend_model = GammaGammaFitter(penalizer_coef=0.01)
    spend_model.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = (
        spend_model.conditional_expected_average_profit(
            rfm['frequency'], rfm['monetary_avg']))

    # 6-month CLTV.
    rfm["cltv_p"] = spend_model.customer_lifetime_value(
        purchase_model,
        rfm['frequency'],
        rfm['recency_weekly_cltv_p'],
        rfm['T_weekly'],
        rfm['monetary_avg'],
        time=6,
        freq="W",
        discount_rate=0.01)

    # Min-max scale to 1-100.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    output_columns = [
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment",
    ]
    rfm = rfm[output_columns]
    return rfm
bgf.fit(rfm["frequency"], rfm["recency_weekly_p"], rfm["T_weekly"]) # BGNBD fitted. ######################################## # Gamma Gamma ######################################## ggf = GammaGammaFitter(penalizer_coef=0.01) # Gamma Gamma created. ggf.fit(rfm["frequency"], rfm["monetary_avg"]) # Gamma gamma fitted. # 6 Months CLTV Prediction cltv_6_months = ggf.customer_lifetime_value(bgf, rfm['frequency'], rfm['recency_weekly_p'], rfm['T_weekly'], rfm['monetary_avg'], time=6, freq="W", discount_rate=0.01) cltv_6_months = cltv_6_months.reset_index( ) # indexes are broken. Reset_index fixed it. cltv_6_months.sort_values(by="clv", ascending=False) # 1 Month CLTV Prediction cltv_1_month = ggf.customer_lifetime_value(bgf, rfm['frequency'], rfm['recency_weekly_p'], rfm['T_weekly'], rfm['monetary_avg'],
def create_cltv_p(dataframe):
    """Build a 6-month CLTV prediction table per customer.

    ``dataframe`` needs ``Customer ID``, ``InvoiceDate``, ``Invoice`` and
    ``TotalPrice``. Customers are kept when their average order value is
    positive and they have more than one invoice. ``cltv_p`` is min-max
    scaled to 1-100 and ``cltv_p_segment`` holds its terciles (C/B/A).
    """
    today_date = dt.datetime(2011, 12, 11)

    # Build the RFM metrics plus tenure.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,
            lambda date: (today_date - date.min()).days,
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum(),
    })
    # The droplevel result was previously discarded; assign it so the
    # statement actually flattens the column index.
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified monetary_avg.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and tenure for BG/NBD.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Sanity filters. Copy so the columns added below are written to an
    # independent frame rather than to a filtered slice.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm['frequency'] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: expected sales over 4 and 12 weeks.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])
    rfm["exp_sales_1_month"] = bgf.predict(
        4, rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(
        12, rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # Gamma-Gamma: expected average profit.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month cltv_p.
    cltv = ggf.customer_lifetime_value(
        bgf,
        rfm['frequency'],
        rfm['recency_weekly_cltv_p'],
        rfm['T_weekly'],
        rfm['monetary_avg'],
        time=6,
        freq="W",
        discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # Scale CLTV to the 1-100 range.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments; 'A' is the highest third.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["recency_cltv_p", "T", "monetary_avg",
               "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
def create_cltv_p(dataframe):
    """Compute expected sales, expected profit and scaled 6-month CLTV.

    Works per ``Customer ID``; only customers with a positive average order
    value and more than one invoice are returned. ``cltv_p`` is scaled to
    1-100 and segmented into terciles labelled C/B/A.
    """
    reference_day = dt.datetime(2011, 12, 11)

    by_customer = dataframe.groupby('Customer ID')
    rfm = by_customer.agg({
        'InvoiceDate': [
            lambda d: (d.max() - d.min()).days,
            lambda d: (reference_day - d.min()).days,
        ],
        'Invoice': lambda inv: inv.nunique(),
        'TotalPrice': lambda amount: amount.sum(),
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Average value per invoice replaces the raw total.
    rfm['monetary'] = rfm['monetary'] / rfm['frequency']
    rfm.rename(columns={'monetary': 'monetary_avg'}, inplace=True)

    # Convert day counts to weeks.
    rfm["recency_weekly_cltv_p"] = rfm['recency_cltv_p'] / 7
    rfm['T_weekly'] = rfm['T'] / 7

    rfm = rfm[rfm['monetary_avg'] > 0]
    rfm = rfm[rfm['frequency'] > 1]
    rfm['frequency'] = rfm['frequency'].astype(int)

    # Inputs shared by the BG/NBD fit and both predictions.
    bgnbd_inputs = (
        rfm['frequency'],
        rfm['recency_weekly_cltv_p'],
        rfm['T_weekly'],
    )

    # BGNBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(*bgnbd_inputs)
    rfm["exp_sales_1_month"] = bgf.predict(4, *bgnbd_inputs)
    rfm["exp_sales_3_month"] = bgf.predict(12, *bgnbd_inputs)

    # Gamma Gamma
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    expected_profit = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = expected_profit

    # 6-month CLTV combining both models.
    rfm["cltv_p"] = ggf.customer_lifetime_value(
        bgf,
        rfm['frequency'],
        rfm['recency_weekly_cltv_p'],
        rfm['T_weekly'],
        rfm['monetary_avg'],
        time=6,
        freq='W',
        discount_rate=0.01,
    )

    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    return rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]
# GAMMA-GAMMA
#####

ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(frequency=cltv["frequency"], monetary_value=cltv["monetary_avg"])

cltv["expected_average_profit"] = ggf.conditional_expected_average_profit(
    frequency=cltv["frequency"],
    monetary_value=cltv["monetary_avg"],
)

cltv.sort_values(by="expected_average_profit", ascending=False).head()

###########
# 4. CLTV calculation with BG-NBD and GG models
###########

cltv["cltv_six_months"] = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,
    frequency=cltv["frequency"],
    recency=cltv["recency_weekly"],
    T=cltv["T_weekly"],
    monetary_value=cltv["monetary_avg"],
    time=6,
    discount_rate=0.01,
    freq="W",
)

### Best 5 customers for expected CLTV for 6 months
cltv.sort_values(by="cltv_six_months", ascending=False).head()

cltv.describe()

# Quantiles of every column, used as the data behind the scatter plot.
plot_cltv = cltv.quantile([0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1])

import seaborn as sns
import matplotlib.pyplot as plt

sns.scatterplot(
    x=plot_cltv.index,
    y=plot_cltv["cltv_six_months"],
    data=plot_cltv,
)
gg = GammaGammaFitter(penalizer_coef=0.001)
gg.fit(
    frequency=customer_detail['frequency'],
    monetary_value=customer_detail['avg_order_value'],
    verbose=True,
)

# Preview the expected average order value per customer.
print(
    gg.conditional_expected_average_profit(
        frequency=customer_detail['frequency'],
        monetary_value=customer_detail['avg_order_value'],
    ).head(10)
)

# In[16]:

# Undiscounted CLV over ``t`` periods, truncated to whole units.
customer_detail['clv'] = gg.customer_lifetime_value(
    transaction_prediction_model=mbgnbd,
    frequency=customer_detail['frequency'],
    recency=customer_detail['recency'],
    T=customer_detail['T'],
    monetary_value=customer_detail['avg_order_value'],
    time=t,
    discount_rate=0,
).astype(int)

customer_detail[
    ['frequency', 'pred_90d_bgf', 'monetary', 'avg_order_value', 'clv']
].head()

# In[17]:

# Expected order count: CLV divided by the expected average order value.
customer_detail['exp_orders'] = (
    customer_detail['clv']
    / gg.conditional_expected_average_profit(
        frequency=customer_detail['frequency'],
        monetary_value=customer_detail['avg_order_value'],
    )
).astype(int)

# Share of predicted CLV not yet realised as spend.
# NOTE(review): 'clv' is truncated to int above, so a value of 0 makes this
# division produce inf/NaN -- confirm that is acceptable downstream.
customer_detail['potential'] = 100 - (
    (100 / customer_detail['clv']) * customer_detail['monetary']
)
with_frequency.head()

# Correlation between spend and purchase frequency.
with_frequency[['monetary_value', 'frequency']].corr()

from lifetimes import GammaGammaFitter

ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(
    frequency=with_frequency['frequency'],
    monetary_value=with_frequency['monetary_value'],
)
ggf

ggf.conditional_expected_average_profit(
    frequency=data['frequency'],
    monetary_value=data['monetary_value'],
).head(20)

# Model expectation versus the observed average among repeat customers.
"Expected conditional average profit: %s, Average profit: %s" % (
    ggf.conditional_expected_average_profit(
        frequency=data['frequency'],
        monetary_value=data['monetary_value'],
    ).mean(),
    data.loc[data['frequency'] > 0, 'monetary_value'].mean(),
)

bgf.fit(frequency=data['frequency'], recency=data['recency'], T=data['T'])

ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,  # predicts future transaction counts
    frequency=data['frequency'],
    recency=data['recency'],
    T=data['T'],
    monetary_value=data['monetary_value'],
    time=2,  # months
    discount_rate=0.1,  # monthly discount rate
).head(10)
def create_cltv_p(dataframe, today_date=None, months=6):
    """Build per-customer BG/NBD + Gamma-Gamma features and a CLTV segment.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Transaction rows; the columns 'Customer ID', 'InvoiceDate',
        'Invoice' and 'TotalPrice' are read.
    today_date : datetime.datetime, optional
        Analysis date used to compute each customer's age ``T``.
        Defaults to ``dt.datetime(2011, 12, 11)``.
    months : int, optional
        Horizon passed as ``time`` to ``customer_lifetime_value``.
        Defaults to 6.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Customer ID' with the columns recency_cltv_p, T,
        monetary_avg, recency_weekly_cltv_p, T_weekly, exp_sales_1_month,
        exp_sales_3_month, expected_average_profit, cltv_p (scaled to
        1..100) and cltv_p_segment ('C' < 'B' < 'A' terciles).
    """
    if today_date is None:
        today_date = dt.datetime(2011, 12, 11)

    # Recency is per-customer: days between that customer's first and last
    # purchase. T is the customer's age relative to the analysis date.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,
            lambda date: (today_date - date.min()).days
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()
    })
    # Replace the two-level column index produced by agg() with flat names.
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified average monetary value: total spend per invoice.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm = rfm.rename(columns={"monetary": "monetary_avg"})

    # The BG/NBD model below is fed recency and T in weeks.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Keep customers with positive average spend and more than one invoice.
    # .copy() makes the result an independent frame so the column
    # assignments below do not write into a view of the unfiltered data.
    # NOTE(review): lifetimes defines frequency as the number of *repeat*
    # purchases; here it is the count of distinct invoices - confirm intent.
    keep = (rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)
    rfm = rfm[keep].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: expected number of purchases.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # 4 weeks ~ 1 month, 12 weeks ~ 3 months.
    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # Predicted CLTV over `months` (6 by default).
    rfm["cltv_p"] = ggf.customer_lifetime_value(bgf,
                                                rfm['frequency'],
                                                rfm['recency_weekly_cltv_p'],
                                                rfm['T_weekly'],
                                                rfm['monetary_avg'],
                                                time=months,
                                                freq="W",
                                                discount_rate=0.01)

    # Rescale CLTV to the 1..100 range; ravel() turns the (n, 1) scaler
    # output into the 1-D shape of a column.
    scaler = MinMaxScaler(feature_range=(1, 100))
    rfm["cltv_p"] = scaler.fit_transform(rfm[["cltv_p"]]).ravel()

    # Tercile segments on the scaled CLTV.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # 'frequency' is intentionally not part of the returned columns, as in
    # the original selection.
    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]
    return rfm
# Attach the predicted monetary value to combined_data.
combined_data["monetary_value_predict"] = monetary_pred
combined_data.head()

##############################################################
# CLV MODEL
##############################################################

# This model takes the expected-purchases prediction and combines it with the
# expected purchase value, giving an estimate of how valuable a customer is
# over a given period.
clv = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,  # predicts the number of future transactions
    frequency=combined_data['frequency_cal'],
    recency=combined_data['recency_cal'],
    T=combined_data['T_cal'],
    monetary_value=combined_data['monetary_value_cal'],
    time=4,  # months
    freq="D",  # time unit of T
    discount_rate=0.01,
)
clv.head()

combined_data["CLV"] = clv
combined_data.head(20)

# The 10 most valuable customers for the next 4 months.
combined_data.sort_values('CLV', ascending=False).head(10)

# How do we evaluate the performance of the CLV model?
# We can compare it against a simple baseline.
# As the target, let's select the top 20% of customers.
# Fit the Gamma-Gamma spend model on returning customers only.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(
    frequency=returning_customers_summary['frequency'],
    monetary_value=returning_customers_summary['monetary_value'],
)
print(ggf)

# Estimate the average transaction value for every customer in `data`.
print(
    ggf.conditional_expected_average_profit(
        frequency=data['frequency'],
        monetary_value=data['monetary_value'],
    ).head(10)
)

# Refit the BG model to the summary_with_money_value dataset.
bgf.fit(frequency=data['frequency'], recency=data['recency'], T=data['T'])

CLV_12M = ggf.customer_lifetime_value(
    transaction_prediction_model=bgf,  # predicts the number of future transactions
    frequency=data['frequency'],
    recency=data['recency'],
    T=data['T'],
    monetary_value=data['monetary_value'],
    time=12,  # months
    discount_rate=0.01,  # monthly discount rate ~ 12.7% annually
)

# Turn the indexed Series into a two-column table for export.
# NOTE(review): `customer_id` is a variable here, not a string literal; it
# presumably holds the id column name defined earlier - confirm.
CLV_12M = pd.DataFrame({
    customer_id: CLV_12M.index,
    'CLV_12_months': CLV_12M.values,
})
print(CLV_12M.head(10))

CLV_12M.to_csv('CLV.csv', index=False)
# Fit the Gamma-Gamma spend model on the summary frame.
ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(summary_ggf['frequency'], summary_ggf['monetary_value'])

# Expected average profit per customer (notebook display).
ggf.conditional_expected_average_profit(
    summary_ggf['frequency'], summary_ggf['monetary_value']).head(10)

# Refit the purchase-count model on the same frame. A single fit is enough:
# the original ran this identical call twice in a row.
bgf.fit(summary_ggf['frequency'], summary_ggf['recency'], summary_ggf['T'])

# 12-month CLV, computed once and then previewed, instead of evaluating the
# identical call twice (once for display, once for assignment).
ggf_CLV = ggf.customer_lifetime_value(
    bgf,  # the model to use to predict the number of future transactions
    summary_ggf['frequency'],
    summary_ggf['recency'],
    summary_ggf['T'],
    summary_ggf['monetary_value'],
    time=12,  # months
    discount_rate=0.01  # monthly discount rate ~ 12.7% annually
)
ggf_CLV.head(10)
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Compute per-customer CLV for one client and upload results to GCS.

    Reads transactions from Oracle with the query found in
    ``context['templates_dict']['query']``, fits a BG/NBD model on a
    calibration window and a Gamma-Gamma model on returning customers,
    writes four diagnostic SVG charts and one CSV to the working directory,
    and uploads all five files to ``storage_bucket``.

    Parameters
    ----------
    oracle_conn_id : str
        Connection id passed to ``OracleHook``.
    src_client_id :
        Client identifier; used in local file names and the GCS object path.
    storage_bucket : str
        Destination bucket name.
    ds :
        Accepted for the caller's signature; not used by this function.
    **context :
        Must contain ``templates_dict['query']`` and ``ds_nodash``.

    Returns
    -------
    None
    """
    import matplotlib.pyplot
    matplotlib.pyplot.ioff()
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd

    print(src_client_id, context)
    query = context['templates_dict']['query']

    # Close the database connection as soon as the data is loaded, even if
    # the query fails.
    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    try:
        data = pd.read_sql(query, con=conn)
    finally:
        conn.close()
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values, split at a fixed calibration end date.
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    run_stamp = context.get("ds_nodash")
    chart_prefix = run_stamp + str(src_client_id)

    # (local file name, function drawing the chart and returning its axes)
    chart_specs = [
        (chart_prefix + '_plot_period_transactions_chart.svg',
         lambda: plot_period_transactions(bgf, max_frequency=30)),
        (chart_prefix + '_plot_frequency_recency_matrix.svg',
         lambda: plot_frequency_recency_matrix(bgf)),
        (chart_prefix + '_plot_probability_alive_matrix.svg',
         lambda: plot_probability_alive_matrix(bgf)),
        (chart_prefix + '_plot_calibration_vs_holdout_purchases.svg',
         lambda: plot_calibration_purchases_vs_holdout_purchases(
             bgf, training_rfm, n=50)),
    ]
    chart_files = []
    for chart_file, draw_chart in chart_specs:
        ax = draw_chart()
        ax.figure.savefig(chart_file, format='svg')
        # Release the figure so repeated task runs in one process do not
        # accumulate open figures.
        matplotlib.pyplot.close(ax.figure)
        chart_files.append(chart_file)

    # RFM summary over the whole observation period.
    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')

    # The Gamma-Gamma model is fitted on customers with repeat transactions.
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)

    full_rfm_with_value = full_rfm.join(clv)
    full_rfm_file = run_stamp + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload the CSV and every chart under <client>/<run date>/<file name>.
    # Each object is uploaded from its own local file; the original passed
    # the CSV path as `filename` for all four charts, so the chart objects
    # in the bucket contained CSV data.
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    object_prefix = str(src_client_id) + "/" + run_stamp + "/"
    for local_file in [full_rfm_file] + chart_files:
        gcs_hook.upload(bucket=storage_bucket,
                        object=object_prefix + local_file,
                        filename=local_file)
returning_customers_summary['frequency'], returning_customers_summary['monetary_value']) AVG_Profit = pd.Series(AVG_Profit) ############################### Customer Life Time Value Calculationn ########## # refit the BG model to the summary_with_money_value dataset, #the model to use to predict the number of future transactions from lifetimes import BetaGeoFitter bgf = BetaGeoFitter(penalizer_coef=0.0) bgf.fit(returning_customers_summary['frequency'], returning_customers_summary['recency'], returning_customers_summary['T']) CLV_1Year = ggf.customer_lifetime_value( bgf, returning_customers_summary['frequency'], returning_customers_summary['recency'], returning_customers_summary['T'], returning_customers_summary['monetary_value'], time=12, freq='D') CLV_1Year = pd.Series(CLV_1Year) ################# Churn Probability ############################### # probability of being churn: model is going to predict customer churn, i.e probability of customer being dead or probability that a customer will leave alive = bgf.conditional_probability_alive( returning_customers_summary['frequency'], returning_customers_summary['recency'], returning_customers_summary['T']) ################ Final Output ############################### returning_customers_summary2 = returning_customers_summary.copy() returning_customers_summary2['Churn_Probability'] = 1 - alive returning_customers_summary2['AVG_SALE'] = AVG_Profit