# Association-rule data preparation for the Online Retail II data set,
# restricted to invoices from Germany.
#
# Flow: configure pandas display -> read the Excel sheet -> inspect ->
# run the project's CRM preparation helper -> filter to Germany ->
# aggregate quantity per (Invoice, StockCode) and preview the pivot.

# pandas is referenced as `pd` throughout; imported here so the script is
# self-contained (a repeated import is harmless if one already exists above).
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Project-local helpers; their implementations are not visible in this file.
from helpers.helpers import check_df, create_invoice_product_df, crm_data_prep

# Show every column and keep wide frames on a single line when printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

# The process of reading the data set.
# NOTE(review): absolute, machine-specific path — kept as-is; consider a
# project-relative path so the script runs on other machines.
df_ = pd.read_excel(
    r"C:\Users\LENOVO\PycharmProjects\DSMLBC4\datasets\online_retail_II.xlsx",
    sheet_name="Year 2010-2011")

# Work on a copy so the freshly loaded frame stays untouched.
df = df_.copy()
df.info()
# Bare expression: the result is only displayed in an interactive console.
df.head()
check_df(df)

# Project preparation step (presumably cleaning / feature creation —
# confirm against helpers.helpers.crm_data_prep).
df = crm_data_prep(df)
check_df(df)

# Country selection -> [Germany]
df_ger = df[df['Country'] == "Germany"]
check_df(df_ger)

# Total quantity per (Invoice, StockCode). Computed once and reused for both
# previews below instead of repeating the same groupby/agg twice.
ger_invoice_qty = df_ger.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"})
ger_invoice_qty.head(100)

# Move StockCode from the row index to the columns (invoice x product grid)
# and preview the first 6 invoices and 12 product columns.
ger_invoice_qty.unstack().iloc[0:6, 0:12]
pd.set_option('display.max_columns', None) pd.set_option('display.expand_frame_repr', False) from mlxtend.frequent_patterns import apriori, association_rules pd.set_option('display.max_columns', None) # Hemen verimizi hazırla df_ = pd.read_excel( r"C:\Users\Erkan\Desktop\DSMLBC-4\4.Hafta_26-29_Ocak Haftası\Ödevler ve Çalışmalar\online_retail_II.xlsx", sheet_name="Year 2010-2011") df = df_.copy() df.info() df.head() from helpers.helpers import check_df check_df(df) from helpers.helpers import crm_data_prep df = crm_data_prep(df) check_df(df) df1 = df.copy() df_gm = df[df["Country"] == "Germany"] df_gm.head() df = df_gm.copy() df.groupby(["Invoice", "StockCode", "TotalPrice"]).agg({ "Quantity": "max", "Price": "sum"
# Project-local helpers; their implementations are not visible in this file.
from helpers.helpers import check_df, crm_data_prep

# Print every column when a frame is displayed.
pd.set_option('display.max_columns', None)

# Read the data.
data = pd.read_excel(
    r"C:\Users\Suleakcay\PycharmProjects\pythonProject6\datasets\online_retail_II.xlsx",
    sheet_name="Year 2010-2011",
)
# Keep the loaded frame intact and work on a copy.
df = data.copy()
df.info()

########################
# Data Preprocessing
########################

# Inspect the raw data in detail.
check_df(df)

# Per the original author's note, this helper drops missing values, applies
# corrections and performs calculations — confirm against
# helpers.helpers.crm_data_prep.
df = crm_data_prep(df)

# Inspect again after the cleaning step.
check_df(df)

# Keep only the rows for Germany.
# NOTE(review): the variable is named `df_fr` although it holds the Germany
# subset; the name is kept because code outside this view may reference it.
germany_rows = df['Country'] == "Germany"
df_fr = df[germany_rows]
check_df(df_fr)

# Observations recorded by the original author:
#   shape (541910, 8)
#   NA Description -> 1454
#   Customer ID    -> 135080

# One row per (Invoice, StockCode) pair with the summed Quantity, i.e. how
# many units of each product appear on each invoice. Products are unique per
# invoice here; this says nothing yet about the invoices as a whole.
invoice_item_qty = df_fr.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"})
# Bare expression: the result is only displayed in an interactive console.
invoice_item_qty.head(200)
df.info() df_mysql.info() df_mysql["InvoiceDate"] = pd.to_datetime(df_mysql["InvoiceDate"]) df_mysql.rename(columns={"CustomerID": "Customer ID"}, inplace=True) df.head() from helpers.helpers import crm_data_prep df_prep = crm_data_prep(df) df_prep.head() from helpers.helpers import check_df check_df(df_prep) def create_cltv_p(dataframe): today_date = dt.datetime(2011, 12, 11) ## recency kullanıcıya özel dinamik. rfm = dataframe.groupby('Customer ID').agg({ 'InvoiceDate': [ lambda date: (date.max() - date.min()).days, lambda date: (today_date - date.min()).days ], 'Invoice': lambda num: num.nunique(), 'TotalPrice': lambda TotalPrice: TotalPrice.sum()
# Association-rule data preparation for the Online Retail II data set,
# restricted to invoices from France.
#
# Flow: configure pandas display -> read the Excel sheet -> inspect ->
# run the project's CRM preparation helper -> filter to France ->
# aggregate quantity per (Invoice, StockCode) and preview the pivot.

# pip install mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Project-local helpers; their implementations are not visible in this file.
from helpers.helpers import check_df, crm_data_prep

# Show every column and keep wide frames on a single line when printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

# Reload the (by now familiar) Online Retail II data set.
df_ = pd.read_excel("datasets/online_retail_II.xlsx",
                    sheet_name="Year 2010-2011")

# Work on a copy so the freshly loaded frame stays untouched.
df = df_.copy()
df.info()
# Bare expression: the result is only displayed in an interactive console.
df.head()
check_df(df)

# Project preparation step (presumably cleaning / feature creation —
# confirm against helpers.helpers.crm_data_prep).
df = crm_data_prep(df)
check_df(df)

# Country selection -> France
df_fr = df[df['Country'] == "France"]
check_df(df_fr)

# Total quantity per (Invoice, StockCode). Computed once and reused for both
# previews below instead of repeating the same groupby/agg twice.
fr_invoice_qty = df_fr.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"})
fr_invoice_qty.head(100)

# Move StockCode from the row index to the columns (invoice x product grid)
# and preview the top-left 5 x 5 corner.
fr_invoice_qty.unstack().iloc[0:5, 0:5]
from lifetimes import BetaGeoFitter from lifetimes import GammaGammaFitter from helpers.helpers import check_df, outlier_thresholds, replace_with_thresholds, crm_data_prep pd.set_option('display.expand_frame_repr', False) pd.set_option('display.max_columns', None) pd.set_option('display.max_rows', None) df_ = pd.read_excel("datasets/online_retail_II.xlsx", sheet_name="Year 2010-2011", engine="openpyxl") df = df_.copy() # Data Preperation check_df(df) df_prep = crm_data_prep(df) check_df(df_prep) # RFM Segmentation def create_rfm(dataframe): today_date = dt.datetime(2011, 12, 11) rfm = dataframe.groupby('Customer ID').agg({ 'InvoiceDate': lambda date: (today_date - date.max()).days, 'Invoice': lambda num: num.nunique(), "TotalPrice": lambda price: price.sum()