Exemplo n.º 1
0
# Display options: no cap on the number of columns shown, and wide frames are
# not wrapped across several lines when printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
from mlxtend.frequent_patterns import apriori, association_rules
from helpers.helpers import create_invoice_product_df

# Load the "Year 2010-2011" sheet of the workbook.
# NOTE(review): the path is absolute and specific to one machine.

df_ = pd.read_excel(
    r"C:\Users\LENOVO\PycharmProjects\DSMLBC4\datasets\online_retail_II.xlsx",
    sheet_name="Year 2010-2011")
# Work on a copy; df_ keeps the frame exactly as it was read.
df = df_.copy()
df.info()
df.head()

# check_df is a project helper not shown here -- presumably it prints a
# summary of the frame; confirm against helpers/helpers.py.
from helpers.helpers import check_df
check_df(df)

# crm_data_prep is a project helper not shown here; whatever it returns
# replaces df from this point on.
from helpers.helpers import crm_data_prep

df = crm_data_prep(df)
check_df(df)

# Country selection -> keep only the rows whose Country equals "Germany".
df_ger = df[df['Country'] == "Germany"]
check_df(df_ger)

# Sum Quantity per (Invoice, StockCode) pair and take the first 100 rows.
# The result is not assigned, so it is only visible in an interactive session.
df_ger.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"}).head(100)

# Same aggregation; unstack() then moves the innermost index level (StockCode)
# into columns, and the slice keeps the first 6 rows and first 12 columns.
df_ger.groupby(['Invoice', 'StockCode']).agg({
    "Quantity": "sum"
}).unstack().iloc[0:6, 0:12]
Exemplo n.º 2
0
# Display options: no cap on the number of columns shown, and wide frames are
# not wrapped across several lines when printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
from mlxtend.frequent_patterns import apriori, association_rules
# NOTE(review): repeats the display.max_columns setting made above; redundant.
pd.set_option('display.max_columns', None)

# Prepare the data: load the "Year 2010-2011" sheet of the workbook.
# NOTE(review): the path is absolute and specific to one machine.

df_ = pd.read_excel(
    r"C:\Users\Erkan\Desktop\DSMLBC-4\4.Hafta_26-29_Ocak Haftası\Ödevler ve Çalışmalar\online_retail_II.xlsx",
    sheet_name="Year 2010-2011")
# Work on a copy; df_ keeps the frame exactly as it was read.
df = df_.copy()
df.info()
df.head()

# check_df is a project helper not shown here -- presumably it prints a
# summary of the frame; confirm against helpers/helpers.py.
from helpers.helpers import check_df
check_df(df)

# crm_data_prep is a project helper not shown here; whatever it returns
# replaces df from this point on.
from helpers.helpers import crm_data_prep
df = crm_data_prep(df)
check_df(df)

# Copy of the prepared frame, taken before df is rebound to the Germany
# subset below.
df1 = df.copy()

# Keep only the rows whose Country equals "Germany".
df_gm = df[df["Country"] == "Germany"]
df_gm.head()

# From here on, df refers to an independent copy of the Germany subset.
df = df_gm.copy()

df.groupby(["Invoice", "StockCode", "TotalPrice"]).agg({
    "Quantity": "max",
    "Price": "sum"
Exemplo n.º 3
0
# Display option: no cap on the number of columns shown when printing.
pd.set_option('display.max_columns', None)

# Read the data: load the "Year 2010-2011" sheet of the workbook.
# NOTE(review): the path is absolute and specific to one machine.
data = pd.read_excel(
    r"C:\Users\Suleakcay\PycharmProjects\pythonProject6\datasets\online_retail_II.xlsx",
    sheet_name="Year 2010-2011")
# Work on a copy; data keeps the frame exactly as it was read.
df = data.copy()
df.info()

########################
#Data Preprocessing
########################

from helpers.helpers import check_df
check_df(df)  # inspect the details of the data (project helper, not shown here)
from helpers.helpers import crm_data_prep
# Author's note: crm_data_prep drops missing values, applies corrections and
# computes derived values -- the helper is not shown here; confirm.

df = crm_data_prep(df)
check_df(df)  # inspect the frame again after the cleaning step

# NOTE(review): the variable is named df_fr but the filter selects "Germany".
df_fr = df[df['Country'] == "Germany"]
check_df(df_fr)  # summary for the Germany subset only
# Figures recorded by the original author:
# shape (541910,8)
# NA Description -> 1454
# Customer ID -> 135080
# NOTE(review): these may describe the full unfiltered sheet rather than the
# Germany subset -- confirm before relying on them.

# Sum Quantity per (Invoice, StockCode) pair, i.e. how many units of each
# product appear on each invoice, and take the first 200 rows.
# Author's note: each product row is now unique, so this view alone says
# nothing about invoices as a whole.
df_fr.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"}).head(200)
Exemplo n.º 4
0
# Print the structure of both frames; df and df_mysql are created earlier in
# the file, outside this excerpt.
df.info()
df_mysql.info()

# Parse the InvoiceDate column of df_mysql into pandas datetimes.
df_mysql["InvoiceDate"] = pd.to_datetime(df_mysql["InvoiceDate"])
# Rename CustomerID to "Customer ID" in place -- presumably to match the
# column name used for df and by the helpers; confirm.
df_mysql.rename(columns={"CustomerID": "Customer ID"}, inplace=True)

df.head()

# crm_data_prep is a project helper not shown here; its result is kept
# separately in df_prep, so df itself is not rebound.
from helpers.helpers import crm_data_prep
df_prep = crm_data_prep(df)

df_prep.head()

# check_df is a project helper not shown here -- presumably it prints a
# summary of the frame; confirm against helpers/helpers.py.
from helpers.helpers import check_df

check_df(df_prep)


def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    ## recency kullanıcıya özel dinamik.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days, lambda date:
            (today_date - date.min()).days
        ],
        'Invoice':
        lambda num: num.nunique(),
        'TotalPrice':
        lambda TotalPrice: TotalPrice.sum()
Exemplo n.º 5
0
# Requires the mlxtend package: pip install mlxtend
import pandas as pd
# Display options: no cap on the number of columns shown, and wide frames are
# not wrapped across several lines when printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
from mlxtend.frequent_patterns import apriori, association_rules

# Reload the dataset for a quick refresher: the "Year 2010-2011" sheet, read
# from a path relative to the current working directory.

df_ = pd.read_excel("datasets/online_retail_II.xlsx",
                    sheet_name="Year 2010-2011")
# Work on a copy; df_ keeps the frame exactly as it was read.
df = df_.copy()
df.info()
df.head()

# check_df is a project helper not shown here -- presumably it prints a
# summary of the frame; confirm against helpers/helpers.py.
from helpers.helpers import check_df
check_df(df)

# crm_data_prep is a project helper not shown here; whatever it returns
# replaces df from this point on.
from helpers.helpers import crm_data_prep

df = crm_data_prep(df)
check_df(df)

# Keep only the rows whose Country equals "France".
df_fr = df[df['Country'] == "France"]
check_df(df_fr)

# Sum Quantity per (Invoice, StockCode) pair and take the first 100 rows.
# The result is not assigned, so it is only visible in an interactive session.
df_fr.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"}).head(100)

# Same aggregation; unstack() then moves the innermost index level (StockCode)
# into columns, and the slice keeps the first 5 rows and first 5 columns.
df_fr.groupby(['Invoice', 'StockCode']).agg({
    "Quantity": "sum"
}).unstack().iloc[0:5, 0:5]
Exemplo n.º 6
0
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from helpers.helpers import check_df, outlier_thresholds, replace_with_thresholds, crm_data_prep

# Display options: wide frames are not wrapped across several lines, and
# neither the number of columns nor the number of rows shown is capped.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load the "Year 2010-2011" sheet from a path relative to the current working
# directory, explicitly selecting openpyxl as the Excel reader engine.
df_ = pd.read_excel("datasets/online_retail_II.xlsx",
                    sheet_name="Year 2010-2011",
                    engine="openpyxl")
# Work on a copy; df_ keeps the frame exactly as it was read.
df = df_.copy()

# Data Preparation
# check_df and crm_data_prep are project helpers not shown here. The prepared
# result is kept in df_prep, so df itself is not rebound.

check_df(df)
df_prep = crm_data_prep(df)
check_df(df_prep)


# RFM Segmentation
def create_rfm(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate':
        lambda date: (today_date - date.max()).days,
        'Invoice':
        lambda num: num.nunique(),
        "TotalPrice":
        lambda price: price.sum()