Пример #1
0
def create_rules(dataframe, country=False, head=5):
    """Mine association rules and print the strongest ones by lift.

    Args:
        dataframe: Transaction-level data that is handed to
            ``create_invoice_product_df``. When ``country`` is given it must
            also expose a ``"Country"`` column.
        country: Value to match against the ``"Country"`` column. Any falsy
            value (the default ``False``) skips the filter and uses every row.
        head: Number of rules to print.

    Returns:
        The complete result of ``association_rules``. Only the printed
        preview is sorted by lift and truncated; the returned object is not.
    """
    # The country filter is the only step that differs between the two
    # modes, so it is applied up front and the pipeline is written once.
    if country:
        dataframe = dataframe[dataframe["Country"] == country]

    basket = create_invoice_product_df(dataframe)
    frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="support", min_threshold=0.01)
    print(rules.sort_values("lift", ascending=False).head(head))
    return rules
Пример #2
0
# Sanity check: look at one specific product on one specific invoice.
df[(df["Invoice"] == 536983) & (df["StockCode"] == 16016)]


# Summed quantity per (Invoice, StockCode), pivoted so stock codes become
# columns; missing combinations are shown as 0. Only a small corner is shown.
(
    df_ger.groupby(['Invoice', 'StockCode'])
    .agg({"Quantity": "sum"})
    .unstack()
    .fillna(0)
    .iloc[0:6, 0:12]
)

# apply works along rows or columns (chosen via axis), whereas applymap
# works on every single element. Here each cell is turned into a 1/0 flag.
(
    df_ger.groupby(['Invoice', 'StockCode'])
    .agg({"Quantity": "sum"})
    .unstack()
    .fillna(0)
    .applymap(lambda qty: 1 if qty > 0 else 0)
    .iloc[0:6, 0:12]
)

# Same invoice x product flag matrix, built through the shared helper.
ger_inv_pro_df = create_invoice_product_df(df_ger)

ger_inv_pro_df.head()

# How many unique products are in each invoice?
new_df_ger = (
    df_ger.groupby(['Invoice', 'Description'])
    .agg({'Quantity': "sum"})
    .fillna(0)
    .applymap(lambda qty: 1 if qty > 0 else 0)
)
new_df_ger.head(30)
# Turn the (Invoice, Description) index back into ordinary columns so the
# flags can be re-grouped by invoice.
new_df_ger.reset_index(inplace=True)
new_df_ger.head(30)
new_df_ger.groupby('Invoice').agg({'Quantity': "sum"})

# How many unique baskets are each product in?
df_ger.groupby("Description").agg({"Invoice": "nunique"})

############################################
Пример #3
0
# Spot-check two specific (StockCode, Invoice) pairs.
df[(df["Invoice"] == 538174) & (df["StockCode"] == 16235)]
df[(df["Invoice"] == 537894) & (df["StockCode"] == 17003)]

# Summed quantity per (Invoice, StockCode) pivoted to a wide table.
# fillna(0) puts zeros where an invoice does not contain a stock code, so the
# preview shows 0 instead of NaN.
(
    df_fr.groupby(['Invoice', 'StockCode'])
    .agg({"Quantity": "sum"})
    .unstack()
    .fillna(0)
    .iloc[0:5, 0:5]
)


def create_invoice_product_df(dataframe):
    """Build a 0/1 invoice-by-product matrix from transaction rows.

    Args:
        dataframe: Data with ``'Invoice'``, ``'Description'`` and
            ``'Quantity'`` columns.

    Returns:
        A DataFrame indexed by invoice with one column per description.
        A cell is 1 when the summed quantity of that product on that invoice
        is greater than zero, otherwise 0 (this includes products absent from
        the invoice and net quantities that are zero or negative).
    """
    # NOTE(review): the original carried a bare "StockCode" remark here,
    # presumably marking it as an alternative grouping key -- confirm.
    quantities = (
        dataframe.groupby(['Invoice', 'Description'])['Quantity']
        .sum()
        .unstack()
        .fillna(0)
    )
    # Vectorized comparison instead of the deprecated per-cell
    # DataFrame.applymap; the result is the same int64 0/1 flags.
    return quantities.gt(0).astype("int64")


# NOTE(review): the variable is named "germany" but is built from df_fr --
# confirm which data set is intended.
germany_inv_pro_df = create_invoice_product_df(df_fr)
# The 1s and 0s show whether a product is present in a basket or not.
germany_inv_pro_df.head(220)

############################################
# Extracting the Association Rules
############################################
# apriori returns the item sets together with their support values, keeping
# those at or above min_support.
frequent_itemsets = apriori(germany_inv_pro_df, min_support=0.01, use_colnames=True)
frequent_itemsets.sort_values(by="support", ascending=False)

# Derive the rules from the supports, filtered by min_threshold.
rules = association_rules(frequent_itemsets,
                          metric="support",