示例#1
0
def test_filtered():
    """
    Check that apriori yields nothing when the filter rejects every statistic.

    The three collaborators are replaced through the underscore-prefixed
    keyword arguments of apriori; the filter stub always returns an empty
    iterator, so the expected result is an empty list.
    """
    manager = Mock(spec=TransactionManager)
    stub_statistic = OrderedStatistic(
        frozenset(['A']), frozenset(['B']), 0.1, 0.7)

    def fake_support_records(*_args, **_kwargs):
        """ Stub passed as _gen_support_records: yields one fixed value. """
        yield stub_statistic

    def fake_ordered_statistics(*_args):
        """ Stub passed as _gen_ordered_statistics: yields one fixed value. """
        yield stub_statistic

    def fake_filter(*_args, **_kwargs):
        """ Stub passed as _filter_ordered_statistics: lets nothing through. """
        return iter([])

    relations = apriori(
        manager,
        _gen_support_records=fake_support_records,
        _gen_ordered_statistics=fake_ordered_statistics,
        _filter_ordered_statistics=fake_filter,
    )
    eq_(list(relations), [])
示例#2
0
def test_normal():
    """
    Check that apriori forwards its thresholds and builds a RelationRecord.

    Every stub asserts the arguments it receives.  The expected result is a
    single RelationRecord made of the support record's items and support plus
    the one statistic the filter stub yields.
    """
    manager = Mock(spec=TransactionManager)
    thresholds = {
        'min_support': 0.1,
        'min_confidence': 0.1,
        'min_lift': 0.5,
        'max_length': 2,
    }
    record = SupportRecord(frozenset(['A', 'B']), 0.5)
    kept_statistic = OrderedStatistic(
        frozenset(['A']), frozenset(['B']), 0.1, 0.7)
    dropped_statistic = OrderedStatistic(
        frozenset(['A']), frozenset(['B']), 0.3, 0.5)

    def fake_support_records(*args, **kwargs):
        """ Stub passed as _gen_support_records; checks support and length. """
        eq_(args[1], thresholds['min_support'])
        eq_(kwargs['max_length'], thresholds['max_length'])
        yield record

    def fake_ordered_statistics(*_args):
        """ Stub passed as _gen_ordered_statistics; yields both statistics. """
        yield kept_statistic
        yield dropped_statistic

    def fake_filter(*args, **kwargs):
        """ Stub passed as _filter_ordered_statistics; keeps one statistic. """
        eq_(kwargs['min_confidence'], thresholds['min_confidence'])
        eq_(kwargs['min_lift'], thresholds['min_lift'])
        # Draining the incoming iterator proves both statistics arrived.
        eq_(len(list(args[0])), 2)
        yield kept_statistic

    relations = list(apriori(
        manager,
        _gen_support_records=fake_support_records,
        _gen_ordered_statistics=fake_ordered_statistics,
        _filter_ordered_statistics=fake_filter,
        **thresholds
    ))
    expected = RelationRecord(record.items, record.support, [kept_statistic])
    eq_(relations, [expected])
def myapriori():
    """
    Mine association rules from the Spotify feature bins at three thresholds.

    ``mySpotify_v3.csv`` is read once, de-duplicated on ``track_id`` (keeping
    the first occurrence), and the category/bin columns of every remaining row
    form one transaction.  apriori is then run once per
    (min_support, min_confidence) pair.

    Returns:
        pandas.DataFrame: the apriori results of the LAST pair only
        (0.7, 0.7); the results of the earlier pairs are overwritten.
        NOTE(review): confirm whether all three levels were meant to be
        returned -- the return shape is kept as it was for existing callers.
    """
    # Three levels of (min_support, min_confidence).
    minsup_con = [(0.8, 0.8), (0.75, 0.75), (0.7, 0.7)]
    item_columns = [
        'parentCat', 'acousticness_bin', 'danceability_bin', 'energy_bin',
        'instrumentalness_bin', 'liveness_bin', 'loudness_bin',
        'speechiness_bin', 'tempo_bin', 'time_signature_bin', 'valence_bin',
    ]

    # Loading, de-duplicating and building the transactions do not depend on
    # the thresholds, so do them once instead of on every iteration.
    spotifydf = pd.read_csv("mySpotify_v3.csv")
    spotifydf = spotifydf.drop_duplicates(subset='track_id', keep='first')
    itemset = spotifydf[item_columns].values.tolist()

    for min_support, min_confidence in minsup_con:
        results = list(apriori(itemset,
                               min_support=min_support,
                               min_confidence=min_confidence))
        aprioridf = pd.DataFrame(results)
    return aprioridf
示例#4
0
from apyori import apriori
import pandas as pd

# Not referenced anywhere in the visible part of this script.
MAX_RULES = 8

# Each CSV row is one transaction; every cell is stringified.
data = pd.read_csv('pizzaria.csv', header=None)
transactions = [[str(cell) for cell in row] for row in data.values]

mining_options = {
    'min_support': 0.032,
    'min_confidence': 0.2,
    'min_lift': 1.5,
    'min_length': 2,
}
regras = list(apriori(transactions, **mining_options))

temp_results = [list(record) for record in regras]
total_rules = len(temp_results)

# Position 2 of each record holds its ordered statistics; turn each into a list.
formResult = [[list(stat) for stat in record[2]] for record in temp_results]

for formatted in formResult:
    print(formatted)
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# header=None: the first row of the file is read as data, not column names.
veriler = pd.read_csv('sepet.csv', header=None)

# One transaction per row with every cell stringified (a NaN cell becomes the
# string 'nan').  The array is fetched once instead of on every cell access.
# NOTE(review): the 7501 x 20 extent is hard-coded to the expected file size.
hucreler = veriler.values
t = [[str(hucreler[i, j]) for j in range(0, 20)] for i in range(0, 7501)]

from apyori import apriori

# The keyword was misspelled `min_lenght`.
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length is honoured by the installed version.
kurallar = apriori(t,
                   min_support=0.01,
                   min_confidence=0.2,
                   min_lift=3,
                   min_length=2)
print(list(kurallar))
示例#6
0
def reco(unit, support, confidence, lift):
    """
    Build a CSV download of apriori rules mined from "also bought" data.

    Args:
        unit: table exposing ``asin``, ``related`` and ``title`` columns; it
            is used via attribute access, ``unit['asin']`` and ``unit.loc``,
            so presumably a pandas DataFrame -- confirm against the caller.
            Each ``related`` value is parsed with ``ast.literal_eval``.
        support: minimum support; anything ``float()`` accepts.
        confidence: minimum confidence; anything ``float()`` accepts.
        lift: minimum lift; anything ``float()`` accepts.

    Returns:
        An ``HttpResponse`` with content type ``text/csv`` carrying
        ``apriorisummary.csv`` as an attachment.  If a ``ValueError`` is
        raised anywhere in the pipeline, the first argument of that exception
        is returned instead of a response.
    """
    try:
        # One transaction per product: the product id followed by every id
        # listed under "also_bought" in its parsed `related` value.
        trans_list = []
        for unique_id, item in zip(unit.asin, unit.related):
            trans = [unique_id]
            item_dict = ast.literal_eval(item)
            if "also_bought" in item_dict:
                trans.extend(item_dict['also_bought'])
            trans_list.append(trans)

        # Thresholds may arrive as strings; a bad value raises ValueError,
        # which is reported by the handler below.
        support = float(support)
        confidence = float(confidence)
        lift = float(lift)

        # NOTE(review): apyori documents min_support, min_confidence, min_lift
        # and max_length; verify that min_length has any effect.
        rules = apriori(trans_list,
                        min_support=support,
                        min_confidence=confidence,
                        min_lift=lift,
                        min_length=3)
        results = list(rules)

        # Only the first ordered statistic of each record is reported.
        apriori_summary = pd.DataFrame(columns=('Items', 'Support',
                                                'Confidence', 'Lift'))
        apriori_summary['Items'] = [row.items for row in results]
        apriori_summary['Support'] = [row.support for row in results]
        # Confidence is stored as text, so `float_format` below does not
        # apply to it (kept as-is to leave the CSV output unchanged).
        apriori_summary['Confidence'] = [
            str(row.ordered_statistics[0].confidence) for row in results
        ]
        apriori_summary['Lift'] = [
            row.ordered_statistics[0].lift for row in results
        ]

        # For every item of every rule, look up the titles of the rows whose
        # `asin` equals that item.
        names = []
        for items in apriori_summary.Items:
            name = []
            for unique in items:
                temp = unit.loc[unit['asin'] == str(unique)]
                name.append(list(temp.title))
            names.append(name)

        name_df = pd.DataFrame(names)

        AprioriResults = apriori_summary.join(name_df, how='left')

        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="apriorisummary.csv"'
        AprioriResults.to_csv(path_or_buf=response,
                              float_format='%.4f',
                              index=False)
        return response

    except ValueError as e:
        return (e.args[0])
示例#7
0
#REFERENCE https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori

uni_data = pd.read_csv('../Data/For-Profit-NOM.csv')

# Rows 0-41 and columns 0-4, stringified: one transaction per row.
records = [
    [str(uni_data.values[i, j]) for j in range(0, 5)]
    for i in range(0, 42)
]

association_rules = apriori(records, min_support=0.3, min_confidence=0.5)
association_results = list(association_rules)

# Record layout by position: 0 = item set, 1 = support, 2 = ordered statistics
# (each statistic: 2 = confidence, 3 = lift).
for uni in association_results:
    unis = list(uni[0])

    print(f"Rule {unis}")
    print(f"Support : {uni[1]}")
    print(f"Confidence {uni[2][0][2]}")
    print(f"Lift: {uni[2][0][3]}")
    print("====================================")
示例#8
0
plt.xticks(x_index, labels, fontsize=10, rotation=45)
plt.ylabel('count')
#showing the bar graph
plt.show()

# Copy every transaction without its 'nan' entries.
new_L = [[x for x in item if str(x) != 'nan'] for item in List_dataset]

# Training Apriori on the dataset
rules = apriori(new_L, min_support=0.003, min_confidence=0.25, min_lift=4)

# Visualising the results
results = list(rules)

for item in results:
    # The item set is a frozenset, so its iteration order says nothing about
    # which side of the rule an item is on.  Take antecedent and consequent
    # from the first ordered statistic instead, and show every item.
    first_stat = item.ordered_statistics[0]
    antecedent = ", ".join(sorted(str(x) for x in first_stat.items_base))
    consequent = ", ".join(sorted(str(x) for x in first_stat.items_add))
    print("Rule: " + antecedent + " -> " + consequent)

    print("Support: " + str(item.support))
示例#9
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from apyori import apriori

#here import the data
dataset = pd.read_csv('data.csv', header=None)

# `records = []` and the `for` header were fused on one line, which is a
# SyntaxError; build the list of stringified rows in one expression instead.
# NOTE(review): the 101 x 10 extent is hard-coded to the expected file size.
records = [
    [str(dataset.values[i, j]) for j in range(0, 10)]
    for i in range(0, 101)
]

#here train the model
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length has any effect.
rules = apriori(records,
                min_support=0.1,
                min_confidence=0.2,
                min_lift=3,
                min_length=2)

results = list(rules)
示例#10
0
import pprint

# Keep only columns 2 (transaction id) and 3 (label) of the raw file.
data = pd.read_csv('data/transaction_data_custom.csv', header=None)
data = np.array(data)
data = pd.DataFrame({
    'transaction': data[:, 2],
    'label': data[:, 3],
})

# One list of labels per transaction id.
data_grouped = data.groupby('transaction')
store_data = [item['label'].values.tolist() for key, item in data_grouped]

association_rules = list(
    apriori(store_data, min_support=0.1, min_confidence=0.3))
pprint.pprint(store_data)

print("===============================================")

for item in association_rules:
    items = list(item[0])
    # Print the first item, then " -> item" for each remaining one (none for a
    # single-item set), all on one output line.
    print("Rule: " + items[0], end="")
    for rule_item in items[1:]:
        print(" -> " + rule_item, end="")
    print("\nSupport: " + str(item[1]))
    print("Confidence: " + str(item[2][0][2]))
示例#11
0
# `l1` is defined outside the visible part of the script.
df = df.drop(l1, axis=0)

# Pie chart of the 15 most frequent values of "Item".
x = df["Item"].value_counts().head(15)

top_name = x.index.tolist()
bought_freq = x.tolist()

plt.pie(bought_freq, autopct="%1.1f%%", labels=top_name, radius=2)

# One list of distinct items per transaction.
group = df.groupby("Transaction")["Item"].unique().tolist()
group1 = [x.tolist() for x in group]


# Training Apriori on the dataset

rules = apriori(group1, min_support=0.0025, min_confidence=0.2, min_lift=3)

# Visualising the results
results = list(rules)


for item in results:
    # The item set is a frozenset, so its iteration order says nothing about
    # which side of the rule an item is on.  Take antecedent and consequent
    # from the first ordered statistic instead, and show every item.
    first_stat = item.ordered_statistics[0]
    antecedent = ", ".join(sorted(str(x) for x in first_stat.items_base))
    consequent = ", ".join(sorted(str(x) for x in first_stat.items_add))
    print("Rule: " + antecedent + " -> " + consequent)

    print("Support: " + str(item.support))
示例#12
0
from pprint import PrettyPrinter

MIN_SUPPORT = 0.1
MIN_CONFIDENCE = 0.8
MIN_LEN = 6


def read_event_batches(lines):
    """
    Group event names into batches terminated by ``PAYMENT_SUCCESS``.

    Args:
        lines: iterable of comma-separated text lines whose third field is
            the event name (an open file works and is read lazily).

    Returns:
        list[list[str]]: one list of event names per ``PAYMENT_SUCCESS``
        marker.  The marker itself is not stored, and events after the last
        marker are discarded.  Blank lines are skipped instead of raising
        ``IndexError``.
    """
    batches = []
    current = []
    for line in lines:
        if not line.strip():
            continue
        event = line.split(',')[2].strip()
        if event == 'PAYMENT_SUCCESS':
            batches.append(current)
            current = []
        else:
            current.append(event)
    return batches


if __name__ == '__main__':
    import sys

    with open(sys.argv[1], 'r') as f:
        event_batches = read_event_batches(f)
    res = list(
        apriori(event_batches,
                min_support=MIN_SUPPORT,
                min_confidence=MIN_CONFIDENCE))
    # Report only item sets with at least MIN_LEN items, as
    # "support,confidence of the first statistic,items".
    for r in res:
        if len(r.items) >= MIN_LEN:
            print("{},{},{}".format(r.support,
                                    r.ordered_statistics[0].confidence,
                                    list(r.items)))
示例#13
0
import numpy as np
import pandas as pd
from apyori import apriori

# sys.argv is read below, but `sys` is not among the imports visible at the
# top of this script, so import it here.
import sys

# The single command-line argument encodes all transactions: "//" separates
# transactions and "||" separates the items inside one transaction.
inp = sys.argv[1]
split_inp = inp.split("//")
arr = [s.split("||") for s in split_inp]

# max_length=2 restricts the mined item sets to at most two items.
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length has any effect.
rules = apriori(transactions=arr,
                min_support=0.03,
                min_confidence=0.05,
                min_lift=1.25,
                min_length=2,
                max_length=2)
results = list(rules)
# print(results)

# for result in results:
#     print(result)

# results = list[filter(lambda x: len(x.items) > 1, results)]
# print('helloooo '+str(list(rules)));

actual_results = []

for result in results:
    if (len(result.items) == 2):
示例#14
0
    # 生成关联规则,筛选符合规则的频繁集计算置信度,满足最小置信度的关联规则添加到列表
    for k in range(max_len - 1):
        for freq_set in freq_sets[k]:
            for sub_set in freq_sets[k + 1]:
                if freq_set.issubset(sub_set):
                    conf = supports[sub_set] / supports[freq_set]
                    rule = (freq_set, sub_set - freq_set, conf)
                    if conf >= min_conf:
                        rules.append(rule)
    return rules
#%%
if __name__ == '__main__':
    # Four small integer transactions used as a demo input.
    data = [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]

    # Here apriori returns a pair: the frequent sets and their supports.
    L, support_data = apriori(data, min_support=0.5)
    # Bind the result to its own name: assigning it to `association_rules`
    # replaced the function itself, so running this a second time in the same
    # session failed.
    rules = association_rules(L, support_data, min_conf=0.5)


#%%
from apyori  import apriori
import pandas as pd
# Four small transactions with string items.
data = [
    ['1', '3', '4'],
    ['2', '3', '5'],
    ['1', '2', '3', '5'],
    ['2', '5'],
]
result = apriori(
    transactions=data,
    min_support=0.5,
    min_confidence=0.7,
    min_lift=1.3,
    max_length=2,
)
# Turn the mined records into a table and write it to a spreadsheet.
result = pd.DataFrame(result)
result.to_excel(r'C:\Users\xiaofan\Desktop\test1031.xlsx')

#%%


示例#15
0
文件: 8.1.py 项目: muyicui/pylearn
import pandas
from apyori import apriori

data = pandas.read_csv('D:\\PDM\\8.1\\data.csv')

# For every transaction id (column '交易ID') collect the list of purchased
# goods (column '购买商品').
by_transaction = data.groupby(by='交易ID')
transactions = by_transaction.apply(lambda x: list(x.购买商品)).values

# apriori is called without thresholds here.  Its optional keyword arguments:
#   min_support    -- The minimum support of relations (float).
#   min_confidence -- The minimum confidence of relations (float).
#   min_lift       -- The minimum lift of relations (float).
#   max_length
results = list(apriori(transactions))

supports = []     # support
confidences = []  # confidence
lifts = []        # lift
bases = []        # base items (items_base)
adds = []         # derived items (items_add)
for r in results:
    supports.append(r.support)
示例#16
0
def test_invalid_support():
    """
    An invalid support.

    apriori must reject ``min_support=0.0`` with ``ValueError``.  The test
    previously contained no assertion, so it could only error out or pass
    without checking anything.
    """
    transaction_manager = Mock(spec=TransactionManager)
    try:
        # list() forces the call to run in case apriori is lazy.
        list(apriori(transaction_manager, min_support=0.0))
    except ValueError:
        return
    raise AssertionError('apriori accepted min_support=0.0')
示例#17
0
from apyori import apriori
import pandas as pd
import numpy as np

# Load the permits table and hand its rows to apriori as a plain array.
data = pd.read_csv('./data/new_Building_Permits.csv', header=0, engine='python')
data = np.array(data)
print(data.shape)
results = list(apriori(transactions=data, min_confidence=0.7, min_support=0.7))

# Stricter confidence bound applied when printing (strictly greater than).
min_con = 0.9
for relation in results:
    print(relation.items)
    asso_rules = relation.ordered_statistics
    confident_rules = [rule for rule in asso_rules if rule.confidence > min_con]
    for rule in confident_rules:
        print(f"{rule.items_base} ===>> {rule.items_add}")
        print(rule.lift)
示例#18
0
from apyori import apriori

# One transaction per line of data.csv: split on commas and strip the newline
# characters from every field.
with open("data.csv", "r") as file:
    transcations = [
        [field.replace('\n', '') for field in row.split(',')]
        for row in file
    ]

mined = apriori(
    transcations,
    min_support=0.15,
    min_confidence=0.5,
    min_lift=1.0,
    min_length=2,
)
results = list(mined)
# Each record is followed by one blank line.
for r in results:
    print(r, end="\n\n")

示例#19
0
# 2-D list of the rows whose value at index 6 (reading score, going by the
# variable name) is >= 60; the entries at original indexes 5, 6 and 7 are
# then removed from every kept row.
df_reading_score = [x for x in df.values.tolist() if x[6] >= 60]
for row in df_reading_score:
    for _ in range(3):
        del row[5]

# Build a 2-D list for writing grade >= 60 (index 7), trimmed the same way.
df_writing_score = [x for x in df.values.tolist() if x[7] >= 60]
for row in df_writing_score:
    for _ in range(3):
        del row[5]

association_results_math = list(apriori(df_math_score, min_support=0.2))
association_results_reading = list(apriori(df_reading_score, min_support=0.2))
association_results_writing = list(apriori(df_writing_score, min_support=0.2))

# Show each frequent item set and its support for the math-score rows.
print('\n=========================== math score ==============================')
for item in association_results_math:
    print(f'frequent item_set: {item[0]} support: {item[1]}')

# Show each frequent item set and its support for the reading-score rows.
print('\n=========================== reading score ==============================')
for item in association_results_reading:
    print(f'frequent item_set: {item[0]} support: {item[1]}')
示例#20
0
from apyori import apriori
# Five small shopping baskets used as demo transactions.
data = [['豆奶','莴苣'],
        ['莴苣','尿布','葡萄酒','甜菜'],
        ['豆奶','尿布','葡萄酒','橙汁'],
        ['莴苣','豆奶','尿布','葡萄酒'],
        ['莴苣','豆奶','尿布','橙汁']]

# The closing parenthesis of list(...) was missing, which is a SyntaxError.
result = list(apriori(transactions=data))
################################# Apriori ################################

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data preprocessing: the first row of the file is read as data (header=None).
dataset = pd.read_csv('../dataset/Market_Basket_Optimisation.csv', header=None)
# apyori.py is given a list of lists: one list of stringified cells per row.
transactions = [
    [str(dataset.values[row, col]) for col in range(0, 20)]
    for row in range(0, 7501)
]

# Extend the module search path with ../lib/ before importing apyori
# (presumably where apyori.py lives -- confirm).
import sys
sys.path.append('../lib/')
from apyori import apriori

mining_params = {
    'min_support': 0.003,
    'min_confidence': 0.20,
    'min_lift': 3,
    'min_length': 2,  # min_length : min #items in baskets
}
rules = apriori(transactions, **mining_params)

# Materialise the rules so they can be inspected.
results = list(rules)
示例#22
0
          
# `dataset1`, `dic` and `list1` are defined outside the visible part of the
# script.  Each row of dataset1 is a (key, item) pair; the items of one key
# are joined into a single space-separated string.
# NOTE(review): joining and re-splitting on ' ' also splits any item whose
# own name contains a space -- confirm that is acceptable.
for key, item_name in dataset1.values:
    if key in dic:
        dic[key] = dic[key] + ' ' + item_name
    else:
        dic[key] = item_name
list1.extend(dic.values())
mylist = [joined.split(' ') for joined in list1]


rules = apriori(mylist, min_support=0.0025, min_confidence=0.2, min_lift=3)
results = list(rules)


for item in results:
    # The item set is a frozenset, so its iteration order says nothing about
    # which side of the rule an item is on.  Take antecedent and consequent
    # from the first ordered statistic, the same one whose confidence and
    # lift are printed below.
    first_stat = item.ordered_statistics[0]
    antecedent = ", ".join(sorted(str(x) for x in first_stat.items_base))
    consequent = ", ".join(sorted(str(x) for x in first_stat.items_add))
    print("Rule: " + antecedent + " -> " + consequent)
    print("Support: " + str(item.support))
    print("Confidence: " + str(first_stat.confidence))
    print("Lift: " + str(first_stat.lift))
    print("=====================================")
示例#23
0
from apyori import apriori

dataset = pd.read_csv('Market_Basket_Optimisation.csv',header=None)


def cart(items):
    """
    Return the distinct, non-NaN entries of one basket as a list.

    Args:
        items: iterable of hashable basket entries; missing entries are
            expected to be float NaN.

    Returns:
        list: the distinct entries in set iteration order, without any NaN.
    """
    # NaN is the only value that is not equal to itself.  The previous
    # `np.nan in ...` / `.remove(np.nan)` test matched only the np.nan object
    # itself and removed at most one entry, so other NaN objects (for example
    # float('nan')) stayed in the basket.
    return [entry for entry in set(items) if entry == entry]

# One cleaned basket per row of the dataset.
transactions = list(dataset.apply(cart, axis=1))


rules = apriori(transactions, min_support=0.003, min_confidence=0.25, min_lift=4)
results = list(rules)


for item in results:
    # The item set is a frozenset, so its iteration order says nothing about
    # which side of the rule an item is on.  Take antecedent and consequent
    # from the first ordered statistic, the same one whose confidence and
    # lift are printed below.
    first_stat = item.ordered_statistics[0]
    antecedent = ", ".join(sorted(str(x) for x in first_stat.items_base))
    consequent = ", ".join(sorted(str(x) for x in first_stat.items_add))
    print("Rule: " + antecedent + " -> " + consequent)
    print("Support: " + str(item.support))
    print("Confidence: " + str(first_stat.confidence))
    print("Lift: " + str(first_stat.lift))
    print("=====================================")
示例#24
0
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 13:20:04 2019

@author: Guilherme
"""

import pandas as pd

dados = pd.read_csv('mercado2.csv', header=None)
# One transaction per row with every cell stringified.  The array is fetched
# once instead of on every cell access.
# NOTE(review): the 7501 x 20 extent is hard-coded to the expected file size.
celulas = dados.values
transacoes = [[str(celulas[i, j]) for j in range(0, 20)]
              for i in range(0, 7501)]

from apyori import apriori
# The keyword was misspelled `min_lenght`.
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length is honoured by the installed version.
regras = apriori(transacoes,
                 min_support=0.003,
                 min_confidence=0.2,
                 min_lift=3.0,
                 min_length=2)

resultados = list(regras)
resultados

resultados2 = [list(x) for x in resultados]
resultados2
# Format the ordered statistics (position 2) of the first five records.
# Slicing instead of a fixed range(0, 5) avoids an IndexError when fewer than
# five rules were found.
resultadoFormatado = [[list(x) for x in registro[2]]
                      for registro in resultados2[:5]]
resultadoFormatado
示例#25
0
        return 0
    if x >= 1:
        return 1
# Apply encode_units to every cell of the basket table.
basket_sets = basket.applymap(encode_units)

#%%
# Sum BillNo per Description and sort the table ascending by that sum.
df1 = (
    df.groupby('Description')['BillNo']
    .sum()
    .to_frame()
    .reset_index()
    .sort_values(by='BillNo')
)
df1


#%%
from mlxtend.frequent_patterns import apriori

# Items and item sets with a support of at least 0.0025, labelled with the
# column names (the result of this call is not stored).
apriori(basket_sets, min_support=0.0025, use_colnames=True)

#%%
frequent_itemsets = apriori(basket_sets, min_support=0.0025, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

#%%
# Item sets of exactly two items whose support is at least 0.0025.
is_pair = frequent_itemsets['length'] == 2
is_frequent = frequent_itemsets['support'] >= 0.0025
support1 = frequent_itemsets[is_pair & is_frequent]

support1

#%%
# a function that takes the value and returns
# Show row 1 of the dataset with every one of its 20 cells stringified.
print([str(dataset.values[1, j]) for j in range(0, 20)])

# Build the transactions the same way for rows 0-7500: one list of 20
# stringified cells per row (missing cells are kept, stringified).
transactions = [
    [str(dataset.values[row, col]) for col in range(0, 20)]
    for row in range(0, 7501)
]

# Training Apriori on the dataset
mining_params = {'min_support': 0.003, 'min_confidence': 0.25, 'min_lift': 4}
rules = apriori(transactions, **mining_params)

# Show what kind of object apriori handed back.
print(type(rules))

# next(rules)
"""
Shortcut to write a generator

q = (i**2 for i in [1,2,3,4,5])
print(type(q))
next(q)
p = list(q)
print(p)
"""
# Visualising the results
示例#27
0
#!/usr/bin/env python
# coding: utf-8

# In[41]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori

# In[42]:

# Load the raw file; header=None makes pandas read the first row as data.
df = pd.read_csv("SuperCenterDataNew.csv", header=None)
# Render the frame (`display` is presumably supplied by the notebook session).
display(df)

# In[47]:

# (rows, columns) of the loaded frame
df.shape

# In[ ]:

# number of columns
len(df.columns)

# In[ ]:

# NOTE(review): `records` is not defined in any cell visible here; it has to be
# built before this cell can run -- confirm against the full notebook.
asscociation_rules = apriori(records, min_support=0.0055)
示例#28
0
# Transactions entered interactively; `items` is defined outside the visible
# part of the script and supplies the five item labels.
er = []
purchase_prompts = (
    "Is i1 purchased ?(y/n) ",
    "Is i2 purchased ?(y/n) ",
    "Is i3 purchased ?(y/n) ",
    "Is i4 purchased ?(y/n) ",
    "Is i5 purchased ?(y/n) ",
)
i = input("Do you want to add transaction ?(y/n) ")
while i == 'y':
    l = []
    # Ask about each item in turn; a 'y' answer adds it to the transaction.
    for position, prompt in enumerate(purchase_prompts):
        if input(prompt) == 'y':
            l.append(items[position])
    er.append(l)
    i = input("Do you want to add transaction ?(y/n) ")
ar = apriori(er, min_confidence=0.7)
res = list(ar)
print(len(res))
#print(er)
#print(res)

# In[ ]:

get_ipython().system('pip install apyori')
示例#29
0
    journey.append(p)

# Edges: for every path with 2 or more stops, pair the first element of each
# stop with the first element of the next stop.
edges = []
for visits in path:
    if len(visits) >= 2:
        # Stop one short of the end.  The loop used to run to len(visits) and
        # relied on a bare `except: pass` to swallow the IndexError raised by
        # `j + 1`, which also hid every other error.
        for j in range(len(visits) - 1):
            edges.append((visits[j][0], visits[j + 1][0]))

# Drop empty journeys.
journey2 = list(filter(None, journey))

rules = apriori(journey2, min_support=0.001, min_confidence=0.10, min_lift=3, min_length=2)
results = list(rules)

lift = []
association = []
confidence = []

# Use the first ordered statistic of every record.  The loop used to start at
# index 1, which silently dropped the first rule.
# NOTE(review): confirm that skipping results[0] was not intentional.
for record in results:
    first_stat = record[2][0]
    lift.append(first_stat[3])
    association.append(list(record[0]))
    confidence.append(first_stat[2])

rank = pd.DataFrame([association, lift, confidence]).T
rank.dropna(inplace=True)
rank.columns = ['Association', 'Lift', 'confidence']
示例#30
0
# Apriori

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data preprocessing: the first row of the file is read as data (header=None);
# each of rows 0-7500 becomes one list of 20 stringified cells.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)
transactions = [
    [str(dataset.values[row, col]) for col in range(0, 20)]
    for row in range(0, 7501)
]

# Training Apriori on the dataset
from apyori import apriori

mining_params = {
    'min_support': 0.003,
    'min_confidence': 0.2,
    'min_lift': 3,
    'min_length': 2,
}
rules = apriori(transactions, **mining_params)

# Materialise the rules and convert every record to a plain list.
results = list(rules)
myResults = [list(record) for record in results]
示例#31
0
import pandas as pd

dados = pd.read_csv('mercado.csv', header=None)
# One transaction per row (rows 0-9, columns 0-3), every cell stringified.
# The array is fetched once instead of on every cell access.
celulas = dados.values
transacoes = [[str(celulas[i, j]) for j in range(0, 4)]
              for i in range(0, 10)]

from apyori import apriori
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length has any effect.
regras = apriori(transacoes,
                 min_support=0.3,
                 min_confidence=0.8,
                 min_lift=2,
                 min_length=2)

resultados = list(regras)
resultados

resultados2 = [list(x) for x in resultados]
resultados2
# Format the ordered statistics (position 2) of the first five records.
# Slicing instead of a fixed range(0, 5) avoids an IndexError when fewer than
# five rules were found.
resultadoFormatado = [[list(x) for x in registro[2]]
                      for registro in resultados2[:5]]
resultadoFormatado
示例#32
0
文件: apriori.py 项目: jomit/AITrials
# Apriori

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data preprocessing: the first row of the file is read as data (header=None);
# each of rows 0-7500 becomes one list of 20 stringified cells.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
transactions = [
    [str(dataset.values[row, col]) for col in range(0, 20)]
    for row in range(0, 7501)
]

# Training Apriori on the dataset
from apyori import apriori

# Threshold rationale:
#   min_support    a product bought at least 3 times a day, 7 days a week:
#                  3*7 / 7501 = 0.0028 (rounded to 0.003)
#   min_confidence 20%
#   min_lift       3
#   min_length     2 (at least 2 items in the basket)
mining_params = {
    'min_support': 0.003,
    'min_confidence': 0.2,
    'min_lift': 3,
    'min_length': 2,
}
rules = apriori(transactions, **mining_params)

# Visualising the results
results = list(rules)
示例#33
0
import pandas as pd
from apyori import apriori

#from google.colab import drive
#drive.mount('/content/drive')
data = pd.read_csv('store_data.csv')
data

# One transaction per row, every cell stringified.  The array is fetched once
# instead of on every cell access.
# NOTE(review): only the first 20 rows are used (range(0, 20)) -- confirm that
# this is not meant to cover the whole file.
cells = data.values
records = [[str(cells[i, j]) for j in range(0, 20)] for i in range(0, 20)]
print(records[0:4])

# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length has any effect.
association_rules = apriori(records,
                            min_support=0.0050,
                            min_confidence=0.35,
                            min_lift=3,
                            min_length=1)
association_results = list(association_rules)
association_results

for item in association_results:
    # The item set is a frozenset, so its iteration order says nothing about
    # which side of the rule an item is on.  Take antecedent and consequent
    # from the first ordered statistic, the same one whose confidence and
    # lift are printed below.
    first_stat = item.ordered_statistics[0]
    antecedent = ", ".join(sorted(str(x) for x in first_stat.items_base))
    consequent = ", ".join(sorted(str(x) for x in first_stat.items_add))
    print("Rule: " + antecedent + " -> " + consequent)

    print("Support: " + str(item.support))

    print("Confidence: " + str(first_stat.confidence))
    print("Lift: " + str(first_stat.lift))
示例#34
0
# importing the data (csv format)
import pandas as pd
base = pd.read_csv('groceries.csv', sep=',', header=None)

# transforming each line of the table into a list of stringified cells
transactions = [[str(cell) for cell in row] for row in base.values]

# Importing the rule algorithm and setting the parameters
from apyori import apriori
# The keyword was misspelled `min_lenght`.
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; verify that min_length is honoured by the installed version.
rules = apriori(transactions,
                min_support=0.003,
                min_confidence=0.6,
                min_lift=2.0,
                min_length=2)
results = list(rules)

# Listing the ordered statistics (position 2) of the first 5 records.
# Slicing instead of a fixed range(5) avoids an IndexError when fewer than
# five rules were found.
results = [list(x) for x in results]
shapedResults = [[list(x) for x in record[2]] for record in results[:5]]
print(shapedResults)
示例#35
0
# text parsing: remove every character that is not Hangul, a Latin letter or
# whitespace, then extract the nouns of each line.
# The pattern is a raw string now -- '\s' inside a normal string literal is an
# invalid escape sequence -- and it is compiled once instead of once per line.
non_text = re.compile(r'[^가-힣a-zA-Z\s]')
data_set = [korean_nlp.nouns(non_text.sub('', line)) for line in lines]

print('파싱은 끝났음')

start_time = time.time()

# Shape the data: mine the item sets, then keep only the pairs (length 2)
# with enough support, highest support first.
result = list(apriori(data_set, min_support=my_min_support))
df = pd.DataFrame(result)
df['length'] = df['items'].apply(len)
df = df[(df['length'] == 2) & (df['support'] >= my_min_support)].sort_values(
    by='support', ascending=False)

df.head(10)
print(f'알고리즘 적용 수행 시간: {time.time() - start_time}')

# Define the networkx graph: every remaining two-item set becomes one edge.
G = nx.Graph()
ar = (df['items'])
G.add_edges_from(ar)

# PageRank score of every node, collected into an array.
pr = nx.pagerank(G)
nsize = np.array(list(pr.values()))
示例#36
0
def extract_apriori_association_rules(matches, max_length, min_support=0.0001):
    """
    Run apriori over ``matches`` and return everything it yields as a list.

    Args:
        matches: transactions handed to ``apriori`` unchanged.
        max_length: forwarded as apriori's ``max_length`` keyword.
        min_support: forwarded as apriori's ``min_support`` keyword.

    Returns:
        list: the records produced by ``apriori``.
    """
    mined = apriori(matches, min_support=min_support, max_length=max_length)
    return list(mined)
示例#37
0
transactions2 = [[1, 3, 7],
                [2, 3, 7],
                [1, 2, 3],
                [2, 3],
                [2, 3, 4, 5],
                [2, 3],
                [1, 2, 3, 4, 6],
                [2, 3, 4, 6],
                [1],
                [1, 3]]

# `transactions1` is defined outside the visible part of the script.
print("Size\tApriori Time\tFP Growth time")
for data in [transactions1, transactions2]:
    # Apriori: an item set must appear in at least 2 transactions.
    # The apriori(...) call was missing its closing parenthesis (SyntaxError).
    # The result is also consumed inside the timed region now: if apriori is
    # lazy (apyori's is a generator), stopping the clock before list() would
    # time almost nothing.
    start1 = time.time()
    rules = list(apriori(data, min_support=(2 / len(data))))
    end1 = time.time()
    result1 = ["".join([str(j) for j in i[0]]) for i in rules]

    # FP Growth with the same absolute support threshold of 2.
    start2 = time.time()
    patterns = pyfpgrowth.find_frequent_patterns(data, 2)
    end2 = time.time()
    result2 = [''.join([str(x) for x in i]) for i in patterns]

    print(str(len(data))+"\t"+str(end1-start1)+"\t"+str(end2-start2))