Example #1
    def test_max_len(self):
        res_df1 = fpmax(self.df)
        max_len = np.vectorize(len)(res_df1['itemsets']).max()
        assert max_len == 3

        res_df2 = fpmax(self.df, max_len=2)
        max_len = np.vectorize(len)(res_df2['itemsets']).max()
        assert max_len == 2
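
For context, fpmax expects a one-hot encoded transaction DataFrame and returns a DataFrame with 'support' and 'itemsets' columns; max_len caps the size of the returned itemsets. A minimal, self-contained sketch (the transactions below are made up for illustration, not the test fixture self.df):

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpmax

# Hypothetical transactions, for illustration only
transactions = [['a', 'b', 'c'], ['a', 'b', 'c'], ['a', 'b'], ['d']]
te = TransactionEncoder()
onehot = pd.DataFrame(te.fit(transactions).transform(transactions),
                      columns=te.columns_)

print(fpmax(onehot, min_support=0.25, use_colnames=True))             # maximal sets of any length
print(fpmax(onehot, min_support=0.25, use_colnames=True, max_len=2))  # itemsets capped at 2 items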
Example #2
    def test_max_len(self):
        res_df1 = fpmax(self.df)
        max_len = np.max(res_df1['itemsets'].apply(len))
        assert max_len == 3

        res_df2 = fpmax(self.df, max_len=2)
        max_len = np.max(res_df2['itemsets'].apply(len))
        assert max_len == 2
Example #3
def getMaxFrequentPatterns(df,
                           columns,
                           class_to_explain,
                           lprefix='ltable_',
                           rprefix='rtable_',
                           min_support=0.2,
                           k=15):
    transactions = []
    for i in range(len(df)):
        leftValues, rightValues = [], []
        for attr in columns:
            if attr.startswith(lprefix):
                leftValues += str(df.iloc[i][attr]).split()
            elif attr.startswith(rprefix):
                rightValues += str(df.iloc[i][attr]).split()
        if class_to_explain == 0:
            selectedRightValues = set(leftValues).intersection(
                set(rightValues))
            selectedLeftValues = selectedRightValues.copy()
        else:
            selectedLeftValues = set(leftValues).difference(set(rightValues))
            selectedRightValues = set(rightValues).difference(set(leftValues))
        leftValuesPrefixed = list(
            map(lambda val: 'L_' + val, selectedLeftValues))
        rightValuesPrefixed = list(
            map(lambda val: 'R_' + val, selectedRightValues))
        transactions.append(leftValuesPrefixed + rightValuesPrefixed)
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    frequent_itemsets = fpmax(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets
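
A hypothetical call, assuming a record-pair DataFrame whose attribute columns carry the ltable_/rtable_ prefixes. The column name and rows below are invented, and the function itself relies on pandas, TransactionEncoder, and fpmax being imported elsewhere:

import pandas as pd

pairs = pd.DataFrame({
    'ltable_name': ['apple iphone 11', 'dell xps 13'],
    'rtable_name': ['apple iphone 11 64gb', 'lenovo thinkpad x1'],
})
# class_to_explain=0 keeps tokens shared by both sides of each pair
patterns = getMaxFrequentPatterns(pairs, ['ltable_name', 'rtable_name'],
                                  class_to_explain=0, min_support=0.5)
print(patterns)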
Example #4
def mineAssociationRules(df,
                         columns,
                         class_to_explain,
                         lprefix='ltable_',
                         rprefix='rtable_',
                         min_confidence=0.5,
                         min_support=0.2):
    transactions = _createTransactions(df,
                                       columns,
                                       class_to_explain,
                                       left_prefix=lprefix,
                                       right_prefix=rprefix)
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    frequent_itemsets = fpmax(df, min_support=min_support, use_colnames=True)
    ar = association_rules(frequent_itemsets,
                           metric="confidence",
                           min_threshold=min_confidence)
    ar['antecedents_isleft'] = ar['antecedents'].apply(
        lambda s: all(token.startswith('L_') for token in s))
    ar['consequents_isright'] = ar['consequents'].apply(
        lambda s: all(token.startswith('R_') for token in s))
    important_rules = ar[ar.antecedents_isleft & ar.consequents_isright]
    return important_rules
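
The antecedents and consequents columns produced by association_rules hold frozensets of items, which is why the all(...) checks above iterate token by token. A standalone illustration with made-up rule sides:

antecedent = frozenset({'L_apple', 'L_iphone'})
consequent = frozenset({'R_apple', 'R_64gb'})
assert all(token.startswith('L_') for token in antecedent)
assert all(token.startswith('R_') for token in consequent)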
Example #5
    def test_output(self):
        res_df = fpmax(self.df, min_support=0.001, use_colnames=True)
        expect = pd.DataFrame(
            [[0.25, frozenset(['a'])], [0.25, frozenset(['b'])],
             [0.25, frozenset(['c', 'd'])], [0.25, frozenset(['e'])]],
            columns=['support', 'itemsets'])

        compare_dataframes(res_df, expect)
Example #6
    def test_default(self):
        res_df = fpmax(self.df)
        expect = pd.DataFrame(
            [[0.6, frozenset([5, 6])], [0.6, frozenset([5, 10])],
             [0.6, frozenset([3, 5, 8])]],
            columns=['support', 'itemsets'])

        compare_dataframes(res_df, expect)
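
compare_dataframes is a helper from the surrounding test module and is not shown in these snippets. A plausible stand-in (an assumption, not the actual helper) would compare the two frames order-independently:

def compare_dataframes(actual, expected):
    # Hypothetical reimplementation: row order is not guaranteed,
    # so compare itemsets as a set and supports as sorted values.
    assert set(actual['itemsets']) == set(expected['itemsets'])
    assert sorted(actual['support']) == sorted(expected['support'])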
Example #7
def getMaxFrequentPatterns(df,
                           columns,
                           class_to_explain,
                           lprefix='ltable_',
                           rprefix='rtable_',
                           min_support=0.2,
                           k=15):
    transactions = _createTransactions(df,
                                       columns,
                                       class_to_explain,
                                       left_prefix=lprefix,
                                       right_prefix=rprefix)
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    frequent_itemsets = fpmax(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets
Example #8
def find_common(itemsets, occurrences, **kwargs):
    '''Find maximal frequent itemsets with the FP-Max algorithm'''
    min_support = occurrences / len(itemsets)
    return fpmax(itemsets, min_support=min_support, **kwargs)
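
Here occurrences is an absolute count, and dividing by len(itemsets) (the number of rows, i.e. transactions) converts it into the relative support that fpmax expects. A small usage sketch with made-up baskets:

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

baskets = [['milk', 'bread'], ['milk', 'bread'], ['beer']]
te = TransactionEncoder()
onehot = pd.DataFrame(te.fit(baskets).transform(baskets), columns=te.columns_)

# Maximal itemsets present in at least 2 of the 3 transactions
print(find_common(onehot, occurrences=2, use_colnames=True))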
Example #9
# Assumed imports and encoder setup for this snippet; np_data is the raw
# list of transactions loaded elsewhere.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, fpmax

te = TransactionEncoder()
te_ary = te.fit(np_data).transform(np_data)
data = pd.DataFrame(te_ary, columns=te.columns_)
print(data)

# 2
result_fpgrowth = fpgrowth(data, min_support=0.03, use_colnames=True)
result_fpgrowth['length'] = np.fromiter(map(len, result_fpgrowth['itemsets']),
                                        dtype=int)
print(result_fpgrowth.sort_values('support', ascending=False))

# 3
print(result_fpgrowth.groupby('length').support.min())
print(result_fpgrowth.groupby('length').support.max())

# 4
result_fpmax = fpmax(data, min_support=0.03, use_colnames=True)
result_fpmax['length'] = np.fromiter(map(len, result_fpmax['itemsets']),
                                     dtype=int)
print(result_fpmax.groupby('length').support.min())
print(result_fpmax.groupby('length').support.max())
print(result_fpmax.sort_values('support', ascending=False))

# 6
plt.figure(figsize=(8, 6))
count_of_items = data.sum()
count_of_items.nlargest(10).plot.bar()

plt.figure(figsize=(8, 6))
data_ = result_fpgrowth[result_fpgrowth.length == 1].sort_values(
    'support', ascending=False).set_index('itemsets').support
data_.nlargest(10).plot.bar()
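
The contrast between steps 2 and 4 is the point of the exercise: fpgrowth enumerates every frequent itemset, while fpmax keeps only the maximal ones (frequent itemsets with no frequent superset), so its output is always a subset. A tiny self-contained illustration with made-up baskets:

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, fpmax

baskets = [['a', 'b', 'c'], ['a', 'b', 'c'], ['a', 'b'], ['a']]
te = TransactionEncoder()
onehot = pd.DataFrame(te.fit(baskets).transform(baskets), columns=te.columns_)

all_frequent = fpgrowth(onehot, min_support=0.5, use_colnames=True)  # 7 itemsets
maximal = fpmax(onehot, min_support=0.5, use_colnames=True)          # only {a, b, c}
print(len(all_frequent), len(maximal))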
Example #10
# Assumed imports for this snippet; 'data' (one-hot transactions) and
# 'fpg_result' (fpgrowth output) come from earlier, elided cells.
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import fpmax


def printMinMaxSupport(result):
    # Reconstructed function header: the snippet starts mid-function,
    # but the call below shows the name and argument.
    curr_len = 1
    while True:
        sups = result[result['itemsets'].apply(
            lambda r: len(r) == curr_len)]['support']
        if len(sups) == 0:
            break
        print('Itemset length {len}: support [{min}, {max}]'.format(
            len=curr_len,
            min=round(np.min(sups), 5),
            max=round(np.max(sups), 5)))
        curr_len += 1


printMinMaxSupport(fpg_result)

# %%
fpm_result = fpmax(data, min_support=0.03,
                   use_colnames=True).sort_values('support', ascending=False)
fpm_result

# %%
printMinMaxSupport(fpm_result)

# %%
plt.xlabel('Number of times an item appears in a transaction')
data.sum().nlargest(10).sort_values().plot.barh()

# %%
plt.xlabel('Support level')
fpg_result.set_index('itemsets')['support'].nlargest(
    10).sort_values().plot.barh()

Example #11

# %%
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpmax
from mlxtend.frequent_patterns import association_rules
from fim import eclat
from tabulate import tabulate

df = pd.read_csv("plants_preprocessed.csv")
print("================ FP-Max ==================")
dataset = df.values.tolist()
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
itemsets = fpmax(df, min_support=0.001, use_colnames=True, max_len=10)
print(itemsets)

print("\n\n RULES based on FP growth : \n\n")
rules = association_rules(itemsets, min_threshold=0.0001,support_only=True)
print(rules[['antecedents', 'consequents', 'support']])

print("================ ECLAT-Max ================ ")
itemsets = eclat(dataset,target='m',supp=2,report='s')
print(tabulate(itemsets,  headers=['Itemset', 'Support'], tablefmt='pretty'))
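
Both calls aim at maximal itemsets, but the thresholds differ: in pyfim's convention target='m' selects maximal sets and a positive supp is a percentage, so supp=2 corresponds to min_support=0.02 in mlxtend terms, while the fpmax call above uses 0.001. A sketch of matched thresholds, assuming pyfim's documented convention:

# min_support=0.02 (mlxtend) vs. supp=2 percent (pyfim) mine at the same level
itemsets_fpmax = fpmax(df, min_support=0.02, use_colnames=True)
itemsets_eclat = eclat(dataset, target='m', supp=2, report='s')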
Example #12
# hashtags.drop(dropcol, inplace=True)

nodes.reset_index(inplace=True, drop=True)
communities.reset_index(inplace=True, drop=True)
user_hashtag_matrix.reset_index(inplace=True, drop=True)

t = len(hashtags)
n = len(nodes)
k = max(communities[communities_tagname])
T = range(t)
K = range(k)

if together_constraint:
    hashtags.reset_index(inplace=True, drop=True)
    user_hashtag_matrix.columns = np.arange(0, t)

# Relabel the matrix columns with the hashtag strings
user_hashtag_matrix.columns = list(hashtags[0])

# data = fpmax(user_hashtag_matrix, min_support=0.2941, use_colnames=True)
data = fpmax(user_hashtag_matrix, min_support=0.489, use_colnames=True)
name = "maximal " + str(cluster)
file = open(name, "w")
print(data, file=file)
# print(fpmax(user_hashtag_matrix, min_support=0.3, use_colnames=True))
# print(fpmax(user_hashtag_matrix, min_support=0.25, use_colnames=True))
# print(fpmax(user_hashtag_matrix, min_support=0.2, use_colnames=True))
Example #13
# Assumed imports for this snippet; np_data is the raw transaction list
# loaded elsewhere.
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, fpmax

te = TransactionEncoder()
te_ary = te.fit(np_data).transform(np_data)
data = pd.DataFrame(te_ary, columns=te.columns_)
print(data)

result = fpgrowth(data, min_support=0.03, use_colnames=True)
print(result)
result['length'] = result['itemsets'].apply(len)
result_1 = result[result['length'] == 1]
print("len 1 min:", min(result_1['support']))
print("len 1 max:", max(result_1['support']))
result_2 = result[result['length'] == 2]
print("len 2 min:", min(result_2['support']))
print("len 2 max:", max(result_2['support']))

result = fpmax(data, min_support=0.03, use_colnames=True)
print(result)
result['length'] = result['itemsets'].apply(len)
result_1 = result[result['length'] == 1]
print("len 1 min:", min(result_1['support']))
print("len 1 max:", max(result_1['support']))
result_2 = result[result['length'] == 2]
print("len 2 min:", min(result_2['support']))
print("len 2 max:", max(result_2['support']))

count_of_items = data.sum()
count_of_items.nlargest(10).plot.bar()
plt.tight_layout()
plt.show()

items = ['whole milk', 'yogurt', 'soda', 'tropical fruit', 'shopping bags', 'sausage',
Example #14

import time
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpmax


def check_dict(d, key):
	# Reconstructed from the commented-out usage at the bottom of this
	# snippet: report whether the key is already in the dictionary.
	if key in d:
		return True
	else:
		return False

start = time.time()
dataset = []   # 2-D array for storing the sequences

with open('out.txt', 'r') as fobj:   # Importing values from txt file containing dataset
    for line in fobj:
        numbers = [int(num) for num in line.split()]     # Single row of the 2-D array
        dataset.append(numbers)

t = TransactionEncoder()
t_ary = t.fit(dataset).transform(dataset)   # Converting to a table of True/False values
df = pd.DataFrame(t_ary, columns=t.columns_)  # Converting t_ary into the input form fpmax expects
frequent_set = fpmax(df, min_support=0.015, use_colnames=True)  # Applying the fpmax algorithm

frequent_set['length'] = frequent_set['itemsets'].apply(len)
#print(frequent_set)

end = time.time()
#print(end-start)

# For generating length v/s Count plot

# d = {}   # Initializing a dictionary
# for i in range(frequent_set.shape[0]):
# 	if check_dict(d, frequent_set['length'][i]):
# 		d[frequent_set['length'][i]] = d[frequent_set['length'][i]] + 1
# 	else:
# 		d[frequent_set['length'][i]] = 1
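
The commented-out block tallies itemset lengths by hand; pandas can build the same length-versus-count data directly. An equivalent sketch (matplotlib import assumed):

import matplotlib.pyplot as plt

# Count how many maximal itemsets were found at each length and plot
length_counts = frequent_set['length'].value_counts().sort_index()
length_counts.plot.bar()
plt.xlabel('Itemset length')
plt.ylabel('Count')
plt.show()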