def ibm_Apiori_hashtree(min_support, min_confidence): path_ibm = './dataset/IBM-Quest-Data-Generator.exe/ttt.data.txt' member_group = DataReader.readDataIBM(path_ibm) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) for i in range(len(L1)): if str(L1[0][i]) == 'None': L1 = L1.drop([i], axis=0) L1 = L1.reset_index(drop=True) Ln = L1 Ls = [] Ls.append(L1) while len(Ln) > 1: print(len(Ls)) C = apriori.generate_cand_itemset(Ln) # print(C) Lnn = apriori_byTree.generate_L(C, member_group, min_support) Ln = Lnn Ls.append(Ln) print('freq itemsets:') freq_itemset = [] freq_itemset_count = [] for i in range(1, len(Ls)): temp = (np.array(Ls[i])).tolist() for j in range(len(temp)): freq_itemset_count.append(temp[j][0]) freq_itemset.append(list(temp[j][1:])) print(temp[j][0], list(temp[j][1:])) #生成rules generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def kaggle_Apiori_bruteForce(min_support, min_confidence): path_kaggle = './dataset/BreadBasket_DMS.csv' member_group = DataReader.readDataKaggle(path_kaggle) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) Ln = L1 Ls = [] Ls.append(L1) while len(Ln) > 1: print(len(Ls)) Lnn = apriori.cal_support_and_generate_L( apriori.generate_cand_itemset(Ln), result_df, min_support) Ln = Lnn Ls.append(Ln) print('freq itemsets:') freq_itemset = [] freq_itemset_count = [] for i in range(1, len(Ls)): temp = (np.array(Ls[i])).tolist() for j in range(len(temp)): freq_itemset_count.append(temp[j][0]) freq_itemset.append(list(temp[j][1:])) print(temp[j][0], list(temp[j][1:])) #生成rules generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def test_Apiori_hashtree(min_support, min_confidence): path = './dataset/Test.csv' df = pd.read_csv(path) # print(df) # 將每個組合分開 member = [] member_group = [] # group_id=[] for i in range(len(df)): if i == 0: # member.append(df['tid'][i]) member.append(df['Item'][i]) elif df['TID'][i] == df['TID'][i - 1]: member.append((df['Item'][i])) else: member_group.append(list(member)) # group_id.append(df['t_id'][i-1]) member = [] # member.append(df['tid'][i]) member.append(df['Item'][i]) member_group.append(list(member)) # member_group = DataReader.readDataKaggle(path_kaggle) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) for i in range(len(L1)): if str(L1[0][i]) == 'None': L1 = L1.drop([i], axis=0) L1 = L1.reset_index(drop=True) Ln = L1 Ls = [] Ls.append(L1) while len(Ln) > 1: print(len(Ls)) C = apriori.generate_cand_itemset(Ln) # print(C) Lnn = apriori_byTree.generate_L_kaggle(C, member_group, min_support) Ln = Lnn if len(Ln) == 0: break Ls.append(Ln) print('freq itemsets:') freq_itemset = [] freq_itemset_count = [] for i in range(1, len(Ls)): temp = (np.array(Ls[i])).tolist() for j in range(len(temp)): freq_itemset_count.append(temp[j][0]) freq_itemset.append(list(temp[j][1:])) print(temp[j][0], list(temp[j][1:])) #生成rules generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def test_Apiori_bruteForce(min_support, min_confidence): path = './dataset/Test.csv' df = pd.read_csv(path) # print(df) # 將每個組合分開 member = [] member_group = [] # group_id=[] for i in range(len(df)): if i == 0: # member.append(df['tid'][i]) member.append(df['Item'][i]) elif df['TID'][i] == df['TID'][i - 1]: member.append((df['Item'][i])) else: member_group.append(list(member)) # group_id.append(df['t_id'][i-1]) member = [] # member.append(df['tid'][i]) member.append(df['Item'][i]) member_group.append(list(member)) # member_group = DataReader.readDataKaggle(path_kaggle) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) Ln = L1 Ls = [] Ls.append(L1) while len(Ln) > 1: print(len(Ls)) Lnn = apriori.cal_support_and_generate_L( apriori.generate_cand_itemset(Ln), result_df, min_support) Ln = Lnn Ls.append(Ln) print('freq itemsets:') freq_itemset = [] freq_itemset_count = [] for i in range(1, len(Ls)): temp = (np.array(Ls[i])).tolist() for j in range(len(temp)): freq_itemset_count.append(temp[j][0]) freq_itemset.append(list(temp[j][1:])) print(temp[j][0], list(temp[j][1:])) #生成rules generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def ibm_fptree(min_support, min_confidence): path_ibm = './dataset/IBM-Quest-Data-Generator.exe/ttt.data.txt' member_group = DataReader.readDataIBM(path_ibm) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) for i in range(len(L1)): if str(L1[0][i]) == 'None': L1 = L1.drop([i], axis=0) L1 = L1.reset_index(drop=True) l1_sort = L1.sort_values(['count'], ascending=False) l1_sort = l1_sort.reset_index(drop=True) # member_group = (np.array(result_df)).tolist() new_member_group = [] for i in range(len(member_group)): member = [] for j in range(len(l1_sort)): if l1_sort[0][j] in member_group[i]: member.append(l1_sort[0][j]) new_member_group.append(member) # 建立FP tree fp_tree_root = fpTree.createTree(new_member_group) # fp growth freq_pats = fpTree.fpGrowth(l1_sort, fp_tree_root, min_support) freq_itemset = [] freq_itemset_count = [] for i in range(len(freq_pats)): if (freq_pats[i].val in freq_itemset) == False: freq_itemset.append(freq_pats[i].val) freq_itemset_count.append(freq_pats[i].count) df_itemset = pd.DataFrame(freq_itemset) df_itemset_count = pd.DataFrame(freq_itemset_count, columns=['count']) df_ans = pd.concat([df_itemset_count, df_itemset], axis=1) print('freq itemsets:') print(df_ans) generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def kaggle_fptree(min_support, min_confidence): path_kaggle = './dataset/BreadBasket_DMS.csv' member_group = DataReader.readDataKaggle(path_kaggle) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) l1_sort = L1.sort_values(['count'], ascending=False) l1_sort = l1_sort.reset_index(drop=True) # member_group = (np.array(result_df)).tolist() new_member_group = [] for i in range(len(member_group)): member = [] for j in range(len(l1_sort)): if l1_sort[0][j] in member_group[i]: member.append(l1_sort[0][j]) new_member_group.append(member) # 建立FP tree fp_tree_root = fpTree.createTree(new_member_group) # fp growth freq_pats = fpTree.fpGrowth(l1_sort, fp_tree_root, min_support) freq_itemset = [] freq_itemset_count = [] for i in range(len(freq_pats)): if (freq_pats[i].val in freq_itemset) == False: freq_itemset.append(freq_pats[i].val) freq_itemset_count.append(freq_pats[i].count) df_itemset = pd.DataFrame(freq_itemset) df_itemset_count = pd.DataFrame(freq_itemset_count, columns=['count']) df_ans = pd.concat([df_itemset_count, df_itemset], axis=1) print('freq itemsets:') print(df_ans) generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def kaggle_Apiori_hashtree(min_support, min_confidence): path_kaggle = './dataset/BreadBasket_DMS.csv' member_group = DataReader.readDataKaggle(path_kaggle) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) for i in range(len(L1)): if str(L1[0][i]) == 'None': L1 = L1.drop([i], axis=0) L1 = L1.reset_index(drop=True) Ln = L1 Ls = [] Ls.append(L1) while len(Ln) > 1: print(len(Ls)) C = apriori.generate_cand_itemset(Ln) # print(C) Lnn = apriori_byTree.generate_L_kaggle(C, member_group, min_support) Ln = Lnn if len(Ln) == 0: break Ls.append(Ln) print('freq itemsets:') freq_itemset = [] freq_itemset_count = [] for i in range(1, len(Ls)): temp = (np.array(Ls[i])).tolist() for j in range(len(temp)): freq_itemset_count.append(temp[j][0]) freq_itemset.append(list(temp[j][1:])) print(temp[j][0], list(temp[j][1:])) #生成rules generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def test_fptree(min_support, min_confidence): path = './dataset/Test.csv' df = pd.read_csv(path) # print(df) # 將每個組合分開 member = [] member_group = [] # group_id=[] for i in range(len(df)): if i == 0: # member.append(df['tid'][i]) member.append(df['Item'][i]) elif df['TID'][i] == df['TID'][i - 1]: member.append((df['Item'][i])) else: member_group.append(list(member)) # group_id.append(df['t_id'][i-1]) member = [] # member.append(df['tid'][i]) member.append(df['Item'][i]) member_group.append(list(member)) # member_group = DataReader.readDataKaggle(path_kaggle) result_df = pd.DataFrame(member_group) result_df_T = result_df.T L1 = apriori.createL1(result_df_T, min_support) l1_sort = L1.sort_values(['count'], ascending=False) l1_sort = l1_sort.reset_index(drop=True) # member_group = (np.array(result_df)).tolist() new_member_group = [] for i in range(len(member_group)): member = [] for j in range(len(l1_sort)): if l1_sort[0][j] in member_group[i]: member.append(l1_sort[0][j]) new_member_group.append(member) # 建立FP tree fp_tree_root = fpTree.createTree(new_member_group) # fp growth freq_pats = fpTree.fpGrowth(l1_sort, fp_tree_root, min_support) freq_itemset = [] freq_itemset_count = [] for i in range(len(freq_pats)): if (freq_pats[i].val in freq_itemset) == False: freq_itemset.append(freq_pats[i].val) freq_itemset_count.append(freq_pats[i].count) df_itemset = pd.DataFrame(freq_itemset) df_itemset_count = pd.DataFrame(freq_itemset_count, columns=['count']) df_ans = pd.concat([df_itemset_count, df_itemset], axis=1) print('freq itemsets:') print(df_ans) generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)