def finallyResult(currentList, simpDat):
    # Format the raw transactions, build the FP-tree (min support hard-coded to 2),
    # and mine its frequent itemsets.
    data = fpTree.createInitSet(simpDat)
    myFPtree, myHeaderTab = fpTree.createTree(data, 2)
    freqItems = []
    fpTree.mineTree(myFPtree, myHeaderTab, 2, set([]), freqItems)
    # Collect the recommended items, flattening the nested result and dropping duplicates.
    result = []
    temp = igemRecomdData.getResult(currentList, freqItems)
    for item in temp:
        t = list(item)
        if len(t) > 0:
            for item2 in t:
                if item2 not in result:
                    result.append(item2)
    return result
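# A minimal usage sketch for finallyResult. The transactions and the current
# item list below are made-up illustrations (not project data); fpTree and
# igemRecomdData are assumed to be the project modules used above.
if __name__ == '__main__':
    sample_transactions = [['r', 'z', 'h', 'j', 'p'],
                           ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
                           ['z'],
                           ['r', 'x', 'n', 'o', 's'],
                           ['y', 'r', 'x', 'z', 'q', 't', 'p'],
                           ['y', 'z', 'x', 'e', 'q', 's', 't', 'm']]
    current_items = ['z', 'x']
    # prints the items recommended from the mined frequent itemsets
    print(finallyResult(current_items, sample_transactions))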
def ibm_fptree(min_support, min_confidence):
    path_ibm = './dataset/IBM-Quest-Data-Generator.exe/ttt.data.txt'
    member_group = DataReader.readDataIBM(path_ibm)
    result_df = pd.DataFrame(member_group)
    result_df_T = result_df.T
    L1 = apriori.createL1(result_df_T, min_support)
    # drop placeholder rows whose item is None
    for i in range(len(L1)):
        if str(L1[0][i]) == 'None':
            L1 = L1.drop([i], axis=0)
    L1 = L1.reset_index(drop=True)
    l1_sort = L1.sort_values(['count'], ascending=False)
    l1_sort = l1_sort.reset_index(drop=True)
    # member_group = (np.array(result_df)).tolist()
    # keep only the frequent items in each transaction, ordered by descending support
    new_member_group = []
    for i in range(len(member_group)):
        member = []
        for j in range(len(l1_sort)):
            if l1_sort[0][j] in member_group[i]:
                member.append(l1_sort[0][j])
        new_member_group.append(member)
    # build the FP tree
    fp_tree_root = fpTree.createTree(new_member_group)
    # FP-growth
    freq_pats = fpTree.fpGrowth(l1_sort, fp_tree_root, min_support)
    # deduplicate the mined patterns, keeping their counts in step
    freq_itemset = []
    freq_itemset_count = []
    for i in range(len(freq_pats)):
        if freq_pats[i].val not in freq_itemset:
            freq_itemset.append(freq_pats[i].val)
            freq_itemset_count.append(freq_pats[i].count)
    df_itemset = pd.DataFrame(freq_itemset)
    df_itemset_count = pd.DataFrame(freq_itemset_count, columns=['count'])
    df_ans = pd.concat([df_itemset_count, df_itemset], axis=1)
    print('freq itemsets:')
    print(df_ans)
    generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
def kaggle_fptree(min_support, min_confidence):
    path_kaggle = './dataset/BreadBasket_DMS.csv'
    member_group = DataReader.readDataKaggle(path_kaggle)
    result_df = pd.DataFrame(member_group)
    result_df_T = result_df.T
    L1 = apriori.createL1(result_df_T, min_support)
    l1_sort = L1.sort_values(['count'], ascending=False)
    l1_sort = l1_sort.reset_index(drop=True)
    # member_group = (np.array(result_df)).tolist()
    # keep only the frequent items in each transaction, ordered by descending support
    new_member_group = []
    for i in range(len(member_group)):
        member = []
        for j in range(len(l1_sort)):
            if l1_sort[0][j] in member_group[i]:
                member.append(l1_sort[0][j])
        new_member_group.append(member)
    # build the FP tree
    fp_tree_root = fpTree.createTree(new_member_group)
    # FP-growth
    freq_pats = fpTree.fpGrowth(l1_sort, fp_tree_root, min_support)
    # deduplicate the mined patterns, keeping their counts in step
    freq_itemset = []
    freq_itemset_count = []
    for i in range(len(freq_pats)):
        if freq_pats[i].val not in freq_itemset:
            freq_itemset.append(freq_pats[i].val)
            freq_itemset_count.append(freq_pats[i].count)
    df_itemset = pd.DataFrame(freq_itemset)
    df_itemset_count = pd.DataFrame(freq_itemset_count, columns=['count'])
    df_ans = pd.concat([df_itemset_count, df_itemset], axis=1)
    print('freq itemsets:')
    print(df_ans)
    generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
        # print(str(rule.cause) + '->' + str(rule.effect) +
        #       '(support:' + str(rule.support) + ' confidence:' + str(rule.confidence) + ')')
        counter += 1
    return counter


if __name__ == "__main__":
    total_data = [[0, 1, 2, 3], [0, 1, 2], [2, 3], [0, 1, 4]]
    # minimum support
    minSup = 200
    test_data = ge_association_data(10000)
    # format the initial transaction set
    initSet = fpTree.createInitSet(test_data)
    # build the FP-tree
    myFPtree, myHeaderTab = fpTree.createTree(initSet, minSup)
    # empty list that collects the frequent itemsets
    myFreqList = []
    fpTree.mineTree(myFPtree, myHeaderTab, minSup, set([]), myFreqList)
    print("number of frequent itemsets:", len(myFreqList))
    print("frequent itemsets:", myFreqList)
    rules = generate_rules(myFreqList, initSet)
    # count the surviving rules while sweeping the confidence threshold
    rule_nums = []
    ratios = np.linspace(0.2, 0.6, 30)
    for ratio in ratios:
        print(ratio)
        rule_num = print_rules(rules, 0.02, ratio)
        rule_nums.append(rule_num)
        print(rule_num)
    plt.plot(rule_nums)
    plt.show()
def test_fptree(min_support, min_confidence):
    path = './dataset/Test.csv'
    df = pd.read_csv(path)
    # print(df)
    # split the rows into one item list per transaction (grouped by TID)
    member = []
    member_group = []
    # group_id = []
    for i in range(len(df)):
        if i == 0:
            # member.append(df['tid'][i])
            member.append(df['Item'][i])
        elif df['TID'][i] == df['TID'][i - 1]:
            member.append(df['Item'][i])
        else:
            member_group.append(list(member))
            # group_id.append(df['t_id'][i-1])
            member = []
            # member.append(df['tid'][i])
            member.append(df['Item'][i])
    member_group.append(list(member))
    # member_group = DataReader.readDataKaggle(path_kaggle)
    result_df = pd.DataFrame(member_group)
    result_df_T = result_df.T
    L1 = apriori.createL1(result_df_T, min_support)
    l1_sort = L1.sort_values(['count'], ascending=False)
    l1_sort = l1_sort.reset_index(drop=True)
    # member_group = (np.array(result_df)).tolist()
    # keep only the frequent items in each transaction, ordered by descending support
    new_member_group = []
    for i in range(len(member_group)):
        member = []
        for j in range(len(l1_sort)):
            if l1_sort[0][j] in member_group[i]:
                member.append(l1_sort[0][j])
        new_member_group.append(member)
    # build the FP tree
    fp_tree_root = fpTree.createTree(new_member_group)
    # FP-growth
    freq_pats = fpTree.fpGrowth(l1_sort, fp_tree_root, min_support)
    # deduplicate the mined patterns, keeping their counts in step
    freq_itemset = []
    freq_itemset_count = []
    for i in range(len(freq_pats)):
        if freq_pats[i].val not in freq_itemset:
            freq_itemset.append(freq_pats[i].val)
            freq_itemset_count.append(freq_pats[i].count)
    df_itemset = pd.DataFrame(freq_itemset)
    df_itemset_count = pd.DataFrame(freq_itemset_count, columns=['count'])
    df_ans = pd.concat([df_itemset_count, df_itemset], axis=1)
    print('freq itemsets:')
    print(df_ans)
    generateRule.generateRule(freq_itemset, freq_itemset_count, L1, min_confidence)
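# A minimal sketch of how the entry points above might be invoked; the support
# and confidence thresholds below are illustrative values, not settings taken
# from the project.
if __name__ == '__main__':
    test_fptree(min_support=2, min_confidence=0.5)
    # kaggle_fptree(min_support=60, min_confidence=0.6)
    # ibm_fptree(min_support=3, min_confidence=0.6)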