import pickle

import apriori
import fp_growth


def load_termlist(termsetfile=None):
    """Load a pickled term list from disk and print a short preview of it.

    Args:
        termsetfile: Path of the pickle file that stores the term list.
            When it is None nothing is loaded.

    Returns:
        The unpickled term list, or None when no path was given.
    """
    if termsetfile is None:
        return None

    # NOTE(review): pickle.load can execute arbitrary code while
    # deserialising, so this must only be pointed at trusted files.
    with open(termsetfile, 'rb') as fr2:
        termlist = pickle.load(fr2)

    print('sub_termlist is:')
    # Slice instead of indexing 0..9 so a short list does not raise IndexError.
    for term in termlist[:10]:
        print(term)
    print('the number of lines :', len(termlist))

    # Return the loaded data; the path itself is of no use to the caller,
    # which slices the result and feeds it to fp_growth.
    return termlist


if __name__ == '__main__':
    min_support = 0.3
    # Number of leading term sets that are mined; the absolute support
    # threshold passed to fptree is derived from the same figure.
    sample_size = 10000
    sub_termsetfile = "/home/freestyle4568/lesson/Clothes-match-txt/sub_user_termset.pickle"

    termlist = load_termlist(sub_termsetfile)
    termfreq_list, support_data_term = fp_growth.fptree(
        termlist[0:sample_size], int(min_support * sample_size))

    print('termfreq_list is: ')
    for termfreq in termfreq_list:
        print(termfreq)

    # Persist the mined result; the context manager closes the file even
    # if pickling fails part-way through.
    termfreqfile = '/home/freestyle4568/lesson/Clothes-match-txt/termfreq_10000.pickle'
    with open(termfreqfile, 'wb') as fr_termfreq:
        pickle.dump(termfreq_list, fr_termfreq)
import pickle

import fp_growth
import apriori


def load_termlist(termsetfile=None):
    """Load a pickled term list from disk and print a short preview of it.

    Args:
        termsetfile: Path of the pickle file that stores the term list.
            When it is None nothing is loaded.

    Returns:
        The unpickled term list, or None when no path was given.
    """
    if termsetfile is None:
        return None

    # NOTE(review): pickle.load can execute arbitrary code while
    # deserialising, so this must only be pointed at trusted files.
    with open(termsetfile, "rb") as fr2:
        termlist = pickle.load(fr2)

    print("sub_termlist is:")
    # Slice instead of indexing 0..9 so a short list does not raise IndexError.
    for term in termlist[:10]:
        print(term)
    print("the number of lines :", len(termlist))

    # Return the loaded data; the path itself is of no use to the caller,
    # which slices the result and feeds it to fp_growth.
    return termlist


if __name__ == "__main__":
    min_support = 0.3
    # Number of leading term sets that are mined; the absolute support
    # threshold passed to fptree is derived from the same figure.
    sample_size = 10000
    sub_termsetfile = "/home/freestyle4568/lesson/Clothes-match-txt/sub_user_termset.pickle"

    termlist = load_termlist(sub_termsetfile)
    termfreq_list, support_data_term = fp_growth.fptree(
        termlist[0:sample_size], int(min_support * sample_size)
    )

    print("termfreq_list is: ")
    for termfreq in termfreq_list:
        print(termfreq)

    # Persist the mined result; the context manager closes the file even
    # if pickling fails part-way through.
    termfreqfile = "/home/freestyle4568/lesson/Clothes-match-txt/termfreq_10000.pickle"
    with open(termfreqfile, "wb") as fr_termfreq:
        pickle.dump(termfreq_list, fr_termfreq)
# NOTE(review): the line below is a script fragment whose line breaks were
# lost. Python treats everything after the first '#' on it as comment text, so
# only the leading sub_catlist.append(...) call is live code. Restore the
# original line breaks before relying on the fptree / generate_rules statements.
sub_catlist.append(catlist[i]) #=========================================================================== # Print 10 rows of sub_catlist #=========================================================================== #======================================================================= # print('sub_catlist is: ') # for i in range(10): # print(sub_catlist[i]) # print('sub_catlist\'s length is : ',len(sub_catlist)) # print('-----------------------------------------') #======================================================================= #=========================================================================== # Output the frequent itemsets in sub_catlist, using the fp_growth algorithm #=========================================================================== catfreq_list, support_data_cat = fp_growth.fptree(sub_catlist, int(min_support*len(sub_catlist))) #======================================================================= # print('catfreq_list is: ') # for i in range(len(catfreq_list)): # print(catfreq_list[i]) # print('-----------------------------------------') #======================================================================= #=========================================================================== # for i in support_data.items(): # print(i) #=========================================================================== big_rule_list = apriori.generate_rules(catfreq_list[0:2], support_data_cat, min_confidence) #print('rule list follows: ') cat_rule_list = [] for rule in big_rule_list: if test_one_category in rule[0] and len(rule[0]) == 1:
# NOTE(review): the line below is a script fragment whose line breaks were
# lost. Python treats everything after the first '#' on it as comment text, so
# only the leading `if ...: sub_catlist.append(...)` statement is live code.
# Restore the original line breaks before relying on the later statements.
if testcategory[0] in catlist[i]: sub_catlist.append(catlist[i]) # =========================================================================== # Print 10 rows of sub_catlist # =========================================================================== # =========================================================================== # for i in range(10): # print(sub_catlist[i]) print("sub_catlist's length is : ", len(sub_catlist)) # =========================================================================== # =========================================================================== # Output the frequent itemsets in sub_catlist, using the fp_growth algorithm # =========================================================================== catfreq_list, support_data = fp_growth.fptree(sub_catlist, 100000) print("catfreq_list is: ") for i in range(len(catfreq_list)): print(catfreq_list[i]) # =========================================================================== # for i in support_data.items(): # print(i) # =========================================================================== big_rule_list = apriori.generate_rules(catfreq_list, support_data, 0.5) print("rule list follows: ") for rule in big_rule_list: if test_one in rule[0] and len(rule[0]) == 1: print(rule)