示例#1
0
import apriori


def load_termlist(termsetfile=None):
    """Load a pickled term list from *termsetfile* and print a short preview.

    Prints up to the first ten entries followed by the total number of
    entries.

    Args:
        termsetfile: Path to a pickle file holding the term list. When
            ``None``, nothing is loaded or printed.

    Returns:
        The unpickled term list, or ``None`` when no file was given.
    """
    if termsetfile is None:
        return None

    # Imported locally so the function does not rely on a module-level
    # import of pickle being present.
    import pickle

    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only point this at files from a trusted source.
    with open(termsetfile, 'rb') as fr2:
        termlist = pickle.load(fr2)

    print('sub_termlist is:')
    # Slice instead of indexing 0..9 so short lists do not raise IndexError.
    for term in termlist[:10]:
        print(term)
    print('the number of lines :', len(termlist))
    # Return the loaded data (not the path) so callers can slice/mine it.
    return termlist


if __name__ == '__main__':
    # Imported here because this script block uses both modules and no
    # import that runs before this point brings them into scope.
    import pickle

    import fp_growth

    # Minimum support, expressed as a fraction of the mined sample.
    min_support = 0.3
    sub_termsetfile = "/home/freestyle4568/lesson/Clothes-match-txt/sub_user_termset.pickle"

    termlist = load_termlist(sub_termsetfile)

    # Mine at most the first 10000 entries. The absolute support count is
    # derived from the actual sample size so the threshold stays at
    # `min_support` even when fewer than 10000 entries are available.
    sample = termlist[0:10000]
    termfreq_list, support_data_term = fp_growth.fptree(
        sample, int(min_support * len(sample)))

    print('termfreq_list is: ')
    for termfreq in termfreq_list:
        print(termfreq)

    # Persist the frequent item sets; `with` closes the file even if
    # pickle.dump raises.
    termfreqfile = '/home/freestyle4568/lesson/Clothes-match-txt/termfreq_10000.pickle'
    with open(termfreqfile, 'wb') as fr_termfreq:
        pickle.dump(termfreq_list, fr_termfreq)
import fp_growth
import apriori


def load_termlist(termsetfile=None):
    """Load a pickled term list from *termsetfile* and print a short preview.

    Prints up to the first ten entries followed by the total number of
    entries.

    Args:
        termsetfile: Path to a pickle file holding the term list. When
            ``None``, nothing is loaded or printed.

    Returns:
        The unpickled term list, or ``None`` when no file was given.
    """
    if termsetfile is None:
        return None

    # Imported locally so the function does not rely on a module-level
    # import of pickle being present.
    import pickle

    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only point this at files from a trusted source.
    with open(termsetfile, "rb") as fr2:
        termlist = pickle.load(fr2)

    print("sub_termlist is:")
    # Slice instead of indexing 0..9 so short lists do not raise IndexError.
    for term in termlist[:10]:
        print(term)
    print("the number of lines :", len(termlist))
    # Return the loaded data (not the path) so callers can slice/mine it.
    return termlist


if __name__ == "__main__":
    # Imported here because this script block calls pickle.dump and no
    # module-level import of pickle is visible in this file.
    import pickle

    # Minimum support, expressed as a fraction of the mined sample.
    min_support = 0.3
    sub_termsetfile = "/home/freestyle4568/lesson/Clothes-match-txt/sub_user_termset.pickle"

    termlist = load_termlist(sub_termsetfile)

    # Mine at most the first 10000 entries. The absolute support count is
    # derived from the actual sample size so the threshold stays at
    # `min_support` even when fewer than 10000 entries are available.
    sample = termlist[0:10000]
    termfreq_list, support_data_term = fp_growth.fptree(sample, int(min_support * len(sample)))

    print("termfreq_list is: ")
    for termfreq in termfreq_list:
        print(termfreq)

    # Persist the frequent item sets; `with` closes the file even if
    # pickle.dump raises.
    termfreqfile = "/home/freestyle4568/lesson/Clothes-match-txt/termfreq_10000.pickle"
    with open(termfreqfile, "wb") as fr_termfreq:
        pickle.dump(termfreq_list, fr_termfreq)
         sub_catlist.append(catlist[i])
          
 #===========================================================================
 # 输出sub_catlist 10行
 #===========================================================================
 #=======================================================================
 # print('sub_catlist is: ')
 # for i in range(10):
 #     print(sub_catlist[i])
 # print('sub_catlist\'s length is : ',len(sub_catlist))
 # print('-----------------------------------------')
 #=======================================================================
 #===========================================================================
 # 输出sub_catlist中的频繁项,运用fp_growth算法
 #===========================================================================
 catfreq_list, support_data_cat = fp_growth.fptree(sub_catlist, int(min_support*len(sub_catlist)))
 #=======================================================================
 # print('catfreq_list is: ')
 # for i in range(len(catfreq_list)):
 #     print(catfreq_list[i])
 # print('-----------------------------------------')
 #=======================================================================
 #===========================================================================
 # for i in support_data.items():
 #     print(i)
 #===========================================================================
 big_rule_list = apriori.generate_rules(catfreq_list[0:2], support_data_cat, min_confidence)
 #print('rule list follows: ')
 cat_rule_list = []
 for rule in big_rule_list:
     if test_one_category in rule[0] and len(rule[0]) == 1:
        if testcategory[0] in catlist[i]:
            sub_catlist.append(catlist[i])

    # ===========================================================================
    # 输出sub_catlist 10行
    # ===========================================================================
    # ===========================================================================
    # for i in range(10):
    #     print(sub_catlist[i])
    print("sub_catlist's length is : ", len(sub_catlist))
    # ===========================================================================

    # ===========================================================================
    # 输出sub_catlist中的频繁项,运用fp_growth算法
    # ===========================================================================
    catfreq_list, support_data = fp_growth.fptree(sub_catlist, 100000)
    print("catfreq_list is: ")
    for i in range(len(catfreq_list)):
        print(catfreq_list[i])

    # ===========================================================================
    # for i in support_data.items():
    #     print(i)
    # ===========================================================================

    big_rule_list = apriori.generate_rules(catfreq_list, support_data, 0.5)
    print("rule list follows: ")
    for rule in big_rule_list:
        if test_one in rule[0] and len(rule[0]) == 1:
            print(rule)