Exemplo n.º 1
0
def finallyResult(currentList, simpDat):
    """Return the distinct recommended items for ``currentList``.

    An FP-tree is built from ``simpDat`` and mined into a list of frequent
    itemsets; ``igemRecomdData.getResult`` then selects groups from that
    list for ``currentList``.  The groups are flattened into one list in
    which every item appears once, in first-seen order.

    Args:
        currentList: Forwarded unchanged to ``igemRecomdData.getResult``.
        simpDat: Raw data handed to ``fpTree.createInitSet``.

    Returns:
        A list of the unique items found across all returned groups.
    """
    # The same threshold is given to createTree and mineTree
    # (presumably the minimum support count -- confirm against fpTree).
    threshold = 2

    init_set = fpTree.createInitSet(simpDat)
    tree, header_table = fpTree.createTree(init_set, threshold)

    mined = []  # filled in place by mineTree
    fpTree.mineTree(tree, header_table, threshold, set(), mined)

    result = []
    for group in igemRecomdData.getResult(currentList, mined):
        for element in list(group):
            if element in result:
                continue
            result.append(element)
    return result
Exemplo n.º 2
0
def ibm_fptree(min_support, min_confidence):
    """Run the FP-growth pipeline on the IBM Quest data file.

    Steps: read the transactions with ``DataReader.readDataIBM``, build the
    L1 table with ``apriori.createL1``, drop L1 rows whose column-0 value
    stringifies to ``'None'``, rewrite every transaction in descending
    ``count`` order, build the FP tree, run ``fpTree.fpGrowth``, print the
    distinct patterns with their counts and pass them to
    ``generateRule.generateRule``.

    Args:
        min_support: Forwarded to ``apriori.createL1`` and
            ``fpTree.fpGrowth``.
        min_confidence: Forwarded to ``generateRule.generateRule``.
    """
    transactions = DataReader.readDataIBM(
        './dataset/IBM-Quest-Data-Generator.exe/ttt.data.txt')
    L1 = apriori.createL1(pd.DataFrame(transactions).T, min_support)

    # Remove rows whose column-0 value prints as 'None'.  The index is only
    # renumbered after the loop, so the original labels remain valid while
    # rows are being dropped.
    for label in range(len(L1)):
        if str(L1[0][label]) == 'None':
            L1 = L1.drop([label], axis=0)
    L1 = L1.reset_index(drop=True)

    by_count = L1.sort_values(['count'], ascending=False)
    by_count = by_count.reset_index(drop=True)

    # Rewrite each transaction so it holds only the values listed in the
    # sorted table, in that table's order.
    ordered_transactions = [
        [by_count[0][pos] for pos in range(len(by_count))
         if by_count[0][pos] in transactions[row]]
        for row in range(len(transactions))
    ]

    # Build the FP tree
    root = fpTree.createTree(ordered_transactions)

    # FP growth
    patterns = fpTree.fpGrowth(by_count, root, min_support)

    # Keep the first occurrence of every pattern value and its count.
    freq_itemset = []
    freq_itemset_count = []
    for idx in range(len(patterns)):
        pattern = patterns[idx]
        if pattern.val not in freq_itemset:
            freq_itemset.append(pattern.val)
            freq_itemset_count.append(pattern.count)

    counts_frame = pd.DataFrame(freq_itemset_count, columns=['count'])
    items_frame = pd.DataFrame(freq_itemset)
    print('freq itemsets:')
    print(pd.concat([counts_frame, items_frame], axis=1))

    generateRule.generateRule(freq_itemset, freq_itemset_count, L1,
                              min_confidence)
Exemplo n.º 3
0
def kaggle_fptree(min_support, min_confidence):
    """Run the FP-growth pipeline on ``./dataset/BreadBasket_DMS.csv``.

    Steps: read the transactions with ``DataReader.readDataKaggle``, build
    the L1 table with ``apriori.createL1``, rewrite every transaction in
    descending ``count`` order, build the FP tree, run ``fpTree.fpGrowth``,
    print the distinct patterns with their counts and pass them to
    ``generateRule.generateRule``.

    Args:
        min_support: Forwarded to ``apriori.createL1`` and
            ``fpTree.fpGrowth``.
        min_confidence: Forwarded to ``generateRule.generateRule``.
    """
    transactions = DataReader.readDataKaggle('./dataset/BreadBasket_DMS.csv')
    L1 = apriori.createL1(pd.DataFrame(transactions).T, min_support)

    by_count = L1.sort_values(['count'], ascending=False)
    by_count = by_count.reset_index(drop=True)

    # Rewrite each transaction so it holds only the values listed in the
    # sorted table, in that table's order.
    ordered_transactions = [
        [by_count[0][pos] for pos in range(len(by_count))
         if by_count[0][pos] in transactions[row]]
        for row in range(len(transactions))
    ]

    # Build the FP tree
    root = fpTree.createTree(ordered_transactions)

    # FP growth
    patterns = fpTree.fpGrowth(by_count, root, min_support)

    # Keep the first occurrence of every pattern value and its count.
    freq_itemset = []
    freq_itemset_count = []
    for idx in range(len(patterns)):
        pattern = patterns[idx]
        if pattern.val not in freq_itemset:
            freq_itemset.append(pattern.val)
            freq_itemset_count.append(pattern.count)

    counts_frame = pd.DataFrame(freq_itemset_count, columns=['count'])
    items_frame = pd.DataFrame(freq_itemset)
    print('freq itemsets:')
    print(pd.concat([counts_frame, items_frame], axis=1))

    generateRule.generateRule(freq_itemset, freq_itemset_count, L1,
                              min_confidence)
Exemplo n.º 4
0
        #    print str(rule.cause) + '->' + str(rule.effect) + '(support:' + str(rule.support) + ' confidence:' + str(
        #        rule.confidence) + ')'
            counter+=1
    return counter

# Script entry point (Python 2 syntax: uses print statements).
# Mines frequent itemsets from generated data, derives rules from them and
# plots what print_rules returns across a sweep of its third argument.
if __name__ == "__main__":
    # NOTE(review): total_data is not referenced again in the visible part
    # of this script.
    total_data = [[0, 1, 2, 3], [0, 1, 2], [2, 3], [0, 1, 4]]
    # Minimum support
    minSup = 200
    test_data = ge_association_data(10000)

    # Format the initial set
    initSet = fpTree.createInitSet(test_data)

    # Build the FP tree
    myFPtree, myHeaderTab = fpTree.createTree(initSet, minSup)

    # Create an empty list that mineTree fills with the frequent itemsets
    myFreqList = []
    fpTree.mineTree(myFPtree, myHeaderTab, minSup, set([]), myFreqList)
    # The two labels below are Chinese for "number of frequent itemsets:"
    # and "frequent itemsets:".
    print "频繁项集个数:", len(myFreqList)
    print "频繁项集:", myFreqList
    rules = generate_rules(myFreqList,initSet)
    rule_nums = []
    # 30 evenly spaced values from 0.2 to 0.6 (both ends included).
    ratios = np.linspace(0.2, 0.6, 30)
    for ratio in ratios:
        print ratio
        # Second argument is fixed at 0.02; presumably (support, confidence)
        # thresholds -- confirm against print_rules.
        rule_num = print_rules(rules,0.02,ratio)
        rule_nums.append(rule_num)
        print rule_num
    # NOTE(review): no plt.show()/savefig follows in the visible part.
    plt.plot(rule_nums)
Exemplo n.º 5
0
def test_fptree(min_support, min_confidence):
    """Run the FP-growth pipeline on ``./dataset/Test.csv``.

    The CSV is read with pandas and consecutive rows sharing the same
    ``TID`` are grouped into one transaction of ``Item`` values.  The rest
    mirrors the other dataset runners: build the L1 table, rewrite every
    transaction in descending ``count`` order, build the FP tree, run
    ``fpTree.fpGrowth``, print the distinct patterns with their counts and
    pass them to ``generateRule.generateRule``.

    Args:
        min_support: Forwarded to ``apriori.createL1`` and
            ``fpTree.fpGrowth``.
        min_confidence: Forwarded to ``generateRule.generateRule``.
    """
    df = pd.read_csv('./dataset/Test.csv')

    # Split the rows into transactions: a new transaction starts whenever a
    # row's TID is not equal to the TID of the row before it.
    transactions = []
    current = []
    for row in range(len(df)):
        if row > 0 and not (df['TID'][row] == df['TID'][row - 1]):
            transactions.append(current)
            current = []
        current.append(df['Item'][row])
    # The trailing group is always recorded, even when the file has no rows.
    transactions.append(current)

    L1 = apriori.createL1(pd.DataFrame(transactions).T, min_support)

    by_count = L1.sort_values(['count'], ascending=False)
    by_count = by_count.reset_index(drop=True)

    # Rewrite each transaction so it holds only the values listed in the
    # sorted table, in that table's order.
    ordered_transactions = [
        [by_count[0][pos] for pos in range(len(by_count))
         if by_count[0][pos] in transaction]
        for transaction in transactions
    ]

    # Build the FP tree
    root = fpTree.createTree(ordered_transactions)

    # FP growth
    patterns = fpTree.fpGrowth(by_count, root, min_support)

    # Keep the first occurrence of every pattern value and its count.
    freq_itemset = []
    freq_itemset_count = []
    for idx in range(len(patterns)):
        pattern = patterns[idx]
        if pattern.val not in freq_itemset:
            freq_itemset.append(pattern.val)
            freq_itemset_count.append(pattern.count)

    counts_frame = pd.DataFrame(freq_itemset_count, columns=['count'])
    items_frame = pd.DataFrame(freq_itemset)
    print('freq itemsets:')
    print(pd.concat([counts_frame, items_frame], axis=1))

    generateRule.generateRule(freq_itemset, freq_itemset_count, L1,
                              min_confidence)