Exemplo n.º 1
0
def generate_frequent_items(minimum_support):
    """Mine frequent itemsets from the module-level ``data_list`` with FP-growth.

    Args:
        minimum_support: Relative support threshold, expressed as a fraction
            of the number of transactions in ``data_list`` (e.g. ``0.07``).
            It is multiplied by ``len(data_list)`` to obtain the count-based
            threshold passed to ``fpg.find_frequent_itemsets``.

    Returns:
        dict: Maps each frequent itemset (as a ``frozenset`` of its items) to
        its relative support, i.e. the support reported by the miner divided
        by ``len(data_list)``.

    Side effects:
        Prints the type of the miner's result and a table of the mined
        patterns to stdout.
    """
    transaction_count = len(data_list)

    # Honour the caller's threshold instead of a fixed 0.07: the parameter
    # used to be accepted but silently ignored.
    frequent_itemsets = fpg.find_frequent_itemsets(
        data_list,
        minimum_support=minimum_support * transaction_count,
        include_support=True,
    )
    print(type(frequent_itemsets))  # print type

    # Consume the miner's result once, converting supports to fractions of
    # the transaction count.
    result = [
        (itemset, support / transaction_count)
        for itemset, support in frequent_itemsets
    ]

    # The DataFrame exists only for the printed report below.
    patterns_df = pd.DataFrame({
        "fluent_patterns": [itemset for itemset, _ in result],
        "support": [support for _, support in result],
    })

    # frozenset keys make the lookup independent of item order.
    patterns = {
        frozenset(sorted(itemset)): support for itemset, support in result
    }

    print("-------------挖掘频繁项集---------------")
    print(patterns_df)
    print("--------------频繁项集------------------")
    return patterns
Exemplo n.º 2
0
# python3
# -*- coding: utf-8 -*-
# @Author  : lina
# @Time    : 2018/5/13 11:40
import fp_growth_py3 as fpg

# Sample data set: every inner list is one transaction.
dataset = [
    ['啤酒', '牛奶', '可乐'],
    ['尿不湿', '啤酒', '牛奶', '橙汁'],
    ['啤酒', '尿不湿'],
    ['啤酒', '可乐', '尿不湿'],
    ['啤酒', '牛奶', '可乐'],
]

if __name__ == '__main__':
    # Call find_frequent_itemsets() to generate the frequent itemsets.
    #   minimum_support: the minimum support; an itemset whose support is
    #       greater than or equal to it is kept, otherwise it is dropped.
    #   include_support: whether the result carries the support; when True
    #       each result holds both itemset and support, otherwise only the
    #       itemset.
    frequent_itemsets = fpg.find_frequent_itemsets(
        dataset, minimum_support=1, include_support=True)
    print(type(frequent_itemsets))  # print type

    # Drain the result and order it by itemset in a single step.
    result = sorted(frequent_itemsets, key=lambda pair: pair[0])

    # One "<itemset> <support>" line per frequent itemset.
    for itemset, support in result:
        print(itemset, support)
Exemplo n.º 3
0
            f.write(k + ' ' + str(v) + '\n')


if __name__ == '__main__':
    # Script entry point: read the articles, split them into words, turn them
    # into transactions, clean them, mine frequent patterns, then write the
    # full pattern list and the food-only subset to disk.

    print('Reading input file...')
    # NOTE(review): `input` is presumably a file-reading helper defined
    # earlier in this file that shadows the builtin -- confirm.
    articles = input('台南 美食(100頁).txt')

    print('Splitting...')
    s = split_into_word(articles, 'str')

    print('Converting to transactions...')
    trans = text2trans(s)

    sw_path = 'stop_words.txt'
    print('Cleaning symbols...')
    trans = clean(trans, sw_path)  # remove stop words
    trans = rm_dup(trans)

    print('Finding frequent patterns...')
    # Keep the mined itemsets under their own name. Assigning them back to
    # `fp` would overwrite the object that provides find_frequent_itemsets
    # and make any later call through `fp` fail.
    # NOTE(review): the positional arguments 2 and True are presumably
    # minimum_support and include_support -- confirm against the library.
    patterns = fp.find_frequent_itemsets(trans, 2, True)
    patterns = sort(patterns)  # sort by support

    food = food_dict('food_list.txt')
    result = find_food(patterns, food)  # returns a dictionary

    print('Writing output file...')
    output(patterns, 'fp台南(100頁).txt')
    output_result(result, 'fp台南(100頁)(only food).txt')

    print('Done!')