示例#1
0
def gen_rules(filepath, args):
    """Mine association rules from a transaction file with FP-growth.

    Every line of the file is treated as one transaction whose items are
    separated by whitespace.

    Args:
        filepath: Path to a UTF-8 encoded text file of transactions.
        args: Object exposing ``support`` (multiplied by the number of
            transactions to obtain the threshold handed to ``fpgrowth``) and
            ``confidence`` (forwarded as ``minConf`` to
            ``fpgrowth.generateRules``).

    Returns:
        The rules whose second element contains exactly one item
        (presumably the consequent), sorted in descending order of their
        third element (presumably the confidence). An empty list is
        returned when the file does not exist or holds 100 or fewer
        transactions.
    """
    if not os.path.exists(filepath):
        print(filepath, 'not exists,please set the filepath')
        # Stop here: falling through would only crash in open() below.
        return []
    print('\n\n\n')
    print(
        '------------------------------处理文件%s-----------------------------------'
        % (os.path.basename(filepath)))
    with open(filepath, encoding='utf-8') as f:
        dataSet = [line.split() for line in f]
    # Too little transaction data: return no rules.
    # NOTE: the check also rejects exactly 100 transactions, although the
    # message below says "fewer than 100".
    if len(dataSet) <= 100:
        print(
            '-----------------------交易数据小于100条,不生成rules-------------------------'
        )
        return []
    # Threshold passed to fpgrowth: support fraction times transaction count.
    n = args.support * len(dataSet)
    initSet = fpgrowth.createInitSet(dataSet)
    myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
    # mineFPtree receives this list as its last argument; it is then used as
    # the collection of frequent item sets.
    freqItems = []
    fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set(), freqItems)
    # Compute support values of freqItems.
    suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(dataSet))
    # Register the empty item set with support 1.0 before generating rules.
    suppData[frozenset()] = 1.0

    rules = fpgrowth.generateRules(freqItems,
                                   suppData,
                                   minConf=args.confidence)
    # Keep only rules whose second element holds a single item.
    filter_rules = [rule for rule in rules if len(rule[1]) == 1]
    filter_rules = sorted(filter_rules, key=lambda p: p[2], reverse=True)
    print('number of association rules:\n', len(filter_rules))
    return filter_rules
示例#2
0
# myFPtree.disp()

# print fpgrowth.findPrefixPath('z', myHeaderTab)
# print fpgrowth.findPrefixPath('r', myHeaderTab)
# print fpgrowth.findPrefixPath('t', myHeaderTab)

# freqItems = []
# fpgrowth.mineFPtree(myFPtree, myHeaderTab, 3, set([]), freqItems)
# for x in freqItems:
#     print x
'''kosarak data'''
# Time an FP-growth run over the transaction file below.
start = time.time()
# Threshold passed to createFPtree / mineFPtree.
n = 20000
# Raw string: the backslashes in this Windows path are literal characters.
# Without the r-prefix, "\d" and "\w" are invalid escape sequences, which
# newer Python versions warn about.
with open(r"E:\dvancedos\database\webdocs.dat", "rb") as f:
    # Binary mode: every item produced by split() is a bytes object.
    parsedDat = [line.split() for line in f]
initSet = fpgrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
# mineFPtree receives this list as its last argument; it is printed and
# reused below as the collection of frequent item sets.
freqItems = []
fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set(), freqItems)
for x in freqItems:
    print(x)
print(time.time() - start, 'sec')

# Compute support values of freqItems.
suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(parsedDat))
# Register the empty item set with support 1.0 before generating rules.
suppData[frozenset()] = 1.0
for x, v in suppData.items():
    print(x, v)

# Convert each item set to a frozenset before handing it to generateRules.
freqItems = [frozenset(x) for x in freqItems]
# NOTE(review): the return value is discarded; presumably generateRules
# prints the rules itself -- confirm against the fpgrowth module.
fpgrowth.generateRules(freqItems, suppData)