Exemplo n.º 1
0
def test2():
    """Walk through the fpGrowth demo pipeline, printing each intermediate.

    Steps, in order: load the sample data, turn it into the initial set,
    build the tree and header table with threshold 3 (presumably the minimum
    support -- confirm against fpGrowth.createTree), display the tree, look
    up the prefix paths for item 'x', then run mineTree and print the list
    that was handed to it.

    Returns:
        None. All results are written to standard output.
    """
    # NOTE(review): other snippets in this file call fpGrowth.loadSimpDat();
    # confirm which loader name this fpGrowth module actually exports.
    simplDat = fpGrowth.loadSimpleData()
    # print() calls instead of Python 2 print statements: the statement form
    # is a SyntaxError on Python 3, and print("label", value) on Python 3
    # emits the same text the old statement did.
    print("Data: ", simplDat)
    initSet = fpGrowth.createInitSet(simplDat)
    print("initSet: ", initSet)
    myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
    myFPtree.disp()
    # Element [1] of the header-table entry is what findPrefixPath starts from.
    condPat = fpGrowth.findPrefixPath('x', myHeaderTab['x'][1])
    print("condPat: ", condPat)

    # freqItems is passed in empty and printed afterwards; mineTree is
    # presumably expected to fill it in place -- confirm.
    freqItems = []
    fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
    print("freqItems: ", freqItems)
# NOTE(review): rootNode is not defined anywhere above this line in the
# visible part of the file; it is presumably created earlier with
# fpGrowth.treeNode(...) -- confirm before running this fragment.
rootNode.disp()
# Register a second child under the key 'phoenix', then display again so the
# two disp() outputs can be compared.
rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)
rootNode.disp()

from importlib import reload
# Interactive-session transcript: the module is reloaded before each step,
# and the bare expressions below only echo a value when typed at a REPL
# (they have no visible effect when this is run as a script).
reload(fpGrowth)
simpDat = fpGrowth.loadSimpDat()
simpDat
initSet = fpGrowth.createInitSet(simpDat)
initSet
# Build the FP-tree
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

# Prefix-path lookups for items 'x', 'z' and 'r'; each starts from element
# [1] of that item's header-table entry.
reload(fpGrowth)
fpGrowth.findPrefixPath('x', myHeaderTab['x'][1])
fpGrowth.findPrefixPath('z', myHeaderTab['z'][1])
fpGrowth.findPrefixPath('r', myHeaderTab['r'][1])

# Mine the tree built above; freqItems is passed in empty and inspected
# afterwards (presumably filled in place by mineTree -- confirm).
reload(fpGrowth)
freqItems = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set([]), freqItems)
freqItems

# Example: mining the click stream of a news site (kosarak.dat).
# Each line of the file is split on whitespace into one list of tokens.
# The file is read inside a `with` block so the handle is closed as soon as
# parsing finishes instead of being left open until garbage collection.
with open('kosarak.dat') as clickStream:
    parsedDat = [line.split() for line in clickStream]
initSet = fpGrowth.createInitSet(parsedDat)
# The same threshold (100000) is used for building the tree and for mining it.
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 100000)
myFreqList = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 100000, set(), myFreqList)
# Bare expression: only echoes the count in an interactive session.
len(myFreqList)
Exemplo n.º 3
0
import fpGrowth

# Silent run of the fpGrowth demo pipeline: every print is commented out, so
# the script only leaves its results in module-level variables.
simpDat = fpGrowth.loadSimpDat()
# print simpDat

initSet = fpGrowth.createInitSet(simpDat)
# print initSet

# Build the tree and header table with threshold 3.
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)
# print myFpTree.disp()
# print myHeaderTab

# Prefix paths for item 'x', starting from element [1] of its header entry.
myCondPat = fpGrowth.findPrefixPath('x', myHeaderTab['x'][1])
# print myCondPat

freqItems = []

# NOTE(review): the return value of mineTree is kept in myfpGrowth, whereas
# the other snippets in this file read their results from the list passed as
# the last argument -- confirm that mineTree returns anything useful.
myfpGrowth = fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set([]), freqItems)
# print myFpTree.disp()
Exemplo n.º 4
0
Arquivo: 12.py Projeto: niumeng07/ML
# Hand-build a small tree and display it after each change.
rootNode = fpGrowth.treeNode("pyramid", 9, None)  #   this call runs the class's __init__ to create the first node
rootNode.children["eye"] = fpGrowth.treeNode("eye", 13, None)  #   rootNode's ['eye'] child is likewise a newly created treeNode
rootNode.display()
# NOTE(review): this node is stored in rootNode.children but is given the
# 'eye' node as its third constructor argument (presumably the parent) --
# confirm which of the two is intended.
rootNode.children["phoenix"] = fpGrowth.treeNode("phoenix", 3, rootNode.children["eye"])
rootNode.display()


# Run the demo pipeline on the sample data, printing each intermediate.
simpDat = fpGrowth.loadSimpDat()
print(simpDat)
initSet = fpGrowth.createInitSet(simpDat)
print("createTree with this initSet:", initSet)
# Build the tree and header table with threshold 3, then display the tree.
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.display()

# Prefix paths for items 'x', 'z' and 'r'; each lookup starts from element
# [1] of that item's header-table entry.
Result_x = fpGrowth.findPrefixPath("x", myHeaderTab["x"][1])
Result_z = fpGrowth.findPrefixPath("z", myHeaderTab["z"][1])
Result_r = fpGrowth.findPrefixPath("r", myHeaderTab["r"][1])
print(Result_x)
print(Result_z)
print(Result_r)
freqItems = []
# NOTE(review): this prints mineTree's return value, not freqItems; the
# other snippets in this file read the results from the list argument --
# confirm that mineTree returns something worth printing.
Result = fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set([]), freqItems)
print(Result)

# Build the tree for the kosarak.dat click-stream file.
print("100万记录用FP-growth来处理:\n")
# Read inside a `with` block so the file handle is closed once every line
# has been split on whitespace, rather than staying open until collected.
with open("kosarak.dat") as kosarakFile:
    parsedDat = [line.split() for line in kosarakFile]
initSet = fpGrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 100000)
# print(myFPtree,myHeaderTab)
# Empty result list prepared for a mining step that is not part of this
# snippet.
myFreqList = []
import fpGrowth

# Hand-build a small tree and display it before and after adding a second
# child.
rootNode = fpGrowth.treeNode('pyramid', 9, None)
rootNode.children['eye'] = fpGrowth.treeNode('eye', 13, None)
rootNode.disp()
rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)
rootNode.disp()

# The prints below use call syntax: the Python 2 statement form is a
# SyntaxError on Python 3, while a single parenthesised argument prints the
# same text on both versions.
simpDat = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simpDat)
print(initSet)

# Build the tree and header table with threshold 3, then display the tree.
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

# Prefix paths for items 'x', 'z' and 'r'; each lookup starts from element
# [1] of that item's header-table entry.
print(fpGrowth.findPrefixPath('x', myHeaderTab['x'][1]))
print(fpGrowth.findPrefixPath('z', myHeaderTab['z'][1]))
print(fpGrowth.findPrefixPath('r', myHeaderTab['r'][1]))

# freqItems is passed in empty and printed afterwards; mineTree is presumably
# expected to fill it in place -- confirm.
freqItems = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
print(freqItems)
# coding:utf-8

import fpGrowth

# Hand-build a small tree: a 'pyramid' root with an 'eye' child.
rootNode = fpGrowth.treeNode('pyramid', 9, None)

rootNode.children['eye'] = fpGrowth.treeNode('eye', 13, None)

rootNode.disp()

# A 'phoenix' child is added after the display call, so it does not appear
# in the output above and the tree is not displayed again.
rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)

# Load the sample data and print the initial set derived from it.
simpDat = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simpDat)
print(initSet)

# Build the tree and header table with threshold 3, then display the tree.
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

#print(myHeaderTab['r'][1])

# Prefix paths for item 'r', starting from element [1] of its header entry.
result = fpGrowth.findPrefixPath('r', myHeaderTab['r'][1])
print(result)
Exemplo n.º 7
0
# -*- coding:utf-8 -*-
import fpGrowth

# Basic steps of the algorithm:
# 1. Create the FP-tree data structure.
# 2. The first pass over the data set counts how often each item occurs;
#    items that do not meet the minimum support are dropped.
# 3. Sort the items within each transaction (i.e. each record) by their
#    absolute frequency.
# 4. Build the FP-tree: start from the empty set and keep adding frequent
#    item sets. That is, read each transaction's item set and add it to an
#    existing path.
#    If the element already exists in the tree, increment its value.
#    If the path does not exist, create a new path.

### Test the FP-tree data structure
#rootNode = fpGrowth.treeNode('pyramid',9,None)
#rootNode.children['eye'] = fpGrowth.treeNode('eye',13,None)
#rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix',3,None)
#rootNode.disp()


# Load the sample data, build the tree with threshold 3 and display it.
simData = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simData)
myFpTree,myHeaderTab = fpGrowth.createTree(initSet, 3)
myFpTree.disp()

# Prefix paths for item 'r', starting from element [1] of its header entry.
myCondPats = fpGrowth.findPrefixPath('r', myHeaderTab['r'][1])
print ('myCondPats is' , myCondPats)

# freqItems is passed in empty and printed afterwards; mineTree is presumably
# expected to fill it in place -- confirm.
freqItems = []
fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set ([]), freqItems)
print('频繁项集 is' , freqItems)

Exemplo n.º 8
0
# -*- coding:utf-8 -*-
import fpGrowth

# Basic steps of the algorithm:
# 1. Create the FP-tree data structure.
# 2. The first pass over the data set counts how often each item occurs;
#    items that do not meet the minimum support are dropped.
# 3. Sort the items within each transaction (i.e. each record) by their
#    absolute frequency.
# 4. Build the FP-tree: start from the empty set and keep adding frequent
#    item sets. That is, read each transaction's item set and add it to an
#    existing path.
#    If the element already exists in the tree, increment its value.
#    If the path does not exist, create a new path.

### Test the FP-tree data structure
#rootNode = fpGrowth.treeNode('pyramid',9,None)
#rootNode.children['eye'] = fpGrowth.treeNode('eye',13,None)
#rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix',3,None)
#rootNode.disp()

# Load the sample data, build the tree with threshold 3 and display it.
simData = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simData)
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFpTree.disp()

# Prefix paths for item 'r', starting from element [1] of its header entry.
myCondPats = fpGrowth.findPrefixPath('r', myHeaderTab['r'][1])
print('myCondPats is', myCondPats)

# freqItems is passed in empty and printed afterwards; mineTree is presumably
# expected to fill it in place -- confirm.
freqItems = []
fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set([]), freqItems)
print('频繁项集 is', freqItems)