def test2():
    """Run the FP-growth demo end to end and print each intermediate result.

    The steps are: load the sample data, turn it into the initial set,
    build the tree and header table, look up the prefix paths for item
    ``'x'``, and finally call ``mineTree`` with an empty prefix set.

    The literal ``3`` is handed to both ``createTree`` and ``mineTree``;
    it is presumably the minimum-support threshold (confirm in fpGrowth).
    """
    # NOTE(review): the other demo snippets call ``loadSimpDat``; confirm
    # that ``loadSimpleData`` is the name this fpGrowth module exports.
    simplDat = fpGrowth.loadSimpleData()
    print("Data: ", simplDat)

    initSet = fpGrowth.createInitSet(simplDat)
    print("initSet: ", initSet)

    myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
    myFPtree.disp()

    # Index 1 of the header-table entry is what findPrefixPath is given as
    # its starting node.
    condPat = fpGrowth.findPrefixPath('x', myHeaderTab['x'][1])
    print("condPat: ", condPat)

    # freqItems is passed in empty and printed afterwards; mineTree is
    # presumably expected to fill it in place.
    freqItems = []
    fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
    print("freqItems: ", freqItems)
# Interactive-style FP-growth walkthrough, made runnable as a plain script.
from importlib import reload

import fpGrowth

# The snippet used ``rootNode`` without creating it. The setup below mirrors
# the sibling demo snippets so the first ``disp()`` call has a tree to show.
# NOTE(review): confirm this matches the intended starting state.
rootNode = fpGrowth.treeNode('pyramid', 9, None)
rootNode.children['eye'] = fpGrowth.treeNode('eye', 13, None)
rootNode.disp()
rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)
rootNode.disp()

reload(fpGrowth)
simpDat = fpGrowth.loadSimpDat()
# Bare expressions only echo in a REPL; print them so a script run shows them.
print(simpDat)
initSet = fpGrowth.createInitSet(simpDat)
print(initSet)

# Build the FP-tree
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

reload(fpGrowth)
print(fpGrowth.findPrefixPath('x', myHeaderTab['x'][1]))
print(fpGrowth.findPrefixPath('z', myHeaderTab['z'][1]))
print(fpGrowth.findPrefixPath('r', myHeaderTab['r'][1]))

reload(fpGrowth)
freqItems = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
print(freqItems)

# Example: mining the click stream of a news site
# The context manager closes kosarak.dat once every line has been split.
with open('kosarak.dat') as kosarak_file:
    parsedDat = [line.split() for line in kosarak_file]
initSet = fpGrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 100000)
myFreqList = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 100000, set(), myFreqList)
print(len(myFreqList))
import fpGrowth

# Sample data -> initial set -> tree plus header table.
# The 3 is passed to createTree unchanged.
simpDat = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simpDat)
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)

# Prefix paths for item 'x', started from index 1 of its header-table entry.
myCondPat = fpGrowth.findPrefixPath('x', myHeaderTab['x'][1])

# Run the miner with an empty prefix set; whatever it returns is kept.
freqItems = list()
myfpGrowth = fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set(), freqItems)

# Debug aids (disabled): print simpDat, initSet, myHeaderTab, myCondPat,
# or call myFpTree.disp() to inspect the intermediate results.
# FP-growth demo: hand-built tree, sample-data mining, then the kosarak data.
# The snippet used fpGrowth without importing it; import it explicitly.
import fpGrowth

# Calling treeNode(...) runs the class's __init__ to create the first node.
rootNode = fpGrowth.treeNode("pyramid", 9, None)
# The 'eye' child of rootNode is likewise a newly created treeNode.
rootNode.children["eye"] = fpGrowth.treeNode("eye", 13, None)
rootNode.display()
# Unlike the nodes above, this one receives the 'eye' node as its third argument.
rootNode.children["phoenix"] = fpGrowth.treeNode("phoenix", 3, rootNode.children["eye"])
rootNode.display()

simpDat = fpGrowth.loadSimpDat()
print(simpDat)
initSet = fpGrowth.createInitSet(simpDat)
print("createTree with this initSet:", initSet)
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.display()

# Prefix paths for three items, each started from index 1 of its header entry.
Result_x = fpGrowth.findPrefixPath("x", myHeaderTab["x"][1])
Result_z = fpGrowth.findPrefixPath("z", myHeaderTab["z"][1])
Result_r = fpGrowth.findPrefixPath("r", myHeaderTab["r"][1])
print(Result_x)
print(Result_z)
print(Result_r)

freqItems = []
# NOTE(review): the return value of mineTree is printed here; confirm that
# it returns something useful rather than only filling freqItems.
Result = fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
print(Result)

print("100万记录用FP-growth来处理:\n")
# The context manager closes kosarak.dat once every line has been split.
with open("kosarak.dat") as kosarak_file:
    parsedDat = [line.split() for line in kosarak_file]
initSet = fpGrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 100000)
# print(myFPtree, myHeaderTab)
myFreqList = []
import fpGrowth

# Hand-build a three-node tree and display it before and after adding
# the second child.
rootNode = fpGrowth.treeNode('pyramid', 9, None)
rootNode.children['eye'] = fpGrowth.treeNode('eye', 13, None)
rootNode.disp()
rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)
rootNode.disp()

# Build the tree and header table from the sample data.
simpDat = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simpDat)
# print() calls replace the Python 2 print statements; with a single
# argument the output is the same on either interpreter.
print(initSet)
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

# Prefix paths for three items, each started from index 1 of its header entry.
print(fpGrowth.findPrefixPath('x', myHeaderTab['x'][1]))
print(fpGrowth.findPrefixPath('z', myHeaderTab['z'][1]))
print(fpGrowth.findPrefixPath('r', myHeaderTab['r'][1]))

# freqItems is passed in empty and printed after the mineTree call.
freqItems = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
print(freqItems)
# coding:utf-8
import fpGrowth

# Assemble a small tree by hand; it is displayed once, after the first child
# is attached and before the second one is added.
rootNode = fpGrowth.treeNode("pyramid", 9, None)
rootNode.children["eye"] = fpGrowth.treeNode("eye", 13, None)
rootNode.disp()
rootNode.children["phoenix"] = fpGrowth.treeNode("phoenix", 3, None)

# Sample data -> initial set -> tree plus header table.
simpDat = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simpDat)
print(initSet)
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

# Prefix paths for item 'r', started from index 1 of its header-table entry.
# Debug aid (disabled): print(myHeaderTab["r"][1])
result = fpGrowth.findPrefixPath("r", myHeaderTab["r"][1])
print(result)
# -*- coding:utf-8 -*-
import fpGrowth

# Basic flow of the algorithm:
# 1. Create the FP-tree data structure.
# 2. The first pass over the dataset counts how often each item occurs;
#    items that do not meet the support threshold are dropped.
# 3. Sort the items within each transaction (i.e. each record), ordered by
#    the absolute frequency of the items.
# 4. Build the FP-tree. Starting from the empty set, keep adding frequent
#    itemsets: read the itemset of each transaction and add it to an
#    existing path.
#    If the item already exists in the tree, increment its count.
#    If the path does not exist, create a new path.

### Test the FP-tree data structure
# rootNode = fpGrowth.treeNode('pyramid', 9, None)
# rootNode.children['eye'] = fpGrowth.treeNode('eye', 13, None)
# rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)
# rootNode.disp()

# Sample data -> initial set -> tree plus header table.
simData = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simData)
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFpTree.disp()

# Prefix paths for item 'r', started from index 1 of its header-table entry.
myCondPats = fpGrowth.findPrefixPath("r", myHeaderTab["r"][1])
print('myCondPats is', myCondPats)

# freqItems is passed in empty and printed after the mineTree call.
freqItems = list()
fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set(), freqItems)
print('频繁项集 is', freqItems)
# -*- coding:utf-8 -*-
import fpGrowth

# Outline of the algorithm:
#   1. Create the data structure for the FP-tree.
#   2. Scan the dataset once to get the occurrence frequency of every item,
#      and discard the items that fall short of the support threshold.
#   3. Sort the set of items in every transaction (every record); the order
#      is based on the absolute occurrence frequency of the items.
#   4. Construct the FP-tree. Begin with the empty set and keep adding
#      frequent itemsets: each transaction's itemset is read and added to
#      an already existing path.
#      - When the item is already present in the tree, its count is increased.
#      - When the path is not present, a new path is created.

### Test the FP-tree data structure
# rootNode = fpGrowth.treeNode('pyramid', 9, None)
# rootNode.children['eye'] = fpGrowth.treeNode('eye', 13, None)
# rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix', 3, None)
# rootNode.disp()

# Build the tree and header table from the sample data, then display the tree.
simData = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simData)
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFpTree.disp()

# Look up the prefix paths for 'r' via index 1 of its header-table entry.
myCondPats = fpGrowth.findPrefixPath("r", myHeaderTab["r"][1])
print('myCondPats is', myCondPats)

# Call the miner with an empty prefix set, then print the list passed to it.
freqItems = []
fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set(), freqItems)
print('频繁项集 is', freqItems)