def _put(self, cn, key, value):
    """Insert ``key``/``value`` below ``cn`` as a new leaf.

    Starting at ``cn``, the search goes right when the node's key is
    strictly smaller than ``key`` and left otherwise (so equal keys end up
    in the left subtree). The first missing child on that path is replaced
    by a new ``treeNode`` whose parent is the node it hangs from.

    The descent is a loop rather than a recursive call: the recursive form
    needed one stack frame per tree level and raised ``RecursionError`` on
    tall (e.g. sorted-insert) trees.

    :param cn: node at which the search for the insertion point starts
    :param key: key to insert; must be comparable with the stored keys
    :param value: payload stored with the key
    :return: None
    """
    while True:
        if cn.key < key:
            if not cn.hasRightChild():
                cn.right = treeNode(key=key, value=value, parent=cn)
                return
            cn = cn.right
        else:
            if not cn.hasLeftChild():
                cn.left = treeNode(key=key, value=value, parent=cn)
                return
            cn = cn.left
def _build_tree(self, x, y):
    """Recursively build the decision (sub)tree for samples ``x`` / targets ``y``.

    ``self._choose_split_index(x, y)`` supplies the column index to split
    on, the split value, and the four arrays produced by that split.

    * If no split index is returned, or ``y`` holds a single distinct
      value, the node becomes a leaf: ``leaf`` is set, ``classes`` is a
      ``Counter`` of ``y`` and ``name`` is its most common label.
    * Otherwise the node records the split (``column``, ``value``,
      ``header``) and its ``left`` / ``right`` children are built from the
      two halves of the split.

    :param x: feature rows for this subtree
    :param y: target values aligned with ``x``
    :return: the ``treeNode`` rooting this subtree
    """
    node = treeNode()
    index, value, splits = self._choose_split_index(x, y)

    # Internal node: a split exists and the targets are not yet pure.
    if index is not None and len(np.unique(y)) != 1:
        left_x, left_y, right_x, right_y = splits
        node.column = index
        node.value = value
        # Column label taken from the stored header row; per the original
        # author's note it is only needed when printing the tree.
        node.header = self.array_header[0][index]
        node.left = self._build_tree(left_x, left_y)
        node.right = self._build_tree(right_x, right_y)
        return node

    # Leaf: remember the label distribution and predict the majority label.
    node.leaf = True
    node.classes = Counter(y)
    node.name = node.classes.most_common(1)[0][0]
    return node
def createTreeNodes(self, ls, node, depthl, strategy, heu):
    """
    Build one child ``treeNode`` for every successor of ``node``.

    The heuristic value ``h`` passed to each child is computed per
    successor and starts from 0 every time:

    * no remaining ``result._nodes``: ``h = 0``
    * ``heu == 'h1'``: distance from ``result._current`` to the closest
      remaining node
    * ``heu == 'h0'``: that same distance plus the smallest distance
      between any two remaining nodes (plus 0 when fewer than two remain)
    * any other ``heu``: ``h = 0``

    Two defects of the previous version are fixed here: ``h`` leaked from
    one successor to the next, and the ``'h0'`` branch used the distance of
    the *last* pair instead of the minimum over all pairs.

    :param ls: iterable of ``(action, result, cost)`` successor triples
    :param node: parent node being expanded (its ``_d`` is compared with
        ``depthl``)
    :param depthl: depth limit int
    :param strategy: strategy int, forwarded to ``treeNode``
    :param heu: heuristic string (``'h0'`` or ``'h1'``)
    :return: list of child nodes; empty when ``node._d`` exceeds ``depthl``
    """
    if not depthl >= node._d:
        return []

    nodes = []
    for action, result, cost in ls:
        h = 0  # reset so a previous successor's estimate never leaks in
        if result._nodes:
            dmin = min(
                self.distance(result._current, n) for n in result._nodes
            )
            if heu == 'h1':
                h = dmin
            elif heu == 'h0':
                closest_pair = min(
                    (
                        self.distance(a, b)
                        for a, b in itertools.combinations(result._nodes, 2)
                    ),
                    default=0,  # fewer than two nodes left: no pair term
                )
                h = dmin + closest_pair
        nodes.append(
            treeNode(result, strategy, node, float(cost), action, h)
        )
    return nodes
def limSearch(problem, strategy, depthl, pruning, heu):
    """
    Run a depth-limited frontier search on ``problem``.

    The frontier is seeded with a node for ``problem._init_state``. Nodes
    are removed one at a time; the first one whose state satisfies
    ``problem.isGoal`` ends the search. Otherwise its successors are turned
    into nodes by ``problem.createTreeNodes`` and inserted.

    With ``pruning`` enabled, ``problem._visitedList`` maps a state's
    ``_md5`` to the ``_f`` of the node last inserted for it; a child is
    inserted only if its state is new or its ``abs(_f)`` is smaller than
    the recorded one.

    :param problem: problem class object (its ``_visitedList`` is reset)
    :param strategy: strategy int
    :param depthl: depth limit int
    :param pruning: pruning option boolean
    :param heu: heuristic string
    :return: ``(goal node, number of frontier insertions)`` when a goal is
        found, otherwise ``None``
    """
    fringe = frontier()
    problem._visitedList = {}

    start = treeNode(problem._init_state, strategy)
    fringe.insert(start)
    inserted = 1
    problem._visitedList[start._state._md5] = start._f

    while not fringe.isEmpty():
        current = fringe.remove()
        if problem.isGoal(current._state):
            return current, inserted

        successors = problem._state_space.successors(current._state)
        children = problem.createTreeNodes(
            successors, current, depthl, strategy, heu)

        if pruning:
            for child in children:
                digest = child._state._md5
                if (digest not in problem._visitedList
                        or abs(child._f) < abs(problem._visitedList[digest])):
                    fringe.insert(child)
                    inserted += 1
                    problem._visitedList[digest] = child._f
        else:
            for child in children:
                fringe.insert(child)
                inserted += 1

    # Frontier exhausted without reaching a goal.
    return None
def put(self, key, value):
    """Add ``key``/``value`` to the tree and bump ``self.size``.

    An empty tree (falsy ``self.root``) gets a new root node; otherwise the
    insertion is delegated to ``self._put`` starting from the root.

    :param key: key to insert
    :param value: payload stored with the key
    :return: None
    """
    self.size += 1
    if self.root:
        self._put(self.root, key, value)
        return
    self.root = treeNode(key=key, value=value)
def __init__(self, system, samples, boolParams, numParams, fitness=1.0):
    '''Create a unit decision maker for genetics.

    system     -- object providing ``keyStatements`` and ``isMajorant``
    samples    -- iterable of samples; stored as a set
    boolParams -- available boolean statements
    numParams  -- available numeric statements
    fitness    -- initial fitness value (default 1.0)
    '''
    sample_set = set(samples)
    key_statements = set(system.keyStatements)
    bool_statements = set(boolParams)
    num_statements = set(numParams)

    self.samples = sample_set
    self.keyStatements = key_statements
    # Counts only expanded trees.
    self.numNodes = 0
    self.tree = treeNode.treeNode(
        sample_set, key_statements, system.isMajorant)
    self.fitness = fitness
    # The fringe starts out holding just the root tree node.
    self.fringe = {self.tree}
    self.boolStatements = bool_statements
    self.numStatements = num_statements
    # <bool/num> statement -> set of nodes where it is applied.
    every_statement = set(list(bool_statements) + list(num_statements))
    self.stateUseDict = {stmt: set() for stmt in every_statement}
    # "X > C" statement -> numeric statement X it came from.
    self.originDict = {}
def createTree(dataSet, minSup=1):
    """
    Build an FP-tree from a weighted transaction set.

    :param dataSet: training data, a mapping ``transaction -> count``
    :example
        {
            frozenset({'z'}): 1,
            frozenset({'h', 'j', 'p', 'r', 'z'}): 1,
            frozenset({'t', 'w', 'u', 'v', 'z', 's', 'x', 'y'}): 1,
            frozenset({'n', 'o', 's', 'x', 'r'}): 1,
            frozenset({'t', 'y', 'q', 'p', 'x', 'r', 'z'}): 1,
            frozenset({'t', 'y', 'q', 'm', 'e', 's', 'x', 'z'}): 1
        }
    :param minSup: minimum support an item needs to be kept
    :return: ``(root node, head table)``, or ``(None, None)`` when no item
        reaches ``minSup``. The head table maps each frequent item to
        ``[support, first tree node or None]``.

    Items are inserted in one global order (descending support). The
    previous version sorted each transaction by support alone, so items
    with equal support could be ordered differently from one transaction
    to the next, which breaks the shared-prefix property of the tree.
    """
    # Total support of every item over all transactions.
    support = {}
    for trans, occurrences in dataSet.items():
        for item in trans:
            support[item] = support.get(item, 0) + occurrences

    # Head table: keep frequent items only, stored as [support, node link].
    headTable = {
        item: [total, None]
        for item, total in support.items()
        if total >= minSup
    }
    if not headTable:
        return None, None

    # One fixed position per frequent item, so ties in support are broken
    # the same way in every transaction.
    by_support = sorted(
        headTable, key=lambda item: headTable[item][0], reverse=True)
    rank = {item: position for position, item in enumerate(by_support)}

    retTree = treeNode('Null Set', 1, None)
    for tranSet, count in dataSet.items():
        orderedItems = sorted(
            (item for item in tranSet if item in rank),
            key=rank.__getitem__)
        if orderedItems:
            updateTree(orderedItems, retTree, headTable, count)
    return retTree, headTable
def updateTree(orderedItems, retTree: treeNode, headTable, count):
    """Insert the item sequence ``orderedItems`` below ``retTree``.

    The first item is matched against ``retTree.children``:

    * existing child: its counter is increased by ``count``;
    * missing child: a new ``treeNode`` is created and linked from the head
      table, either directly (when the table holds no node for the item
      yet) or through ``updateHeader``.

    The remaining items are then inserted recursively below that child.

    :param orderedItems: non-empty sequence of items, already ordered
    :param retTree: node under which the sequence is inserted
    :param headTable: mapping ``item -> [support, first node or None]``
    :param count: occurrence count of the transaction being inserted
    """
    item = orderedItems[0]
    children = retTree.children

    if item not in children:
        # New branch: create the node and register it in the head table.
        child = treeNode(item, count, retTree)
        children[item] = child
        entry = headTable[item]
        if entry[1] is None:
            entry[1] = child
        else:
            updateHeader(entry[1], child)
    else:
        # Shared prefix: only the counter changes.
        children[item].inc(count)

    if len(orderedItems) > 1:
        updateTree(orderedItems[1:], children[item], headTable, count)
def __init__(self, keyParams, boolParams, numParams, samples,
             samplesCount, nodesCount, majorant=False):
    '''The LICS itself.

    keyParams, boolParams, numParams -- parameter lists; each is kept as
        given and also mapped to "take value" statements
    samples      -- iterable of samples; stored as a set
    samplesCount -- stored and forwarded to the k-means clusteriser
    nodesCount   -- stored as given
    majorant     -- forwarded to the root tree node and stored as
        ``isMajorant``
    '''
    # TODO (original author's note): reverse keyparams pls!

    def take_value(param):
        return statements.get_statement(statements.op_takeValue, param)

    self.boolParams = boolParams
    self.numParams = numParams
    self.keyParams = keyParams

    self.keyStatements = [take_value(p) for p in keyParams]
    self.boolStatements = [take_value(p) for p in boolParams]
    self.numStatements = [take_value(p) for p in numParams]

    self.samples = set(samples)
    self.samplesCount = samplesCount
    self.nodesCount = nodesCount

    self.tree = treeNode.treeNode(self.samples, self.keyStatements, majorant)
    # The clusteriser receives this (partially initialised) object;
    # isMajorant is deliberately still assigned afterwards, as before.
    self.clusteriser = cluster.kmeans(
        self, numParams, boolParams + keyParams, samplesCount)
    self.isMajorant = majorant
def updateTree(items, inTree, headerTable, count):
    """
    Grow the FP-tree by inserting one ordered transaction.

    Walking down from ``inTree``, each item is looked up among the current
    node's children. An existing child only has its counter increased by
    ``count``. A missing child is created as a new ``treeNode`` and linked
    from the header table: stored directly when the table holds no node
    for the item yet, otherwise attached through ``updateHeader``.

    The walk is a loop; the earlier recursive version re-sliced ``items``
    at every level and raised ``IndexError`` for an empty ``items``. An
    empty ``items`` is now a no-op.

    :param items: frequent items of one transaction, ordered by descending
        support
    :param inTree: node under which the items are inserted
    :param headerTable: header table ``{item: [support, treeNode or None]}``
    :param count: occurrence count of this transaction in the data set
    :return: None
    """
    node = inTree
    for item in items:
        children = node.children
        if item in children:
            children[item].inc(count)
        else:
            child = treeNode.treeNode(item, count, node)
            children[item] = child
            entry = headerTable[item]
            if entry[1] is None:
                # First node for this item: the header points straight at it.
                entry[1] = child
            else:
                # Otherwise attach it via the existing header node.
                updateHeader(entry[1], child)
        # The next item is inserted below the node just visited/created.
        node = children[item]
def createTree(dataSet, minSup=1):
    """
    Build the FP-tree.

    :param dataSet: data set dictionary ``{transaction: occurrence count}``
    :param minSup: minimum support
    :return: ``(FP-tree root, header table)``, or ``(None, None)`` when no
        item reaches ``minSup``. The header table has the form
        ``{item: [support, treeNode or None]}``.

    Every transaction is inserted using one global item order (descending
    support). Sorting each transaction by support alone, as done
    previously, let items with equal support appear in different orders in
    different transactions, so shared prefixes were not merged reliably.
    """
    # Accumulate the total support of every item over all transactions.
    counts = {}
    for trans, occurrences in dataSet.items():
        for item in trans:
            counts[item] = counts.get(item, 0) + occurrences

    # Keep only items that meet the minimum support, as [support, None].
    headerTable = {
        item: [total, None]
        for item, total in counts.items()
        if total >= minSup
    }
    if not headerTable:
        return None, None

    # Fixed global position per frequent item; ties in support therefore
    # resolve identically for every transaction.
    ranked = sorted(
        headerTable, key=lambda item: headerTable[item][0], reverse=True)
    position = {item: index for index, item in enumerate(ranked)}

    # The tree starts from an empty-set root.
    retTree = treeNode.treeNode('Null Set', 1, None)
    for tranSet, count in dataSet.items():
        # Drop infrequent items, then order the rest by global rank.
        orderedItems = sorted(
            (item for item in tranSet if item in position),
            key=position.__getitem__)
        if orderedItems:
            updateTree(orderedItems, retTree, headerTable, count)
    return retTree, headerTable
# Fragment of a top-level search script; the body of the ``while`` loop
# continues past the end of this excerpt.
cStart = m + eps
while m <= mMax:
    # Per-m output: make sure the figures directory exists and open the
    # log file for writing.
    # NOTE(review): no close() for ``f`` is visible in this excerpt —
    # confirm it is closed further down.
    Path(logsDir + 'm_' + str(m) + '_figs').mkdir(parents=True, exist_ok=True)
    f = open(logsDir + 'm_' + str(m) + '_log.txt', "w")
    stoppingThreshold = 0.0005
    foundWorstQ = False
    print('Searching for worst Q. m: ' + str(m))
    cMin = 0
    cMax = m + eps
    plotBool = True
    # Build the tree level by level: expand the root, then expand every
    # node of the current level m more times, collecting each level's
    # children as the next level.
    root = treeNode(0, m, 1, 1, None, [], [], [], [])
    root.createChildren()
    currentChildren = root.children
    for k in range(1, m + 1):
        numOfChildren = len(currentChildren)
        nextChildren = []
        for childIdx in range(numOfChildren):
            currentChildren[childIdx].createChildren()
            nextChildren = nextChildren + currentChildren[childIdx].children
        currentChildren = nextChildren
    # The nodes of the last generated level are treated as the leaves.
    leafs = currentChildren
    minLeafIdx = 0
    print('Number of leafs: ' + str(len(leafs)))
    f.write('Number of leafs: ' + str(len(leafs)) + '\n')
    cnt = -1
def _hyperparameter_grid(mode, thershod, thershodImpure, method):
    """Return the banner text and the settings to try for tuning ``mode``.

    Each entry of the returned list is ``(candidate, (thershod,
    thershodImpure, method))`` where exactly one of the three settings has
    been replaced by the candidate value.
    """
    if mode == 1:
        values = [1e-10, 0.04, 0.10, 0.20, 0.30]
        return ('for thershodImpure = ',
                [(v, (thershod, v, method)) for v in values])
    if mode == 2:
        values = [0, 1, 2]
        return ('for method = ',
                [(v, (thershod, thershodImpure, v)) for v in values])
    values = [1e-10, 4e-3, 0.01, 0.02, 0.05]
    return ('for thershod = ',
            [(v, (v, thershodImpure, method)) for v in values])


def main(trainData,
         trainLabel,
         validateData,
         validateLabel,
         type=0,
         thershod=0.01,
         thershodImpure=0.2,
         method=0):
    """Tune one decision-tree hyperparameter on the validation set.

    For every candidate value a tree is generated from the training data,
    pruned with the validation data, and scored on both sets. The candidate
    with the strictly highest validation accuracy wins (the first one on
    ties).

    :param trainData: training samples
    :param trainLabel: training labels
    :param validateData: validation samples
    :param validateLabel: validation labels
    :param type: which setting to tune: 1 -> ``thershodImpure``,
        2 -> ``method``, anything else -> ``thershod``
    :param thershod: value used for ``thershod`` when it is not being tuned
    :param thershodImpure: value used for ``thershodImpure`` when it is not
        being tuned
    :param method: value used for ``method`` when it is not being tuned
    :return: ``[selectList, bestTree, bestPara]`` where each ``selectList``
        row is ``[candidate, train accuracy, validation accuracy,
        treeNode.totalLeafNum, totalPruningNum]``; ``bestTree`` is ``None``
        and ``bestPara`` is ``-1`` if no candidate scores above 0

    The three tuning modes used to be three copies of the same loop; they
    now share one loop driven by ``_hyperparameter_grid``.
    """
    # Module-level pruning counter; read and reset here.
    # NOTE(review): presumably incremented inside Prune — confirm.
    global totalPruningNum

    bestTree = None  # best decision tree found so far
    bestPara = -1  # candidate value that produced it
    bestAccuracy = 0  # best validation accuracy so far
    selectList = []  # one result row per candidate

    banner, grid = _hyperparameter_grid(type, thershod, thershodImpure, method)
    for candidate, (curThershod, curThershodImpure, curMethod) in grid:
        print(banner, candidate)
        treeroot = treeNode.treeNode()
        treeNode.GenerateTree(treeroot, trainData, trainLabel, curThershod,
                              curThershodImpure, curMethod)
        Prune(treeroot, validateData, validateLabel)

        _, accuracy1 = Decision(treeroot, trainData, trainLabel)
        print('train set accuracy:', accuracy1)
        _, accuracy2 = Decision(treeroot, validateData, validateLabel)
        print('validate set accuracy:', accuracy2)

        selectList.append([
            candidate, accuracy1, accuracy2, treeNode.totalLeafNum,
            totalPruningNum
        ])
        print('total leaf num:', treeNode.totalLeafNum)
        print('total pruning num:', totalPruningNum)

        # Reset the shared counters before the next candidate.
        treeNode.totalLeafCount = 0  # leaf count back to 0
        treeNode.totalLeafNum = 0
        totalPruningNum = 0

        if accuracy2 > bestAccuracy:
            bestAccuracy = accuracy2
            bestTree = treeroot
            bestPara = candidate
    return [selectList, bestTree, bestPara]