def _iter_child_partitions(x, y, t, nodef):
    """Yield ``(branch_key, sub_x, sub_y, sub_t)`` for every branch of a split.

    Partitions are produced lazily, one at a time, so each split is computed
    right before the caller processes that branch.

    For a discrete feature (``t[nodef] == 1``) there is one partition per
    distinct value of the feature found in ``x``; the branch key is that
    value.  Otherwise the feature is treated as numerical: the range
    ``0.0 .. 1.0`` is cut into ``NforNumerical`` bins (each widened by 0.0001),
    the branch key is the bin index, and bins that receive no samples are
    skipped.
    """
    if t[nodef] == 1:  # discrete
        for value in set(data[nodef] for data in x):
            sub_x, sub_y, sub_t = splitSet_discrete(x, y, t, nodef, value)
            yield value, sub_x, sub_y, sub_t
    else:  # numerical
        # NOTE(review): the fixed 0.0 .. 1.0 range suggests numerical features
        # are expected to be normalised beforehand -- confirm with the caller.
        minv = 0.0
        maxv = 1.0
        delta = (maxv - minv) / NforNumerical + 0.0001
        for i in range(NforNumerical):
            sub_x, sub_y, sub_t = splitSet_numerical(
                x, y, t, nodef, minv + delta * i, minv + delta * (i + 1))
            if len(sub_x) > 0:
                yield i, sub_x, sub_y, sub_t


def pruning(Tree, x, y, t):
    """Prune a decision tree bottom-up against the samples ``(x, y)``.

    Parameters:
        Tree: either a leaf (any non-dict value, used as the predicted label)
            or a dict with a single key naming the split feature.  The text
            after the first seven characters of that key is parsed as the
            integer feature index, and the value stored under the key maps
            branch keys to sub-trees.
        x: samples; each sample is indexed by feature index.
        y: labels, parallel to ``x``.
        t: per-feature type flags; ``1`` marks a discrete feature, any other
            value a numerical one.

    Returns:
        A tuple ``(tree, right, error)``.  ``tree`` is the (possibly pruned)
        tree, or a bare label when the node was collapsed into a leaf.
        ``right`` and ``error`` count the samples of ``y`` that the result
        classifies correctly / incorrectly.

    A node is collapsed only when every child ended up as a leaf and the
    children together make more errors than a single majority leaf would.
    The labels used for the replacement leaf are the module-level ``name1``
    and ``name2`` (presumably the two class labels -- confirm at their
    definition).

    NOTE(review): when a dict is returned, ``error`` only covers leaf children
    and branches that could not be evaluated; errors made inside sub-trees
    that were kept are not included in the count.
    """
    # Leaf: simply score the label against the samples that reached it.
    if not isinstance(Tree, dict):
        right = 0
        error = 0
        for tag in y:
            if tag == Tree:
                right += 1
            else:
                error += 1
        return Tree, right, error

    # next(iter(...)) works on Python 2 and 3; dict.keys()[0] only on 2.
    nodefN = next(iter(Tree))
    nodef = int(nodefN[7:])

    canpruning = True
    # In the two-class case total1 / total2 end up holding the number of
    # samples labelled name1 / not labelled name1 below this node.
    total1 = 0
    total2 = 0
    totalerror = 0
    newTree = {nodefN: {}}

    for key, sub_x, sub_y, sub_t in _iter_child_partitions(x, y, t, nodef):
        try:
            pTree, right, error = pruning(Tree[nodefN][key], sub_x, sub_y, sub_t)
        except Exception:
            # Best effort: a branch that cannot be evaluated (for example a
            # value with no entry in the tree, which raises KeyError) counts
            # every one of its samples as an error and charges them to the
            # currently smaller class total.
            totalerror += len(sub_x)
            if total1 < total2:
                total1 += len(sub_x)
            else:
                total2 += len(sub_x)
            continue

        newTree[nodefN][key] = pTree
        if isinstance(pTree, dict):
            # A surviving sub-tree below means this node cannot be collapsed.
            canpruning = False
            continue

        if pTree == name1:
            total1 += right
            total2 += error
        else:
            total1 += error
            total2 += right
        totalerror += error

    # Collapse when a single majority leaf would make fewer mistakes than the
    # children do; ties between the totals go to name1.
    if canpruning and totalerror > min(total1, total2):
        if total1 < total2:
            return name2, total2, total1
        return name1, total1, total2

    return newTree, len(x) - totalerror, totalerror
def pruning(Tree, x, y, t):
    """Prune a decision tree bottom-up against the samples ``(x, y)``.

    Parameters:
        Tree: either a leaf (any non-dict value, used as the predicted label)
            or a dict with a single key naming the split feature.  The text
            after the first seven characters of that key is parsed as the
            integer feature index, and the value stored under the key maps
            branch keys to sub-trees.
        x: samples; each sample is indexed by feature index.
        y: labels, parallel to ``x``.
        t: per-feature type flags; ``1`` marks a discrete feature, any other
            value a numerical one.

    Returns:
        A tuple ``(tree, right, error)``.  ``tree`` is the (possibly pruned)
        tree, or a bare label when the node was collapsed into a leaf.
        ``right`` and ``error`` count the samples of ``y`` that the result
        classifies correctly / incorrectly.

    Discrete features get one branch per distinct value present in ``x``.
    Numerical features are cut into ``NforNumerical`` bins over ``0.0 .. 1.0``
    (each bin widened by 0.0001) and keyed by bin index; empty bins are
    skipped.  A node is collapsed only when every child ended up as a leaf and
    the children together make more errors than a single majority leaf would.
    The replacement leaf is labelled with the module-level ``name1`` or
    ``name2`` (presumably the two class labels -- confirm at their
    definition).

    NOTE(review): when a dict is returned, ``error`` only covers leaf children
    and branches that could not be evaluated; errors made inside sub-trees
    that were kept are not included in the count.
    """
    # Leaf: score the label against the samples that reached it.
    if not isinstance(Tree, dict):
        hits = [bool(tag == Tree) for tag in y]
        right = sum(hits)
        return Tree, right, len(hits) - right

    # next(iter(...)) works on Python 2 and 3; dict.keys()[0] only on 2.
    nodefN = next(iter(Tree))
    nodef = int(nodefN[7:])

    # Describe the branches of this node: which keys to visit, how to carve
    # out the samples for a key, and whether empty partitions are skipped.
    if t[nodef] == 1:  # discrete
        branch_keys = set(data[nodef] for data in x)
        skip_empty = False

        def split(value):
            return splitSet_discrete(x, y, t, nodef, value)
    else:  # numerical
        # NOTE(review): the fixed 0.0 .. 1.0 range suggests numerical features
        # are expected to be normalised beforehand -- confirm with the caller.
        minv = 0.0
        maxv = 1.0
        delta = (maxv - minv) / NforNumerical + 0.0001
        branch_keys = range(NforNumerical)
        skip_empty = True

        def split(i):
            return splitSet_numerical(
                x, y, t, nodef, minv + delta * i, minv + delta * (i + 1))

    canpruning = True
    # In the two-class case total1 / total2 end up holding the number of
    # samples labelled name1 / not labelled name1 below this node.
    total1 = 0
    total2 = 0
    totalerror = 0
    newTree = {nodefN: {}}

    for key in branch_keys:
        subSetx, subSety, subSett = split(key)
        if skip_empty and not len(subSetx) > 0:
            continue

        try:
            pTree, right, error = pruning(Tree[nodefN][key], subSetx, subSety, subSett)
        except Exception:
            # Best effort: a branch that cannot be evaluated (for example a
            # key with no entry in the tree, which raises KeyError) counts
            # every one of its samples as an error and charges them to the
            # currently smaller class total.
            missed = len(subSetx)
            totalerror += missed
            if total1 < total2:
                total1 += missed
            else:
                total2 += missed
            continue

        newTree[nodefN][key] = pTree
        if isinstance(pTree, dict):
            # A surviving sub-tree below means this node cannot be collapsed.
            canpruning = False
            continue

        if pTree == name1:
            total1 += right
            total2 += error
        else:
            total1 += error
            total2 += right
        totalerror += error

    # Collapse when a single majority leaf would make fewer mistakes than the
    # children do; ties between the totals go to name1.
    if canpruning and totalerror > min(total1, total2):
        if total1 < total2:
            return name2, total2, total1
        return name1, total1, total2

    return newTree, len(x) - totalerror, totalerror