示例#1
0
def pruning(Tree, x, y, t):
    """Prune a decision (sub)tree bottom-up against the samples ``x``/``y``.

    A leaf (any non-dict ``Tree``) is scored directly against ``y``.  An
    internal node is a single-key dict ``{node_name: {branch_key: subtree}}``;
    every branch that receives samples is pruned recursively, and the node
    itself is collapsed into a single leaf when all of its pruned children
    are leaves and predicting the more frequent label makes fewer errors than
    the children do together.

    Relies on names defined outside this function: ``name1`` / ``name2`` (the
    two class labels), ``NforNumerical`` (number of bins for a numerical
    feature), ``splitSet_discrete`` and ``splitSet_numerical`` (each must
    return the three subsets ``(x, y, t)`` belonging to one branch).

    Args:
        Tree: a leaf label, or a single-key dict describing an internal node.
        x: samples; ``x[k][f]`` is the value of feature ``f`` for sample ``k``.
        y: tags, iterated in parallel with ``x``.
        t: per-feature type flags; ``t[f] == 1`` selects the discrete branch,
            anything else the numerical one.

    Returns:
        A tuple ``(tree, right, error)``: the pruned tree (or leaf label) and
        the counts of correctly / incorrectly classified samples.
        NOTE(review): for a node that is *not* collapsed, errors made inside
        dict-valued children are not added to ``error``; only leaf children
        and missing branches contribute.

    Raises:
        IndexError: if ``Tree`` is an empty dict.
    """
    if not isinstance(Tree, dict):
        # Leaf: compare the predicted label with every tag.
        right = 0
        error = 0
        for tag in y:
            if tag == Tree:
                right += 1
            else:
                error += 1
        return Tree, right, error

    # ``Tree.keys()[0]`` only works on Python 2 (dict views are not
    # subscriptable on Python 3); ``list(Tree)[0]`` yields the same key on
    # both and still raises IndexError for an empty dict.
    nodefN = list(Tree)[0]
    # The feature index is whatever follows the first 7 characters of the
    # node name (presumably names look like "feature<N>" -- confirm against
    # the code that builds the tree).
    nodef = int(nodefN[7:])
    children = Tree[nodefN]

    def iter_branches():
        """Lazily yield ``(branch_key, (sub_x, sub_y, sub_t))`` per branch."""
        if t[nodef] == 1:  # discrete: one branch per value present in x
            for value in set([data[nodef] for data in x]):
                subx, suby, subt = splitSet_discrete(x, y, t, nodef, value)
                yield value, (subx, suby, subt)
        else:  # numerical: NforNumerical equal-width bins over [0, 1]
            minv = 0.0
            maxv = 1.0
            # The small offset widens the bins slightly so the last one
            # reaches past ``maxv``.
            delta = (maxv - minv) / NforNumerical + 0.0001
            for i in range(NforNumerical):
                subx, suby, subt = splitSet_numerical(
                    x, y, t, nodef, minv + delta * i, minv + delta * (i + 1))
                # Empty bins are skipped entirely.
                if len(subx) > 0:
                    yield i, (subx, suby, subt)

    canpruning = True  # stays True only while every pruned child is a leaf
    total1 = 0         # samples tallied for label ``name1``
    total2 = 0         # samples tallied for the other label
    totalerror = 0     # errors made by the children as they stand
    newTree = {nodefN: {}}

    for key, (subSetx, subSety, subSett) in iter_branches():
        try:
            pTree, right, error = pruning(children[key], subSetx, subSety,
                                          subSett)
            newTree[nodefN][key] = pTree
            if isinstance(pTree, dict):
                canpruning = False
            else:
                if pTree == name1:
                    total1 += right
                    total2 += error
                else:
                    total1 += error
                    total2 += right
                totalerror += error
        except Exception:
            # Best-effort fallback (typically the tree has no branch for this
            # value/bin): count the whole subset as errors and add it to
            # ``total1`` when that is currently the smaller tally, otherwise
            # to ``total2``.  ``Exception`` instead of a bare ``except`` so
            # that KeyboardInterrupt / SystemExit are no longer swallowed.
            missed = len(subSetx)
            totalerror += missed
            if total1 < total2:
                total1 += missed
            else:
                total2 += missed

    # Collapse the node when a single leaf would beat its children.
    if canpruning and totalerror > min(total1, total2):
        if total1 < total2:
            return name2, total2, total1
        return name1, total1, total2

    return newTree, len(x) - totalerror, totalerror
示例#2
0
def pruning(Tree, x, y, t):
    """Prune a decision (sub)tree bottom-up against the samples ``x``/``y``.

    A leaf (any non-dict ``Tree``) is scored directly against ``y``.  An
    internal node is a single-key dict ``{node_name: {branch_key: subtree}}``;
    every branch that receives samples is pruned recursively, and the node
    itself is collapsed into a single leaf when all of its pruned children
    are leaves and predicting the more frequent label makes fewer errors than
    the children do together.

    Relies on names defined outside this function: ``name1`` / ``name2`` (the
    two class labels), ``NforNumerical`` (number of bins for a numerical
    feature), ``splitSet_discrete`` and ``splitSet_numerical`` (each must
    return the three subsets ``(x, y, t)`` belonging to one branch).

    Args:
        Tree: a leaf label, or a single-key dict describing an internal node.
        x: samples; ``x[k][f]`` is the value of feature ``f`` for sample ``k``.
        y: tags, iterated in parallel with ``x``.
        t: per-feature type flags; ``t[f] == 1`` selects the discrete branch,
            anything else the numerical one.

    Returns:
        A tuple ``(tree, right, error)``: the pruned tree (or leaf label) and
        the counts of correctly / incorrectly classified samples.
        NOTE(review): for a node that is *not* collapsed, errors made inside
        dict-valued children are not added to ``error``; only leaf children
        and missing branches contribute.

    Raises:
        IndexError: if ``Tree`` is an empty dict.
    """
    if not isinstance(Tree, dict):
        # Leaf: compare the predicted label with every tag.
        right = 0
        error = 0
        for tag in y:
            if tag == Tree:
                right += 1
            else:
                error += 1
        return Tree, right, error

    # ``Tree.keys()[0]`` only works on Python 2 (dict views are not
    # subscriptable on Python 3); ``list(Tree)[0]`` yields the same key on
    # both and still raises IndexError for an empty dict.
    nodefN = list(Tree)[0]
    # The feature index is whatever follows the first 7 characters of the
    # node name (presumably names look like "feature<N>" -- confirm against
    # the code that builds the tree).
    nodef = int(nodefN[7:])
    children = Tree[nodefN]

    def iter_branches():
        """Lazily yield ``(branch_key, (sub_x, sub_y, sub_t))`` per branch."""
        if t[nodef] == 1:  # discrete: one branch per value present in x
            for value in set([data[nodef] for data in x]):
                subx, suby, subt = splitSet_discrete(x, y, t, nodef, value)
                yield value, (subx, suby, subt)
        else:  # numerical: NforNumerical equal-width bins over [0, 1]
            minv = 0.0
            maxv = 1.0
            # The small offset widens the bins slightly so the last one
            # reaches past ``maxv``.
            delta = (maxv - minv) / NforNumerical + 0.0001
            for i in range(NforNumerical):
                subx, suby, subt = splitSet_numerical(
                    x, y, t, nodef, minv + delta * i, minv + delta * (i + 1))
                # Empty bins are skipped entirely.
                if len(subx) > 0:
                    yield i, (subx, suby, subt)

    canpruning = True  # stays True only while every pruned child is a leaf
    total1 = 0         # samples tallied for label ``name1``
    total2 = 0         # samples tallied for the other label
    totalerror = 0     # errors made by the children as they stand
    newTree = {nodefN: {}}

    for key, (subSetx, subSety, subSett) in iter_branches():
        try:
            pTree, right, error = pruning(children[key], subSetx, subSety,
                                          subSett)
            newTree[nodefN][key] = pTree
            if isinstance(pTree, dict):
                canpruning = False
            else:
                if pTree == name1:
                    total1 += right
                    total2 += error
                else:
                    total1 += error
                    total2 += right
                totalerror += error
        except Exception:
            # Best-effort fallback (typically the tree has no branch for this
            # value/bin): count the whole subset as errors and add it to
            # ``total1`` when that is currently the smaller tally, otherwise
            # to ``total2``.  ``Exception`` instead of a bare ``except`` so
            # that KeyboardInterrupt / SystemExit are no longer swallowed.
            missed = len(subSetx)
            totalerror += missed
            if total1 < total2:
                total1 += missed
            else:
                total2 += missed

    # Collapse the node when a single leaf would beat its children.
    if canpruning and totalerror > min(total1, total2):
        if total1 < total2:
            return name2, total2, total1
        return name1, total1, total2

    return newTree, len(x) - totalerror, totalerror