Example #1
    def test_compute_info_gain(self):  # pass
        return  # disabled: the early return skips the body below
        # Signature: compute_info_gain(self, column, column_i, threshold, train_Y, parent_entropy)
        # Note: the call below does not pass column_i.
        np.random.seed(10)
        column = np.array([10, 5, 2])
        Y = np.array([0, 1, 1])
        node = DecisionTreeNode(1, 3, set(Y))

        print(
            node.compute_info_gain(column,
                                   threshold=3,
                                   train_Y=Y,
                                   parent_entropy=1))
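
The test snippets here (Examples #1, #3, #6, #8, #9) construct nodes as DecisionTreeNode(1, 3, set(Y)), but the class itself is never shown. A minimal hypothetical stub, assuming the three positional arguments are (depth, max_depth, labels) and sketching the two fully determined methods, compute_entropy and majority:

import numpy as np

class DecisionTreeNode:
    # Hypothetical stub: the three positional constructor arguments are not
    # documented in these snippets; (depth, max_depth, labels) is an assumed
    # reading, not confirmed by the original code.
    def __init__(self, depth=0, max_depth=None, labels=None):
        self.depth = depth
        self.max_depth = max_depth
        self.labels = labels
        self.children = []

    def compute_entropy(self, Y):
        # Shannon entropy of the label distribution in Y, in bits.
        _, counts = np.unique(Y, return_counts=True)
        p = counts / counts.sum()
        return float(-(p * np.log2(p)).sum())

    def majority(self, Y):
        # Most frequent label in Y (ties broken by the smallest label,
        # since np.unique returns sorted values).
        values, counts = np.unique(Y, return_counts=True)
        return values[np.argmax(counts)]

Under this stub, DecisionTreeNode(1, 3, {0, 1, 5}).majority(np.array([5, 0, 0, 5, 0, 1])) returns 0, matching the "majority 0" expectation noted in Example #6.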
Example #2
def computeTree(r, currentAttributeToTree, listOfAllAttributeToTree, root, count):
    # Pick the attribute tree with the highest children gain; note that the
    # incoming `root` argument is overwritten here and never read.
    key_max = max(currentAttributeToTree.keys(),
                  key=lambda k: currentAttributeToTree[k].calculateChildrenGain(calculateT(r)))
    root = currentAttributeToTree[key_max]

    # Pure children (all yes or all no) become leaves.
    for child in root.children:
        if child.yesToWillWait == 0 or child.noToWillWait == 0:
            r = filterRestuarant(root.data, r, child.data)
            if child.yesToWillWait > child.noToWillWait:
                child.children = [DecisionTreeNode("YES")]
            else:
                child.children = [DecisionTreeNode("NO")]

    # SPECIAL CASE: an attribute value seen in the first iteration may be
    # missing from the current one; recover it from a previous iteration.
    for child in listOfAllAttributeToTree[0][str(root.data) + "Tree"].children:
        if not any(child.data == u.data for u in root.children):
            # Walk the previous iterations (newest first) until it is found.
            for a in listOfAllAttributeToTree[::-1]:
                if child in a[str(root.data) + "Tree"].children:
                    node = DecisionTreeNode(child.data)
                    if child.yesToWillWait >= child.noToWillWait:
                        node.children = [DecisionTreeNode("YES")]
                    else:
                        node.children = [DecisionTreeNode("NO")]
                    root.children.append(node)
                    break

    # Mixed children are split further by recursing on the filtered data.
    for child in root.children:
        if child.yesToWillWait != 0 and child.noToWillWait != 0:
            currentAttributeToTree = makeTree(r)
            count += 1
            listOfAllAttributeToTree.append(currentAttributeToTree)
            child.children = [computeTree(r, currentAttributeToTree,
                                          listOfAllAttributeToTree,
                                          child.children, count)]
    return root
Example #3
    def test_train(self):
        print('start test test_train')
        # Signature: split_data_and_attrs(self, train_X, train_Y, attrs_ids, parent_entropy)
        train_X = np.array([[1, 2, 3, 1], [1, 3, 5, 8], [1, 2, 7, 5]])
        train_Y = np.array([0, 0, 1])
        attrs_ids = range(4)

        node = DecisionTreeNode(1, 3, set(train_Y))
        # Exercise split_data_and_attrs first; its outputs are not asserted,
        # this call only checks that the split runs.
        left_train_X, left_train_Y, left_attrs_ids, right_train_X, right_train_Y, right_attrs_ids = \
            node.split_data_and_attrs(train_X=train_X, train_Y=train_Y, attrs_ids=attrs_ids, parent_entropy=1)
        print('case 1:')
        node.train(train_X=train_X, train_Y=train_Y, attrs_ids=attrs_ids)

        print('--------------')
Example #4
    def build(self):
        # Create one empty tree per forest member.
        for _ in range(self.N):
            self.forest.append(DecisionTree(DecisionTreeNode()))

        # Give each tree its own random data block, then grow it.
        data_blocks = self._random_data_blocks()
        for tree, block in zip(self.forest, data_blocks):
            tree.data = block
            tree.process_data(self.data)
            tree.build(tree.root, tree.data, 0)
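
Example #4 depends on a _random_data_blocks helper that is not shown. In a random-forest builder this is usually one bootstrap sample per tree; a hypothetical sketch under that assumption:

import random

def _random_data_blocks(self):
    # Assumed behavior: one bootstrap sample per tree, i.e. len(self.data)
    # rows drawn with replacement, repeated for each of the N trees.
    return [[random.choice(self.data) for _ in range(len(self.data))]
            for _ in range(self.N)]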
Example #5
def buildTreeFromRoot(restaurant, attribute):
    # Build a one-level tree: one child per observed value of `attribute`,
    # tallying yes/no "will wait" counts for each value.
    root = DecisionTreeRoot(attribute)
    for r in restaurant:
        value = r.mapToAttributeValue[attribute]
        if not any(child.data == value for child in root.children):
            root.children.append(DecisionTreeNode(value))
        for child in root.children:
            if child.data == value:
                if r.willWait:
                    child.addOneToYesWillWait()
                else:
                    child.addOneToNoWillWait()
                break  # child values are unique, so stop scanning
    return root
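
Examples #2 and #5 use a different DecisionTreeNode shape from the test suite: a single data argument plus yes/no "will wait" counters. A minimal stub matching that usage (an assumption; the original class is not shown):

class DecisionTreeNode:
    # Counting node assumed by Examples #2 and #5.
    def __init__(self, data):
        self.data = data
        self.yesToWillWait = 0
        self.noToWillWait = 0
        self.children = []

    def addOneToYesWillWait(self):
        self.yesToWillWait += 1

    def addOneToNoWillWait(self):
        self.noToWillWait += 1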
Example #6
    def test_majority(self):  # pass
        return  # disabled: the early return skips the body below
        print('--------')
        print('case 1:')
        train_Y = np.array([5, 0, 0, 5, 0, 1])  # majority 0
        node = DecisionTreeNode(1, 3, set(train_Y))
        print(node.majority(train_Y))
        print('--------')

        print('--------')
        print('case 2:')
        train_Y = np.array([5, 0, 5, 5, 0, 1])  # majority 5
        node = DecisionTreeNode(1, 3, set(train_Y))
        print(node.majority(train_Y))
        print('--------')

Example #7
import copy

def Generate_decision_tree(Dx, attribute_listx):
    # Base case 1: every tuple in Dx already has the same class label.
    issameclass = isSameClass(Dx)
    if issameclass is not None:
        obj = DecisionTreeNode(issameclass)
        obj.status = "issameclass"
        return obj

    # Base case 2: no attributes left to split on; use a majority-vote leaf.
    if len(attribute_listx) == 0:
        ret = getMajorityVoting(Dx)
        obj = DecisionTreeNode(ret)
        obj.status = "attribute length zero"
        return obj

    # Work on copies so the caller's data and attribute list stay untouched.
    D = copy.deepcopy(Dx)
    attribute_list = copy.deepcopy(attribute_listx)

    # Pick the best splitting attribute (and split point for continuous ones).
    splitting_attribute, infoGain, split_point = attribute_selection_method(D, attribute_list)

    node = DecisionTreeNode(splitting_attribute)
    node.splitpoint = split_point
    attribute_list.remove(splitting_attribute)

    if AttributeType[splitting_attribute] == "Categorical":
        DatabaseList, split_att_values = getPartitionsForCategorical(D, splitting_attribute)
    else:
        DatabaseList, split_att_values = getPartitionsForContinuous(D, splitting_attribute, split_point)

    idx = 0
    for partition in DatabaseList:
        if len(partition) <= pruneThreshold:
            # Pre-pruning: partitions at or below the threshold become
            # majority-vote leaves instead of being split further.
            ret = getMajorityVoting(Dx)
            childNode = DecisionTreeNode(ret)
            childNode.status = "partition length zero"
        else:
            childNode = Generate_decision_tree(partition, attribute_list)

        if AttributeType[splitting_attribute] == "Categorical":
            # Label the edge with the attribute value this branch covers.
            edgeLabel[(node, childNode)] = split_att_values[idx]
            idx += 1

        node.children.append(childNode)

    return node
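
Example #7 delegates attribute ranking to an external attribute_selection_method. For reference, a self-contained sketch of the information-gain computation such a method typically performs (entropy and information_gain below are hypothetical helpers, not the author's code):

import numpy as np

def entropy(labels):
    # Shannon entropy of a 1-D label array, in bits.
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return float(-(p * np.log2(p)).sum())

def information_gain(labels, partitions):
    # Parent entropy minus the size-weighted entropy of the child partitions.
    total = sum(len(part) for part in partitions)
    weighted = sum(len(part) / total * entropy(part) for part in partitions)
    return entropy(labels) - weighted

Splitting np.array([0, 0, 1, 1]) into two pure halves gives a gain of 1.0 bit, while a split that leaves both halves mixed gives a gain of 0.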
Example #8
    def test_split_data_and_attrs_optimized(self):  # pass
        return  # disabled: the early return skips the body below
        train_X = np.array([[1, 2, 3], [1, 3, 5], [1, 2, 7]])
        train_Y = np.array([0, 0, 1])
        node = DecisionTreeNode(1, 3, set(train_Y))
        left_train_X, left_train_Y, left_attrs_ids, right_train_X, right_train_Y, right_attrs_ids = \
            node.split_data_and_attrs_optimized(train_X=train_X, train_Y=train_Y, attrs_ids=[0, 1, 2], parent_entropy=1)
        print('case 1:')
        print('left_train_X:')
        print(left_train_X)
        print(left_train_Y)
        print('\nright_train_X')
        print(right_train_X)
        print(right_train_Y)

        print('--------------')

        print('case 2:')
        train_X = np.array([[1, 1, 1], [1, 1, 0], [0, 0, 1], [1, 0, 0]])
        train_Y = np.array([1, 1, 2, 2])
        node = DecisionTreeNode(1, 3, set(train_Y))
        left_train_X, left_train_Y, left_attrs_ids, right_train_X, right_train_Y, right_attrs_ids = \
            node.split_data_and_attrs_optimized(train_X=train_X, train_Y=train_Y, attrs_ids=[0, 1, 2], parent_entropy=1)
        print('\nleft_train_X:')
        print(left_train_X)
        print(left_train_Y)

        print('\nright_train_X')
        print(right_train_X)
        print(right_train_Y)

        print('\nleft_attrs_ids:')
        print(left_attrs_ids)

        print('\nright_attrs_ids')
        print(right_attrs_ids)

        print('--------------')

        print('case 3:')
        train_X = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]])
        train_Y = np.array([1, 1, 1, 1])
        node = DecisionTreeNode(1, 3, set(train_Y))
        attrs_ids = [0, 2]
        left_train_X, left_train_Y, left_attrs_ids, right_train_X, right_train_Y, right_attrs_ids = \
            node.split_data_and_attrs_optimized(train_X=train_X, train_Y=train_Y, attrs_ids=attrs_ids, parent_entropy=1)
        print('\nleft_train_X:')
        print(left_train_X)
        print(left_train_Y)

        print('\nright_train_X')
        print(right_train_X)
        print(right_train_Y)

        print('\nleft_attrs_ids:')
        print(left_attrs_ids)

        print('\nright_attrs_ids')
        print(right_attrs_ids)

        print('--------------')
        print('case 4:')
        train_X = np.array([[1, 2, 3], [1, 3, 5], [1, 2, 7]])
        train_Y = np.array([1, 0, 1])
        node = DecisionTreeNode(1, 3, set(train_Y))
        left_train_X, left_train_Y, left_attrs_ids, right_train_X, right_train_Y, right_attrs_ids = \
            node.split_data_and_attrs_optimized(train_X=train_X, train_Y=train_Y, attrs_ids=[0, 1, 2], parent_entropy=1)

        print('left_train_X:')
        print(left_train_X)
        print(left_train_Y)
        print('\nright_train_X')
        print(right_train_X)
        print(right_train_Y)

        print('--------------')
Example #9
    def test_compute_entropy(self):  # pass
        return  # disabled: the early return skips the body below
        np.random.seed(10)
        Y = np.random.randint(0, 2, 100)
        node = DecisionTreeNode(1, 3, set(Y))
        print(node.compute_entropy(Y))