    def train(self, features: List[List[float]], labels: List[int]):
        # LogitBoost training: T rounds of stump fitting to the working response.
        X = np.array(features)
        N = X.shape[0]
        pi = np.full(N, 0.5)   # initial estimate of P(y = 1 | x)
        y = np.array(labels)   # labels in {-1, +1}
        f = np.zeros(N)        # current additive model F(x)
        hx = np.zeros(N)       # predictions of the best stump this round

        for t in range(self.T):
            # Working response z and weights w for LogitBoost (w = pi(1 - pi)).
            w = pi * (1 - pi)
            z = (((y + 1) / 2) - pi) / w

            # Step 5: pick the stump minimizing the weighted squared error.
            best_err = float("inf")
            for clf in self.clfs:
                decstump = DecisionStump(clf.s, clf.b, clf.d)
                hxpred = np.array(decstump.predict(features))
                check = np.sum(w * (z - hxpred) ** 2)

                if check < best_err:
                    min_clf = clf
                    hx = hxpred
                    best_err = check

            self.clfs_picked.append(min_clf)
            self.betas.append(0.5)

            # Update the additive model F and the probabilities pi = sigmoid(2F).
            f = f + 0.5 * hx
            pi = 1 / (1 + np.exp(-2 * f))

    def predict(self, features: List[List[float]]) -> List[int]:
        x = np.array(features)

        f = np.zeros(x.shape[0])
        for t in range(self.T):
            decstump = DecisionStump(self.clfs_picked[t].s,
                                     self.clfs_picked[t].b,
                                     self.clfs_picked[t].d)
            f = f + (self.betas[t] * np.array(decstump.predict(features)))

        predictions = np.ones(f.shape, dtype=int)
        predictions[f < 0] = -1

        return predictions.tolist()
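As a sanity check on the working response used in train above, here is a minimal, self-contained sketch of the z and w computation for one LogitBoost round (plain NumPy; the toy numbers are illustrative assumptions, not from the original):

import numpy as np

# Toy round: 4 points, labels in {-1, +1}, initial probabilities pi = 1/2.
y = np.array([1, -1, 1, 1])
pi = np.full(4, 0.5)

# z_i = (y*_i - pi_i) / (pi_i (1 - pi_i)) with y* = (y + 1) / 2,
# and w_i = pi_i (1 - pi_i), exactly as in the train loop above.
z = ((y + 1) / 2 - pi) / (pi * (1 - pi))
w = pi * (1 - pi)
print(z)  # [ 2. -2.  2.  2.]
print(w)  # [0.25 0.25 0.25 0.25]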
Example #3
    def build_tree(self, X, Y, w, depth, curr_depth):
        # See if we can do any splitting at all
        tree = Node()
        yw = Y * w
        if len(X) < 2 or len(unique(Y)) < 2 or curr_depth >= depth:
            # Leaf: store the constant label favored by the weighted votes.
            tree.stump = 1.0 if abs(sum(yw[yw >= 0])) > abs(sum(yw[yw < 0])) else -1.0
            return tree
        # TODO: check for inconsistent data

        # Learn the decision stump
        stump = DecisionStump().fit(X, Y, w)
        side1 = stump.predict(X) >= 0
        side2 = stump.predict(X) < 0

        tree.stump = stump
        tree.left = self.build_tree(X[side1], Y[side1], w[side1], depth, curr_depth + 1)
        tree.right = self.build_tree(X[side2], Y[side2], w[side2], depth, curr_depth + 1)

        return tree
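    # A matching traversal for the tree that build_tree returns -- a minimal
    # sketch, not part of the original. It assumes leaves store a float label
    # (+1.0 / -1.0, as set above), internal nodes store a DecisionStump, and
    # numpy is available as np.
    def predict_tree(self, tree, X):
        # Leaf: build_tree stored a constant float label.
        if isinstance(tree.stump, float):
            return np.full(len(X), tree.stump)
        # Internal node: route rows by the stump's sign, mirroring the split.
        pred = np.empty(len(X))
        side1 = tree.stump.predict(X) >= 0
        pred[side1] = self.predict_tree(tree.left, X[side1])
        pred[~side1] = self.predict_tree(tree.right, X[~side1])
        return pred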
    def train(self, features: List[List[float]], labels: List[int]):
        # AdaBoost training: T rounds of stump selection on reweighted samples.
        X = np.array(features)
        N = X.shape[0]
        w = np.full(N, 1 / N)      # uniform initial sample weights
        labels = np.array(labels)  # labels in {-1, +1}
        hx = np.zeros(N)           # predictions of the best stump this round

        for t in range(self.T):

            # Step 3: pick the stump minimizing the weighted 0/1 error.
            best_err = float("inf")
            for clf in self.clfs:
                decstump = DecisionStump(clf.s, clf.b, clf.d)

                hxpred = np.array(decstump.predict(features))
                check = np.sum(w[labels != hxpred])

                if check < best_err:
                    min_clf = clf
                    hx = hxpred
                    best_err = check

            self.clfs_picked.append(min_clf)

            # Weighted error of the chosen stump (equals best_err above).
            error = np.sum(w[labels != hx])
            beta = 0.5 * np.log((1 - error) / error)
            self.betas.append(beta)
            # Reweight: shrink correctly classified points, grow the mistakes.
            w = w * np.exp(np.where(labels == hx, -beta, beta))

            w_sum = np.sum(w)
            w = w / w_sum
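The update above is the standard AdaBoost rule: beta_t = (1/2) ln((1 - eps_t) / eps_t), then w_i is multiplied by exp(-beta_t) for correctly classified points and exp(beta_t) for mistakes, followed by renormalization. A self-contained sketch of one such round with toy numbers (the data here is an illustrative assumption, not from the original):

import numpy as np

# Toy round: 4 points, uniform weights, one mistake at index 1.
labels = np.array([1, -1, 1, 1])
hx = np.array([1, 1, 1, 1])      # weak learner's predictions
w = np.full(4, 0.25)

error = np.sum(w[labels != hx])            # eps_t = 0.25
beta = 0.5 * np.log((1 - error) / error)   # ~0.549

w = w * np.exp(np.where(labels == hx, -beta, beta))
w /= w.sum()
print(w)  # the misclassified point's weight rises to 0.5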
Example #6
                        choices=["1.1", "1.2", "1.3", "1.4", "1.5"])

    io_args = parser.parse_args()
    module = io_args.module

    # Decision Stump using inequalities/threshold
    if module == "1.1":
        # 1. Load citiesSmall dataset
        dataset = load_dataset("citiesSmall.pkl")
        X = dataset["X"]
        y = dataset["y"]

        # 2. Evaluate decision stump
        model = DecisionStump()
        model.fit(X, y)
        y_pred = model.predict(X)

        error = np.mean(y_pred != y)
        print("Decision Stump with inequality rule error: %.3f" % error)

        # PLOT RESULT
        utils.plotClassifier(model, X, y)

        fname = os.path.join("..", "figs", "decision_stump_boundary.pdf")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    # Simple decision tree using decision stumps
    elif module == "1.2":
        # 1. Load citiesSmall dataset
        dataset = load_dataset("citiesSmall.pkl")