示例#1
0
    def grow_tree(self, X, y, depth=0):
        """Build the tree node for samples (X, y) and recurse into children.

        The node's predicted class is the argmax over the per-class label
        counts. While ``depth`` is below ``self.max_depth`` the samples are
        partitioned on a fixed column and threshold and each side is grown
        recursively with ``depth + 1``.

        Args:
            X: feature data; a column is read via ``transpose().gather``.
            y: labels, compared against class ids ``0..num_classes - 1``.
            depth: depth of the node being built.

        Returns:
            The ``Node`` created for this call.
        """
        class_counts = R.Tensor([])
        for label in range(self.num_classes):
            label_total = R.sum(R.equal(y, R.Scalar(label)))
            class_counts = class_counts.concat(label_total.expand_dims())
        node = Node(predicted_class=R.argmax(class_counts), depth=depth)
        node.samples = R.shape(y).gather(R.Scalar(0))

        if depth >= self.max_depth:
            return node

        # The data-driven search (self.find_split) is disabled here; a fixed
        # column and threshold stand in for it.
        col, threshold = 0, R.Tensor([12.895])
        decision = R.Scalar(col).logical_and(threshold)
        # Spin until the op reports that its result is available.
        while decision.status != "computed":
            pass
        if not (decision.output == 1):
            return node

        left_mask = X.transpose().gather(R.Scalar(col)).less(threshold)
        X_left = X.gather(left_mask)
        y_left = y.gather(left_mask)
        right_mask = X.transpose().gather(
            R.Scalar(col)).greater_equal(threshold)
        X_right = X.gather(right_mask)
        y_right = y.gather(right_mask)

        node.feature_index = col
        node.threshold = threshold

        left_child = self.grow_tree(X_left, y_left, depth + 1)
        node.left = left_child
        left_child.leftbranch = True

        right_child = self.grow_tree(X_right, y_right, depth + 1)
        node.right = right_child
        right_child.rightbranch = True
        return node
示例#2
0
def get_TP_TN_FN_FP(true_labels, pred_labels):
    """Count the confusion-matrix cells for a pair of label tensors.

    Each cell is built from element-wise logical ops and reduced with
    ``R.sum``.

    Args:
        true_labels: ground-truth labels.
        pred_labels: predicted labels, aligned element-wise with
            ``true_labels``.

    Returns:
        ``[TP, TN, FN, FP]`` as a list of ``R.sum`` ops.

    NOTE(review): assumes binary labels where a truthy value means
    "positive" -- confirm against callers.
    """
    TP = R.logical_and(true_labels, pred_labels)
    TN = R.logical_not(R.logical_or(true_labels, pred_labels))
    # Positive in truth but not predicted; written to mirror FP so no
    # auxiliary equality op is needed.
    FN = R.logical_and(true_labels, R.logical_not(pred_labels))
    FP = R.logical_and(pred_labels, R.logical_not(true_labels))
    return [R.sum(TP), R.sum(TN), R.sum(FN), R.sum(FP)]
示例#3
0
def accuracy(y_true, y_pred):
    """Return the count of matching labels divided by ``y_pred.shape_()``.

    Inputs that are neither ``R.Tensor`` nor ``R.Op`` instances are wrapped
    in ``R.Tensor`` first.

    Args:
        y_true: ground-truth labels (op or raw data).
        y_pred: predicted labels (op or raw data).

    Returns:
        The ``R.div`` op of the match count over the shape op of ``y_pred``.

    NOTE(review): the divisor is the shape op itself, not an element count;
    presumably the inputs are 1-D -- confirm.
    """

    def _as_op(value):
        # Leave existing ops untouched; wrap anything else as a tensor.
        if isinstance(value, R.Tensor) or isinstance(value, R.Op):
            return value
        return R.Tensor(value)

    y_true = _as_op(y_true)
    y_pred = _as_op(y_pred)

    match_count = R.sum(R.equal(y_pred, y_true))
    return R.div(match_count, y_pred.shape_())
示例#4
0
    def grow_tree(self, X, y, depth=0):
        """Recursively build the decision-tree node for samples (X, y).

        The node's predicted class is the argmax over the per-class label
        counts. While ``depth`` is below ``self.max_depth``,
        ``self.find_split`` is asked for a column and threshold; rows whose
        value in that column is below the threshold go to the left child and
        the remaining rows go to the right child.

        Args:
            X: feature data; a column is read via ``transpose().gather``.
            y: labels, compared against class ids ``0..num_classes - 1``.
            depth: depth of the node being built.

        Returns:
            The ``Node`` for this call. It is a leaf (no children set) when
            the depth limit is reached or ``find_split`` yields no split.
        """
        pop_per_class = R.Tensor([])
        for c in range(self.num_classes):
            pop_per_class = pop_per_class.concat(
                R.sum(R.equal(y, R.Scalar(c))).expand_dims())
        predicted_class = R.argmax(pop_per_class)
        # Spin until the op reports that its result is available.
        while predicted_class.status != "computed":
            pass

        node = Node(predicted_class=predicted_class.output, depth=depth)
        node.samples = R.shape(y).gather(R.Scalar(0))
        if depth >= self.max_depth:
            return node

        col, threshold = self.find_split(X, y)
        # "No split" arrives as None values; touching ``threshold.status``
        # on None would raise AttributeError, so stop here with a leaf.
        if col is None or threshold is None:
            return node
        while threshold.status != "computed":
            pass

        # NOTE(review): these shape ops are never read. They are kept because
        # waiting on the label shape may be relied on as a sync point --
        # confirm before removing.
        X.shape_()
        y_shape = y.shape_()
        while y_shape.status != "computed":
            pass

        def _rows_where(mask):
            # Indices where ``mask`` equals 1, reshaped to [sum(mask)].
            return R.find_indices(mask, R.Tensor([1])).reshape(
                shape=R.sum(mask).expand_dims())

        # Build the column op and each side's row-index op once and reuse
        # them for both X and y.
        column = X.transpose().gather(R.Scalar(col))

        rows_left = _rows_where(column.less(threshold))
        X_left = X.gather(rows_left)
        y_left = y.gather(rows_left)

        rows_right = _rows_where(column.greater_equal(threshold))
        X_right = X.gather(rows_right)
        y_right = y.gather(rows_right)

        node.feature_index = col
        node.threshold = threshold

        node.left = self.grow_tree(X_left, y_left, depth + 1)
        node.left.leftbranch = True
        node.right = self.grow_tree(X_right, y_right, depth + 1)
        node.right.rightbranch = True
        return node
示例#5
0
    def find_split(self, X, y):
        """Search for the column and threshold with the lowest weighted Gini.

        For every feature column the rows are reordered by that column and
        each boundary between consecutive rows is scored with the
        size-weighted Gini impurity of the two sides. The best candidate seen
        so far is kept, starting from the impurity of the unsplit labels.

        Args:
            X: feature data; columns are read via ``R.transpose(X)`` and
                ``R.gather``.
            y: labels; reshaped to ``[num_observations]`` and compared
                against class ids ``0..self.num_classes - 1``.

        Returns:
            A ``(column, threshold)`` pair. ``column`` is the feature index
            and ``threshold`` is an ``R.div`` op holding the midpoint of two
            adjacent values. Both are ``None`` when there is at most one
            observation or no candidate was accepted.
        """
        ideal_col = None
        ideal_threshold = None

        # Block until the observation count is available, then use it as a
        # plain Python int for the loops below.
        num_observations = y.shape_().gather(R.Scalar(0))
        while num_observations.status != 'computed':
            pass
        num_observations = int(num_observations.output)
        # Nothing to split with zero or one observation.
        if num_observations <= 1:
            return ideal_col, ideal_threshold

        y = y.reshape(shape=[num_observations])
        # Per-class label counts for the whole (parent) sample set.
        count_in_parent = R.Tensor([])
        for c in range(self.num_classes):
            count_in_parent = count_in_parent.concat(
                R.sum(R.equal(y, R.Scalar(c))).expand_dims())
        # Parent impurity: 1 - sum((count / n) ** 2). The name ``gini`` is
        # reused for the per-candidate impurity inside the loops.
        gini = R.square(
            count_in_parent.foreach(operation='div', params=num_observations))
        best_gini = R.sub(R.Scalar(1.0), R.sum(gini))
        temp_y = y.reshape(shape=[num_observations, 1])

        for col in range(self.num_features):
            # Pair the column's values with the labels as two columns.
            temp_X = R.gather(
                R.transpose(X),
                R.Scalar(col)).reshape(shape=[num_observations, 1])
            all_data = R.concat(temp_X, temp_y, axis=1)

            # Row ordering is derived from the sorted unique column values;
            # the exact layout of ``ind.output`` depends on
            # ``find_indices`` -- TODO confirm.
            column = R.gather(R.transpose(X), R.Scalar(col))
            ind = column.find_indices(R.sort(R.unique(column)))
            while ind.status != "computed":
                pass
            # ``inform_server`` is defined outside this block; its effect is
            # not visible here.
            inform_server()
            sorted_data = R.Tensor([])
            for i in ind.output:
                sorted_data = sorted_data.concat(all_data.gather(
                    R.Tensor(i)))  # need to find another way to sort
            sorted_data_tpose = sorted_data.transpose()
            # Row 0 of the transposed data feeds the candidate thresholds and
            # row 1 the class labels.
            thresholds = sorted_data_tpose.gather(R.Scalar(0)).gather(
                R.Scalar(0))
            obs_classes = sorted_data_tpose.gather(R.Scalar(1)).gather(
                R.Scalar(0))

            # Running class counts: the left side starts empty and the right
            # side starts with every observation.
            num_left = R.Tensor([0] * self.num_classes)  # need ops
            num_right = count_in_parent
            # ``i`` is the number of observations on the left side.
            for i in range(1, num_observations):
                # Move observation i - 1 from the right counts to the left.
                class_ = R.gather(obs_classes, R.Tensor([i - 1]))
                classencoding = R.one_hot_encoding(
                    class_, depth=self.num_classes).gather(R.Scalar(0))
                num_left = num_left.add(classencoding)
                num_right = num_right.sub(classencoding)

                gini_left = R.sub(
                    R.Scalar(1),
                    R.sum(
                        R.square(R.foreach(num_left, operation='div',
                                           params=i))))
                gini_right = R.sub(
                    R.Scalar(1),
                    R.sum(
                        R.square(
                            R.foreach(num_right,
                                      operation='div',
                                      params=num_observations - i))))
                # Size-weighted average of the two sides' impurities.
                gini = R.div(
                    R.add(
                        R.multiply(R.Scalar(i), gini_left),
                        R.multiply(R.Scalar(num_observations - i),
                                   gini_right)), R.Scalar(num_observations))

                # NOTE(review): ``decision1`` is never awaited and is compared
                # below as an op object rather than through ``.output``;
                # unless the op type overloads ``!=`` that test is always
                # true. It also uses ``logical_and`` where an equality test
                # on the adjacent values was presumably intended -- confirm.
                decision1 = R.logical_and(thresholds.gather(R.Tensor([i])),
                                          thresholds.gather(R.Tensor([i - 1])))
                decision2 = gini.less(best_gini)
                while decision2.status != "computed":
                    pass

                # Debug output.
                print(decision2.output == 1)
                if decision2.output == 1 and decision1 != 1:
                    best_gini = gini
                    ideal_col = col
                    # Midpoint between the two adjacent values.
                    ideal_threshold = R.div(
                        R.add(thresholds.gather(R.Tensor([i])),
                              thresholds.gather(R.Tensor([i - 1]))),
                        R.Scalar(2))
        # Debug output.
        print(ideal_col, ideal_threshold)
        return ideal_col, ideal_threshold
示例#6
0
def recall(true_labels, pred_labels):
    """Return recall, ``TP / (TP + FN)``, as an ``R.div`` op.

    Args:
        true_labels: ground-truth labels.
        pred_labels: predicted labels.

    Returns:
        The ``R.div`` op of the true-positive count over the sum of the
        true-positive and false-negative counts, as produced by
        ``get_TP_TN_FN_FP``.
    """
    # Only TP and FN are needed; no extra equality op is built.
    TP, _, FN, _ = get_TP_TN_FN_FP(true_labels, pred_labels)
    return R.div(TP, R.add(TP, FN))
示例#7
0
def precision(true_labels, pred_labels):
    """Return precision, ``TP / (TP + FP)``, as an ``R.div`` op.

    Args:
        true_labels: ground-truth labels.
        pred_labels: predicted labels.

    Returns:
        The ``R.div`` op of the true-positive count over the sum of the
        true-positive and false-positive counts, as produced by
        ``get_TP_TN_FN_FP``.
    """
    # Only TP and FP are needed; no extra equality/sum ops are built.
    TP, _, _, FP = get_TP_TN_FN_FP(true_labels, pred_labels)
    return R.div(TP, R.add(TP, FP))