Пример #1
0
    def testCalculateBestMultiDimFeatureSplitsWithNoSplitOnFeaturePossible(
            self):
        """Checks which nodes are returned as min_node_weight is raised.

        The same stats are passed to the op twice: with min_node_weight=1
        only node 1 is expected back, and with min_node_weight=10 no node is
        expected back.
        """
        zero = [0., 0.]
        unused = [zero] * 4  # all-zero stats row used for nodes 3 through 6
        feature_0 = [
            [zero, [.08, .09], zero, zero],  # node 0; ignored
            [zero, [.15, .36], [.06, .7], [.1, .2]],  # node 1
            [zero, [-.33, .068], zero, [.3, .04]],  # node 2
            unused, unused, unused, unused,  # nodes 3-6; ignored
        ]
        feature_1 = [
            [zero, zero, [.08, .09], zero],  # node 0; ignored
            [zero, [.3, .5], [-.05, .06], [.06, .7]],  # node 1
            [[.1, .1], [.2, -.05], [-.4, .05], [.07, .08]],  # node 2
            unused, unused, unused, unused,  # nodes 3-6; ignored
        ]
        # Each feature is [max_splits, num_buckets, 2]. After stacking the
        # features, swap the two leading axes to obtain
        # [max_splits, num_features, num_buckets, 2].
        stats = np.swapaxes(np.asarray([feature_0, feature_1]), 0, 1)
        node_range = [1, 3]  # nodes 1 and 2 will be processed.
        # Settings shared by both calls; only min_node_weight differs.
        common = dict(l1=0.0, l2=0.0, tree_complexity=0.0, logits_dimension=1)

        outputs = boosted_trees_ops.calculate_best_feature_split(
            node_range, stats, min_node_weight=1, **common)
        node_ids, _, _, _, _, _, _ = outputs
        # Only node 1 is expected to come back with min_node_weight=1.
        self.assertAllEqual([1], node_ids)

        # With min_node_weight=10 no node can be split on any feature.
        outputs = boosted_trees_ops.calculate_best_feature_split(
            node_range, stats, min_node_weight=10, **common)
        node_ids, _, _, _, _, _, _ = outputs
        self.assertAllEqual([], node_ids)
Пример #2
0
  def testCalculateBestMultiDimFeatureSplitsWithNoSplitOnFeaturePossible(self):
    """Checks which nodes are returned as min_node_weight is raised.

    The same stats are passed to the op twice: with min_node_weight=1 only
    node 1 is expected back, and with min_node_weight=10 no node is expected
    back.
    """
    zero = [0., 0.]
    unused = [zero] * 4  # all-zero stats row used for nodes 3 through 6
    feature_0 = [
        [zero, [.08, .09], zero, zero],  # node 0; ignored
        [zero, [.15, .36], [.06, .7], [.1, .2]],  # node 1
        [zero, [-.33, .068], zero, [.3, .04]],  # node 2
        unused, unused, unused, unused,  # nodes 3-6; ignored
    ]
    feature_1 = [
        [zero, zero, [.08, .09], zero],  # node 0; ignored
        [zero, [.3, .5], [-.05, .06], [.06, .7]],  # node 1
        [[.1, .1], [.2, -.05], [-.4, .05], [.07, .08]],  # node 2
        unused, unused, unused, unused,  # nodes 3-6; ignored
    ]
    # Each feature is [max_splits, num_buckets, 2]. After stacking the
    # features, swap the two leading axes to obtain
    # [max_splits, num_features, num_buckets, 2].
    stats = np.swapaxes(np.asarray([feature_0, feature_1]), 0, 1)
    node_range = [1, 3]  # nodes 1 and 2 will be processed.
    # Settings shared by both calls; only min_node_weight differs.
    common = dict(l1=0.0, l2=0.0, tree_complexity=0.0, logits_dimension=1)

    outputs = boosted_trees_ops.calculate_best_feature_split(
        node_range, stats, min_node_weight=1, **common)
    node_ids, _, _, _, _, _, _ = outputs
    # Only node 1 is expected to come back with min_node_weight=1.
    self.assertAllEqual([1], node_ids)

    # With min_node_weight=10 no node can be split on any feature.
    outputs = boosted_trees_ops.calculate_best_feature_split(
        node_range, stats, min_node_weight=10, **common)
    node_ids, _, _, _, _, _, _ = outputs
    self.assertAllEqual([], node_ids)
Пример #3
0
    def testCalculateBestMultiDimFeatureSplitsWithoutRegularization(self):
        """Checks the best split per node when l1, l2 and complexity are 0."""
        # Rearranged to [max_splits, num_features, num_buckets, 2].
        stats = np.swapaxes(
            np.asarray(self._get_stats_summary_for_split()), 0, 1)
        node_range = [1, 3]  # nodes 1 and 2 will be processed.

        best_split = boosted_trees_ops.calculate_best_feature_split(
            node_range,
            stats,
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            logits_dimension=1)
        (node_ids, gains, feature_dimensions, thresholds, left_node_contribs,
         right_node_contribs, split_types) = self.evaluate(best_split)

        # Results should match the v1 op (CalculateBestGainsPerFeature), with
        # the best-gain feature dimension picked for each node.
        self.assertAllEqual([1, 2], node_ids)
        self.assertAllClose([0.02823, 0.41184], gains)
        self.assertAllEqual([1, 1], thresholds)
        self.assertAllEqual([1, 0], feature_dimensions)
        # Each contrib is later added to the previous node value to form the
        # value of the corresponding (left or right) child node.
        self.assertAllClose([[-.6], [.568966]], left_node_contribs)
        self.assertAllClose([[-.076923], [-.75]], right_node_contribs)
        self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
Пример #4
0
    def testCalculateBestMultiDimFeatureSplitsWithTreeComplexity(self):
        """Checks the best split per node with l2=0.1, tree_complexity=3."""
        # Rearranged to [max_splits, num_features, num_buckets, 2].
        stats = np.swapaxes(
            np.asarray(self._get_stats_summary_for_split()), 0, 1)
        node_range = [1, 3]  # nodes 1 and 2 will be processed.

        best_split = boosted_trees_ops.calculate_best_feature_split(
            node_range,
            stats,
            l1=0.,
            l2=0.1,
            tree_complexity=3.,
            min_node_weight=0,
            logits_dimension=1)
        (node_ids, gains, feature_dimensions, thresholds, left_node_contribs,
         right_node_contribs, split_types) = self.evaluate(best_split)

        # Results should match the v1 op (CalculateBestGainsPerFeature), with
        # the best-gain feature dimension picked for each node.
        self.assertAllEqual([1, 2], node_ids)
        # l1 is 0 in this call, so the expected gains are the ones for the l2
        # and tree_complexity settings passed above.
        self.assertAllClose([-2.98120904, -2.66068625], gains)
        self.assertAllEqual([1, 1], thresholds)
        self.assertAllClose([[-0.5], [0.485294]], left_node_contribs)
        self.assertAllClose([[-0.043478], [-.6]], right_node_contribs)
        self.assertAllEqual([1, 0], feature_dimensions)
        self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
Пример #5
0
  def testCalculateBestMultiDimFeatureSplitsWithoutRegularization(self):
    """Checks the best split per node when l1, l2 and complexity are 0."""
    # Rearranged to [max_splits, num_features, num_buckets, 2].
    stats = np.swapaxes(
        np.asarray(self._get_stats_summary_for_split()), 0, 1)
    node_range = [1, 3]  # nodes 1 and 2 will be processed.

    best_split = boosted_trees_ops.calculate_best_feature_split(
        node_range,
        stats,
        l1=0.0,
        l2=0.0,
        tree_complexity=0.0,
        min_node_weight=0,
        logits_dimension=1)
    (node_ids, gains, feature_dimensions, thresholds, left_node_contribs,
     right_node_contribs, split_types) = self.evaluate(best_split)

    # Results should match the v1 op (CalculateBestGainsPerFeature), with the
    # best-gain feature dimension picked for each node.
    self.assertAllEqual([1, 2], node_ids)
    self.assertAllClose([0.02823, 0.41184], gains)
    self.assertAllEqual([1, 1], thresholds)
    self.assertAllEqual([1, 0], feature_dimensions)
    # Each contrib is later added to the previous node value to form the
    # value of the corresponding (left or right) child node.
    self.assertAllClose([[-.6], [.568966]], left_node_contribs)
    self.assertAllClose([[-.076923], [-.75]], right_node_contribs)
    self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
Пример #6
0
  def testCalculateBestMultiDimFeatureSplitsWithTreeComplexity(self):
    """Checks the best split per node with l2=0.1 and tree_complexity=3."""
    # Rearranged to [max_splits, num_features, num_buckets, 2].
    stats = np.swapaxes(
        np.asarray(self._get_stats_summary_for_split()), 0, 1)
    node_range = [1, 3]  # nodes 1 and 2 will be processed.

    best_split = boosted_trees_ops.calculate_best_feature_split(
        node_range,
        stats,
        l1=0.,
        l2=0.1,
        tree_complexity=3.,
        min_node_weight=0,
        logits_dimension=1)
    (node_ids, gains, feature_dimensions, thresholds, left_node_contribs,
     right_node_contribs, split_types) = self.evaluate(best_split)

    # Results should match the v1 op (CalculateBestGainsPerFeature), with the
    # best-gain feature dimension picked for each node.
    self.assertAllEqual([1, 2], node_ids)
    # l1 is 0 in this call, so the expected gains are the ones for the l2 and
    # tree_complexity settings passed above.
    self.assertAllClose([-2.98120904, -2.66068625], gains)
    self.assertAllEqual([1, 1], thresholds)
    self.assertAllClose([[-0.5], [0.485294]], left_node_contribs)
    self.assertAllClose([[-0.043478], [-.6]], right_node_contribs)
    self.assertAllEqual([1, 0], feature_dimensions)
    self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
Пример #7
0
    def testCalculateMultiDimBestSplitsWithMinNodeWeight(self):
        """Checks the best split per node when min_node_weight is 1."""
        zero = [0., 0.]
        unused = [zero] * 4  # all-zero stats row used for nodes 3 through 6
        feature_0 = [
            [zero, [.08, .09], zero, zero],  # node 0; ignored
            [zero, [.15, .36], [.06, .61], [.1, .2]],  # node 1
            [zero, [-.33, .68], zero, [.3, .4]],  # node 2
            unused, unused, unused, unused,  # nodes 3-6; ignored
        ]
        feature_1 = [
            [zero, zero, [.08, .09], zero],  # node 0; ignored
            [zero, [.3, .5], [-.05, .6], [.06, .07]],  # node 1
            [[.1, 1.], [.2, -.05], [-.4, .05], [.07, .08]],  # node 2
            unused, unused, unused, unused,  # nodes 3-6; ignored
        ]
        # Each feature is [max_splits, num_buckets, 2]. After stacking the
        # features, swap the two leading axes to obtain
        # [max_splits, num_features, num_buckets, 2].
        stats = np.swapaxes(np.asarray([feature_0, feature_1]), 0, 1)
        node_range = [1, 3]  # nodes 1 and 2 will be processed.

        best_split = boosted_trees_ops.calculate_best_feature_split(
            node_range,
            stats,
            l1=0.,
            l2=0.,
            tree_complexity=0.,
            min_node_weight=1,
            logits_dimension=1)
        (node_ids, gains, feature_dimensions, thresholds, left_node_contribs,
         right_node_contribs, split_types) = self.evaluate(best_split)

        self.assertAllEqual([1, 2], node_ids)
        # Expected values for min_node_weight=1 with l1, l2 and
        # tree_complexity all set to 0.
        self.assertAllClose([0.098013, 0.931596], gains)
        self.assertAllEqual([1, 1], thresholds)
        self.assertAllClose([[-.6], [-0.315789]], left_node_contribs)
        self.assertAllClose([[-0.014925], [2.53846]], right_node_contribs)
        self.assertAllEqual([1, 1], feature_dimensions)
        self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
Пример #8
0
  def testCalculateMultiDimBestSplitsWithMinNodeWeight(self):
    """Checks the best split per node when min_node_weight is 1."""
    zero = [0., 0.]
    unused = [zero] * 4  # all-zero stats row used for nodes 3 through 6
    feature_0 = [
        [zero, [.08, .09], zero, zero],  # node 0; ignored
        [zero, [.15, .36], [.06, .61], [.1, .2]],  # node 1
        [zero, [-.33, .68], zero, [.3, .4]],  # node 2
        unused, unused, unused, unused,  # nodes 3-6; ignored
    ]
    feature_1 = [
        [zero, zero, [.08, .09], zero],  # node 0; ignored
        [zero, [.3, .5], [-.05, .6], [.06, .07]],  # node 1
        [[.1, 1.], [.2, -.05], [-.4, .05], [.07, .08]],  # node 2
        unused, unused, unused, unused,  # nodes 3-6; ignored
    ]
    # Each feature is [max_splits, num_buckets, 2]. After stacking the
    # features, swap the two leading axes to obtain
    # [max_splits, num_features, num_buckets, 2].
    stats = np.swapaxes(np.asarray([feature_0, feature_1]), 0, 1)
    node_range = [1, 3]  # nodes 1 and 2 will be processed.

    best_split = boosted_trees_ops.calculate_best_feature_split(
        node_range,
        stats,
        l1=0.,
        l2=0.,
        tree_complexity=0.,
        min_node_weight=1,
        logits_dimension=1)
    (node_ids, gains, feature_dimensions, thresholds, left_node_contribs,
     right_node_contribs, split_types) = self.evaluate(best_split)

    self.assertAllEqual([1, 2], node_ids)
    # Expected values for min_node_weight=1 with l1, l2 and tree_complexity
    # all set to 0.
    self.assertAllClose([0.098013, 0.931596], gains)
    self.assertAllEqual([1, 1], thresholds)
    self.assertAllClose([[-.6], [-0.315789]], left_node_contribs)
    self.assertAllClose([[-0.014925], [2.53846]], right_node_contribs)
    self.assertAllEqual([1, 1], feature_dimensions)
    self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)