def test_union_of_continuous_dimensions(self):
    """Check that the union of two disjoint intervals contains points of both, but not the gap."""
    A = ContinuousDimension(name='x', min=0, max=1)
    B = ContinuousDimension(name='x', min=2, max=3)
    C = A.union(B)

    # assertIn/assertNotIn report the value and the container when they fail,
    # whereas assertTrue(x in C) only reports "False is not true".
    self.assertIn(0.5, C)
    self.assertIn(2.5, C)
    self.assertNotIn(1.5, C)
def test_union_of_continuous_dimensions(self):
    """Probe membership of sample points in the union of the 0..1 and 2..3 intervals."""
    zero_to_one = ContinuousDimension(name='x', min=0, max=1)
    two_to_three = ContinuousDimension(name='x', min=2, max=3)
    union = zero_to_one.union(two_to_three)

    # One point from each operand must be a member of the union ...
    for inside_point in (0.5, 2.5):
        assert inside_point in union

    # ... while a point from the gap between the operands must not.
    assert 1.5 not in union
def test_composition_of_arbitrary_dimensions(self):
    """Check that intersecting continuous and discrete dimensions raises TypeError in either direction."""
    continuous = ContinuousDimension(name='x', min=0, max=1)
    neighbour = ContinuousDimension(name='x', min=1, max=2)
    # Result of a set difference between two continuous dimensions.
    difference = continuous - neighbour
    discrete = DiscreteDimension(name='x', min=0, max=1)

    with self.assertRaises(TypeError):
        continuous.intersection(discrete)

    with self.assertRaises(TypeError):
        difference.intersection(discrete)

    with self.assertRaises(TypeError):
        discrete.intersection(continuous)
def test_composition_of_arbitrary_dimensions(self):
    """Check that intersecting continuous and discrete dimensions raises TypeError in either direction."""
    continuous = ContinuousDimension(name='x', min=0, max=1)
    neighbour = ContinuousDimension(name='x', min=1, max=2)
    # Result of a set difference between two continuous dimensions.
    difference = continuous - neighbour
    discrete = DiscreteDimension(name='x', min=0, max=1)

    # (receiver, argument) pairs whose intersection is expected to be rejected.
    incompatible_pairs = (
        (continuous, discrete),
        (difference, discrete),
        (discrete, continuous),
    )
    for receiver, argument in incompatible_pairs:
        with pytest.raises(TypeError):
            receiver.intersection(argument)
def setup_method(self, method) -> None:
    """Create two continuous and two discrete hypergrid fixtures and collect them in self.all_grids."""
    self.small_square = SimpleHypergrid(
        name="small_square",
        dimensions=[ContinuousDimension(name=axis, min=1, max=2) for axis in ('x', 'y')]
    )
    self.big_square = SimpleHypergrid(
        name="big_square",
        dimensions=[ContinuousDimension(name=axis, min=0, max=3) for axis in ('x', 'y')]
    )
    self.small_grid = SimpleHypergrid(
        name="small_grid",
        dimensions=[DiscreteDimension(name=axis, min=1, max=2) for axis in ('x', 'y')]
    )
    # The big grid is the only fixture with a third dimension.
    self.big_grid = SimpleHypergrid(
        name="big_grid",
        dimensions=[DiscreteDimension(name=axis, min=0, max=3) for axis in ('x', 'y', 'z')]
    )

    self.all_grids = [self.small_square, self.big_square, self.small_grid, self.big_grid]
def test_inserting_overlapping_chunks(self):
    """Check that adding two overlapping intervals leaves a lone root node whose payload spans 0..15."""
    zero_to_ten = ContinuousDimension(name='x', min=0, max=10)
    five_to_fifteen = ContinuousDimension(name='x', min=5, max=15)

    interval_tree = IntervalTree(name='x', chunks_type=ContinuousDimension)
    interval_tree.add(zero_to_ten)
    interval_tree.add(five_to_fifteen)

    # The root must have neither children nor a parent.
    only_node = interval_tree.root
    self.assertIsNone(only_node.left)
    self.assertIsNone(only_node.right)
    self.assertIsNone(only_node.parent)

    # assertEqual shows both operands on failure, unlike assertTrue(a == b).
    resulting_chunk = only_node.payload
    self.assertEqual(resulting_chunk.min, 0)
    self.assertEqual(resulting_chunk.max, 15)
def test_pop_overlapping_chunks(self):
    """ Exercises IntervalTree.pop_overlapping_chunks() method.

    Three disjoint intervals are added to the tree; popping with an interval spanning
    -1..4 is expected to return exactly the 0..1 and 2..3 intervals.
    """
    zero_to_one = ContinuousDimension(name='x', min=0, max=1)
    two_to_three = ContinuousDimension(name='x', min=2, max=3)
    five_to_seven = ContinuousDimension(name='x', min=5, max=7)
    minus_one_to_four = ContinuousDimension(name='x', min=-1, max=4)

    interval_tree = IntervalTree(name='x', chunks_type=ContinuousDimension)
    interval_tree.add(zero_to_one)
    interval_tree.add(two_to_three)
    interval_tree.add(five_to_seven)

    overlapping_chunks = interval_tree.pop_overlapping_chunks(chunk=minus_one_to_four)

    # Specific assertions report the actual length / offending chunk on failure.
    self.assertEqual(len(overlapping_chunks), 2)
    for chunk in overlapping_chunks:
        self.assertIn(chunk, (zero_to_one, two_to_three))
def enumerate_possibly_overlapping_continuous_intervals(self, interval, gap_width):
    """ For a given interval, enumerates candidate intervals and a boolean indicating overlap.

    Every candidate carries interval.name. Candidates located before the interval start
    at interval.min - gap_width / 2.

    :param interval: reference interval; its name, min, max and include_min are read.
    :param gap_width: determines how far before interval.min the preceding candidates start.
    :return: yields (candidate, overlaps) tuples, where overlaps is the expected answer to
        "does candidate overlap interval?".
    """
    min_for_preceding_intervals = interval.min - gap_width / 2.0
    interval_width = interval.max - interval.min

    # Ends gap_width / 3 before interval.min: no overlap.
    preceding = ContinuousDimension(
        name=interval.name,
        min=min_for_preceding_intervals,
        max=interval.min - gap_width / 3.0
    )
    yield preceding, False

    # Ends exactly at interval.min, but the shared endpoint is included by exactly one of
    # the two intervals, so no point belongs to both.
    preceding_contiguous = ContinuousDimension(
        name=interval.name,
        min=min_for_preceding_intervals,
        max=interval.min,
        include_max=not interval.include_min
    )
    # Previously `preceding` was yielded a second time here and the contiguous
    # candidate was never exercised.
    yield preceding_contiguous, False

    if interval.include_min:
        # Both intervals include interval.min, so they share that point.
        preceding_overlapping_at_min = ContinuousDimension(
            name=interval.name,
            min=min_for_preceding_intervals,
            max=interval.min
        )
        yield preceding_overlapping_at_min, True

    # Starts before the interval and ends at its midpoint.
    overlapping_at_front = ContinuousDimension(
        name=interval.name,
        min=min_for_preceding_intervals,
        max=interval.min + interval_width / 2.0
    )
    yield overlapping_at_front, True

    # NOTE(review): despite its name, this candidate ends at interval.max + 2/3 of the
    # width, i.e. past interval.max. It still overlaps, so the expected flag is correct;
    # confirm whether full containment (max=interval.min + 2/3 width) was intended.
    contained = ContinuousDimension(
        name=interval.name,
        min=interval.min + interval_width / 3.0,
        max=interval.max + interval_width * 2.0 / 3.0
    )
    yield contained, True
def test_containment(self):
    """Check containment between dimensions of different kinds, and of scalars in dimensions."""
    long_segment = ContinuousDimension(name='x', min=0, max=100 * 1000)
    short_segment = ContinuousDimension(name='x', min=0, max=100)

    long_linear_sequence = DiscreteDimension(name='x', min=0, max=100 * 1000)
    # Previously a copy of the long sequence (max=100 * 1000); the short variant
    # mirrors short_segment so that a genuinely different dimension is tested.
    short_linear_sequence = DiscreteDimension(name='x', min=0, max=100)

    long_fibonacci_sequence = OrdinalDimension(
        name='x',
        ordered_values=list(fibonacci(max=100 * 1000)),
        ascending=True
    )
    short_fibonacci_sequence = OrdinalDimension(
        name='x',
        ordered_values=list(fibonacci(max=100)),
        ascending=True
    )

    a_few_options = CategoricalDimension(name='x', values=[5, 13, 34])
    boolean_choice = CategoricalDimension(name='x', values=[True, False])

    # Each of these dimensions is expected to be contained in the long segment.
    for dimension in [
            short_segment,
            long_linear_sequence,
            short_linear_sequence,
            long_fibonacci_sequence,
            short_fibonacci_sequence,
            a_few_options
    ]:
        self.assertIn(dimension, long_segment)

    self.assertIn(short_fibonacci_sequence, long_fibonacci_sequence)
    self.assertIn(a_few_options, short_fibonacci_sequence)
    self.assertIn(True, boolean_choice)
    self.assertIn(12, long_segment)
def _fit_root_regression(
        self,
        x: pd.DataFrame,
        y: pd.DataFrame,
        iteration_number: int
):
    """Fit the root (base) regression model on the design matrix and return self.

    :param x: design matrix; one continuous dimension is derived from every column.
    :param y: target values, passed through to the base regressor's fit().
    :param iteration_number: passed through to the base regressor's fit().
    :return: self
    """
    # TODO : Add back RidgeCV option after creating RidgeCrossValidatedRegressionModel
    supported_root_model_names = [
        LassoCrossValidatedRegressionModel.__name__
    ]
    assert self.model_config.boosting_root_model_name in supported_root_model_names, \
        f'Unrecognized boosting_root_model_name {self.model_config.boosting_root_model_name}'

    # The RERF transform_x has already produced the proper design matrix, so that matrix is
    # the input space of the root regression model. A temporary hypergrid describing it is
    # built here from the observed column ranges. This is not ideal, but deriving min and max
    # of the polynomial terms (given feature column degrees) is non-trivial.
    # TODO: set bounds on the polynomial terms correctly and eliminate the hack forcing the
    #  base_regressor to skip filtering invalid features
    design_matrix_hypergrid = SimpleHypergrid(
        name='RegressionEnhanceRandomForest_design_matrix',
        dimensions=None
    )
    for column_name in x.columns.values:
        column = x[column_name]
        design_matrix_hypergrid.add_dimension(
            ContinuousDimension(name=column_name, min=column.min(), max=column.max())
        )

    # Fit lasso/ridge model using either specified params from __init__ or hyper-parameter search.
    if self.model_config.boosting_root_model_name == LassoCrossValidatedRegressionModel.__name__:
        lasso_config = self.model_config.dimension_value_dict['lasso_regression_model_config']
        self.base_regressor_ = LassoCrossValidatedRegressionModel(
            model_config=lasso_config,
            input_space=design_matrix_hypergrid,
            output_space=self.output_space
        )

    # The column ranges used above are not the true valid ranges, so the base regressor
    # must not filter inputs against them when predicting.
    self.base_regressor_.skip_input_filtering_on_predict = True

    self.base_regressor_.fit(x, y, iteration_number=iteration_number)
    return self
def test_pop_overlapping_chunks_2(self):
    """ Exercises IntervalTree.pop_overlapping_chunks() method more thoroughly.

    Let's make a bunch of intervals, place them in the tree and then let's
    pop_overlapping_chunks from the tree.

    To test the widest array of code paths, let's make a tree with a lot of evenly
    spaced intervals. Then, for each of them, let's produce intervals that should
    (or should not) overlap it.
    """
    random.seed(2)

    num_intervals_in_tree = 20
    intervals_width = 10
    gap_between_intervals = 1000

    intervals = []
    for i in range(num_intervals_in_tree):
        interval_min = i * (gap_between_intervals + intervals_width)
        interval_max = interval_min + intervals_width
        intervals.append(ContinuousDimension(name='x', min=interval_min, max=interval_max))

    # Let's shuffle the intervals. The random sort key is kept (rather than
    # random.shuffle) so the seeded ordering stays the same.
    intervals = sorted(intervals, key=lambda interval: random.random())

    # Let's run a test suite for all intervals in the tree.
    for overlapping_interval in intervals:
        candidates = self.enumerate_possibly_overlapping_continuous_intervals(
            overlapping_interval,
            gap_width=gap_between_intervals
        )
        for possibly_overlapping_interval, overlaps in candidates:
            # Popping mutates the tree, so we gotta make a new tree every time.
            interval_tree = self.make_tree(intervals)
            overlapping_chunks = interval_tree.pop_overlapping_chunks(possibly_overlapping_interval)
            if overlaps:
                self.assertEqual(overlapping_chunks[0], overlapping_interval)
            else:
                self.assertEqual(len(overlapping_chunks), 0)
def test_arbitrary_composition_of_continuous_dimensions(self):
    """Probe membership of sample points in dimensions built from unions and differences."""
    zero_to_one = ContinuousDimension(name='x', min=0, max=1)
    two_to_three = ContinuousDimension(name='x', min=2, max=3)
    cutout = ContinuousDimension(name='x', min=2.5, max=3.5)

    union_minus_cutout = zero_to_one.union(two_to_three) - cutout
    trimmed = two_to_three - cutout
    recombined = zero_to_one.union(trimmed)

    # (zero_to_one | two_to_three) - cutout
    assert 0.5 in union_minus_cutout
    for outside_point in (1.5, 2.5, 3.4, 35):
        assert outside_point not in union_minus_cutout

    # two_to_three - cutout
    assert 2 in trimmed
    assert 2.5 not in trimmed

    # zero_to_one | (two_to_three - cutout)
    assert 0 in recombined
    assert 1 in recombined
    assert 1.5 not in recombined
    assert 2 in recombined
    assert 2.5 not in recombined
def test_arbitrary_composition_of_continuous_dimensions(self):
    """Probe membership of sample points in dimensions built from unions and differences."""
    A = ContinuousDimension(name='x', min=0, max=1)
    B = ContinuousDimension(name='x', min=2, max=3)
    C = ContinuousDimension(name='x', min=2.5, max=3.5)
    D = A.union(B) - C
    E = B - C
    F = A.union(E)

    # assertIn/assertNotIn name the offending value and container on failure,
    # unlike assertTrue(x in D).
    self.assertIn(0.5, D)
    self.assertNotIn(1.5, D)
    self.assertNotIn(2.5, D)
    self.assertNotIn(3.4, D)
    self.assertNotIn(35, D)

    self.assertIn(2, E)
    self.assertNotIn(2.5, E)

    # Formerly one compound `and` expression; split so that a failure identifies
    # which membership check broke.
    self.assertIn(0, F)
    self.assertIn(1, F)
    self.assertNotIn(1.5, F)
    self.assertIn(2, F)
    self.assertNotIn(2.5, F)
def setUp(self):
    """Create the ContinuousDimension fixtures shared by the tests."""

    def x_interval(low, high, **inclusion_flags):
        # All but one fixture are named 'x'; endpoint flags are forwarded only when given.
        return ContinuousDimension(name='x', min=low, max=high, **inclusion_flags)

    # Degenerate intervals at zero, one per combination of endpoint inclusion.
    self.empty = x_interval(0, 0, include_min=False, include_max=False)
    self.should_be_empty = x_interval(0, 0, include_min=False, include_max=True)
    self.should_be_empty_too = x_interval(0, 0, include_min=True, include_max=False)
    self.should_contain_zero = x_interval(0, 0, include_min=True, include_max=True)

    # The 0..1 interval, one per combination of endpoint inclusion.
    self.closed = x_interval(0, 1)
    self.left_open = x_interval(0, 1, include_min=False)
    self.right_open = x_interval(0, 1, include_max=False)
    self.open = x_interval(0, 1, include_min=False, include_max=False)

    # Intervals positioned inside, around and across the 0..1 interval.
    self.inner = x_interval(0.2, 0.8)
    self.outer = x_interval(-0.2, 1.2)
    self.left_overlapping = x_interval(-0.2, 0.8)
    self.right_overlapping = x_interval(0.2, 1.2)

    # Same bounds as self.inner but a different dimension name.
    self.inner_wrongly_named = ContinuousDimension(name='y', min=0.2, max=0.8)

    self.one_to_five = x_interval(1, 5)
    self.six_to_ten = x_interval(6, 10)
class TestContinuousDimension(unittest.TestCase):
    """Tests string formatting, containment, set operations and random() of ContinuousDimension.

    Specific unittest assertions (assertEqual, assertIn, assertNotIn, assertIsNone) are used
    so that a failure reports the values involved rather than "False is not true".
    """

    def setUp(self):
        """Create the ContinuousDimension fixtures shared by the tests."""
        # Degenerate intervals at zero, one per combination of endpoint inclusion.
        self.empty = ContinuousDimension(name='x', min=0, max=0, include_min=False, include_max=False)
        self.should_be_empty = ContinuousDimension(name='x', min=0, max=0, include_min=False, include_max=True)
        self.should_be_empty_too = ContinuousDimension(name='x', min=0, max=0, include_min=True, include_max=False)
        self.should_contain_zero = ContinuousDimension(name='x', min=0, max=0, include_min=True, include_max=True)

        # The 0..1 interval, one per combination of endpoint inclusion.
        self.closed = ContinuousDimension(name='x', min=0, max=1)
        self.left_open = ContinuousDimension(name='x', min=0, max=1, include_min=False)
        self.right_open = ContinuousDimension(name='x', min=0, max=1, include_max=False)
        self.open = ContinuousDimension(name='x', min=0, max=1, include_min=False, include_max=False)

        # Intervals positioned inside, around and across the 0..1 interval.
        self.inner = ContinuousDimension(name='x', min=0.2, max=0.8)
        self.outer = ContinuousDimension(name='x', min=-0.2, max=1.2)
        self.left_overlapping = ContinuousDimension(name='x', min=-0.2, max=0.8)
        self.right_overlapping = ContinuousDimension(name='x', min=0.2, max=1.2)

        # Same bounds as self.inner but a different dimension name.
        self.inner_wrongly_named = ContinuousDimension(name='y', min=0.2, max=0.8)

        self.one_to_five = ContinuousDimension(name='x', min=1, max=5)
        self.six_to_ten = ContinuousDimension(name='x', min=6, max=10)

    def test_string_representation(self):
        """Check str() of every fixture against its expected interval notation."""
        self.assertEqual(str(self.empty), "x: (0.00, 0.00)")
        self.assertEqual(str(self.should_be_empty), "x: (0.00, 0.00)")
        self.assertEqual(str(self.should_be_empty_too), "x: (0.00, 0.00)")
        self.assertEqual(str(self.should_contain_zero), "x: [0.00, 0.00]")
        self.assertEqual(str(self.closed), "x: [0.00, 1.00]")
        self.assertEqual(str(self.left_open), "x: (0.00, 1.00]")
        self.assertEqual(str(self.right_open), "x: [0.00, 1.00)")
        self.assertEqual(str(self.open), "x: (0.00, 1.00)")
        self.assertEqual(str(self.inner), "x: [0.20, 0.80]")
        self.assertEqual(str(self.outer), "x: [-0.20, 1.20]")
        self.assertEqual(str(self.left_overlapping), "x: [-0.20, 0.80]")
        self.assertEqual(str(self.right_overlapping), "x: [0.20, 1.20]")
        self.assertEqual(str(self.inner_wrongly_named), "y: [0.20, 0.80]")

    def test_point_containment(self):
        """Check membership of scalar points, in particular at the interval endpoints."""
        # Each group below was a single compound `and` assertion; it is split so a
        # failure identifies the exact (point, dimension) pair. Order is unchanged.
        self.assertNotIn(0, self.empty)
        self.assertNotIn(0, self.should_be_empty)
        self.assertNotIn(0, self.should_be_empty_too)
        self.assertIn(0, self.should_contain_zero)

        self.assertNotIn(-1, self.closed)
        self.assertNotIn(-1, self.left_open)
        self.assertNotIn(-1, self.right_open)
        self.assertNotIn(-1, self.open)

        self.assertIn(0, self.closed)
        self.assertNotIn(0, self.left_open)
        self.assertIn(0, self.right_open)
        self.assertNotIn(0, self.open)

        self.assertIn(0.5, self.closed)
        self.assertIn(0.5, self.left_open)
        self.assertIn(0.5, self.right_open)
        self.assertIn(0.5, self.open)

        self.assertIn(1, self.closed)
        self.assertIn(1, self.left_open)
        self.assertNotIn(1, self.right_open)
        self.assertNotIn(1, self.open)

        self.assertNotIn(2, self.closed)
        self.assertNotIn(2, self.left_open)
        self.assertNotIn(2, self.right_open)
        self.assertNotIn(2, self.open)

    def test_continuous_dimension_containment(self):
        """Check containment of one dimension in another."""
        self.assertIn(self.open, self.closed)
        self.assertIn(self.left_open, self.closed)
        self.assertIn(self.right_open, self.closed)

        self.assertNotIn(self.left_open, self.open)
        self.assertNotIn(self.right_open, self.open)
        self.assertNotIn(self.closed, self.open)

        self.assertNotIn(self.left_open, self.right_open)
        self.assertNotIn(self.right_open, self.left_open)

        self.assertIn(self.inner, self.closed)
        self.assertIn(self.inner, self.open)
        self.assertIn(self.inner, self.left_open)
        self.assertIn(self.inner, self.right_open)

        self.assertIn(self.closed, self.outer)
        self.assertIn(self.open, self.outer)
        self.assertIn(self.left_open, self.outer)
        self.assertIn(self.right_open, self.outer)

        # A dimension with a different name is not contained, whatever its bounds.
        self.assertNotIn(self.inner_wrongly_named, self.closed)
        self.assertNotIn(self.inner_wrongly_named, self.open)
        self.assertNotIn(self.inner_wrongly_named, self.left_open)
        self.assertNotIn(self.inner_wrongly_named, self.right_open)

    def test_continuous_dimension_set_operations(self):
        """Check containment relations of unions and intersections of the fixtures."""
        self.assertIn(self.inner, self.inner.union(self.closed))
        self.assertIn(self.inner, self.inner.intersection(self.closed))
        self.assertIn(self.open, self.open.intersection(self.closed))
        self.assertNotIn(self.closed, self.open.intersection(self.closed))
        self.assertIn(self.closed, self.open.union(self.closed))
        self.assertIn(self.closed, self.left_open.union(self.right_open))
        self.assertIn(self.left_open.intersection(self.right_open), self.open)

    def test_random(self):
        """Check that random() returns None for the empty fixture and members otherwise."""
        self.assertIsNone(self.empty.random())
        self.assertIn(self.outer.random(), self.outer)
        # Samples from 1..5 must never land in the disjoint 6..10 interval.
        for _ in range(1000):
            self.assertNotIn(self.one_to_five.random(), self.six_to_ten)
class TestContinuousDimension:
    """Tests string formatting, containment, set operations and random() of ContinuousDimension."""

    def setup_method(self, method):
        """Create the dimension fixtures shared by the tests."""

        def x_interval(low, high, **inclusion_flags):
            # Endpoint flags are forwarded only when a fixture specifies them.
            return ContinuousDimension(name='x', min=low, max=high, **inclusion_flags)

        self.empty = x_interval(0, 0, include_min=False, include_max=False)

        # Dimensions with an infinite upper bound.
        self.unbounded_continuous = x_interval(0, math.inf)
        self.unbounded_discrete = DiscreteDimension(name='x', min=0, max=math.inf)

        # Degenerate intervals at zero with the remaining endpoint-inclusion combinations.
        self.should_be_empty = x_interval(0, 0, include_min=False, include_max=True)
        self.should_be_empty_too = x_interval(0, 0, include_min=True, include_max=False)
        self.should_contain_zero = x_interval(0, 0, include_min=True, include_max=True)

        # The 0..1 interval, one per combination of endpoint inclusion.
        self.closed = x_interval(0, 1)
        self.left_open = x_interval(0, 1, include_min=False)
        self.right_open = x_interval(0, 1, include_max=False)
        self.open = x_interval(0, 1, include_min=False, include_max=False)

        # Intervals positioned inside, around and across the 0..1 interval.
        self.inner = x_interval(0.2, 0.8)
        self.outer = x_interval(-0.2, 1.2)
        self.left_overlapping = x_interval(-0.2, 0.8)
        self.right_overlapping = x_interval(0.2, 1.2)

        # Same bounds as self.inner but a different dimension name.
        self.inner_wrongly_named = ContinuousDimension(name='y', min=0.2, max=0.8)

        self.one_to_five = x_interval(1, 5)
        self.six_to_ten = x_interval(6, 10)

    def test_string_representation(self):
        """Check str() of every fixture against its expected interval notation."""
        expected_strings = (
            (self.empty, "x: (0.00, 0.00)"),
            (self.should_be_empty, "x: (0.00, 0.00)"),
            (self.should_be_empty_too, "x: (0.00, 0.00)"),
            (self.should_contain_zero, "x: [0.00, 0.00]"),
            (self.closed, "x: [0.00, 1.00]"),
            (self.left_open, "x: (0.00, 1.00]"),
            (self.right_open, "x: [0.00, 1.00)"),
            (self.open, "x: (0.00, 1.00)"),
            (self.inner, "x: [0.20, 0.80]"),
            (self.outer, "x: [-0.20, 1.20]"),
            (self.left_overlapping, "x: [-0.20, 0.80]"),
            (self.right_overlapping, "x: [0.20, 1.20]"),
            (self.inner_wrongly_named, "y: [0.20, 0.80]"),
        )
        for dimension, expected in expected_strings:
            assert str(dimension) == expected

    def test_point_containment(self):
        """Check membership of scalar points, in particular at the interval endpoints."""
        closed, left_open, right_open, fully_open = self.closed, self.left_open, self.right_open, self.open
        unit_intervals = (closed, left_open, right_open, fully_open)

        # Only the degenerate interval including both endpoints contains zero.
        assert 0 not in self.empty
        assert 0 not in self.should_be_empty
        assert 0 not in self.should_be_empty_too
        assert 0 in self.should_contain_zero

        for dimension in unit_intervals:
            assert -1 not in dimension

        # Lower endpoint.
        assert 0 in closed
        assert 0 not in left_open
        assert 0 in right_open
        assert 0 not in fully_open

        for dimension in unit_intervals:
            assert 0.5 in dimension

        # Upper endpoint.
        assert 1 in closed
        assert 1 in left_open
        assert 1 not in right_open
        assert 1 not in fully_open

        for dimension in unit_intervals:
            assert 2 not in dimension

    def test_continuous_dimension_containment(self):
        """Check containment of one dimension in another."""
        for part in (self.open, self.left_open, self.right_open):
            assert part in self.closed

        for not_a_part in (self.left_open, self.right_open, self.closed):
            assert not_a_part not in self.open

        assert self.left_open not in self.right_open
        assert self.right_open not in self.left_open

        unit_intervals = (self.closed, self.open, self.left_open, self.right_open)

        for whole in unit_intervals:
            assert self.inner in whole

        for part in unit_intervals:
            assert part in self.outer

        # A dimension with a different name is not contained, whatever its bounds.
        for whole in unit_intervals:
            assert self.inner_wrongly_named not in whole

    def test_continuous_dimension_set_operations(self):
        """Check containment relations of unions and intersections of the fixtures."""
        inner_or_closed = self.inner.union(self.closed)
        assert self.inner in inner_or_closed

        inner_and_closed = self.inner.intersection(self.closed)
        assert self.inner in inner_and_closed

        open_and_closed = self.open.intersection(self.closed)
        assert self.open in open_and_closed

        open_and_closed = self.open.intersection(self.closed)
        assert self.closed not in open_and_closed

        open_or_closed = self.open.union(self.closed)
        assert self.closed in open_or_closed

        half_open_union = self.left_open.union(self.right_open)
        assert self.closed in half_open_union

        half_open_intersection = self.left_open.intersection(self.right_open)
        assert half_open_intersection in self.open

    def test_random(self):
        """Check that random() raises for empty/unbounded dimensions and returns members otherwise."""
        with pytest.raises(ValueError):
            self.empty.random()

        with pytest.raises(ValueError):
            self.unbounded_continuous.random()

        with pytest.raises(OverflowError):
            self.unbounded_discrete.random()

        sample = self.outer.random()
        assert sample in self.outer

        # Samples from 1..5 must never land in the disjoint 6..10 interval.
        for _ in range(1000):
            assert self.one_to_five.random() not in self.six_to_ten
def setup_method(self, method):
    """Create the dimension fixtures shared by the tests."""

    def x_interval(low, high, **inclusion_flags):
        # Endpoint flags are forwarded only when a fixture specifies them.
        return ContinuousDimension(name='x', min=low, max=high, **inclusion_flags)

    self.empty = x_interval(0, 0, include_min=False, include_max=False)

    # Dimensions with an infinite upper bound.
    self.unbounded_continuous = x_interval(0, math.inf)
    self.unbounded_discrete = DiscreteDimension(name='x', min=0, max=math.inf)

    # Degenerate intervals at zero with the remaining endpoint-inclusion combinations.
    self.should_be_empty = x_interval(0, 0, include_min=False, include_max=True)
    self.should_be_empty_too = x_interval(0, 0, include_min=True, include_max=False)
    self.should_contain_zero = x_interval(0, 0, include_min=True, include_max=True)

    # The 0..1 interval, one per combination of endpoint inclusion.
    self.closed = x_interval(0, 1)
    self.left_open = x_interval(0, 1, include_min=False)
    self.right_open = x_interval(0, 1, include_max=False)
    self.open = x_interval(0, 1, include_min=False, include_max=False)

    # Intervals positioned inside, around and across the 0..1 interval.
    self.inner = x_interval(0.2, 0.8)
    self.outer = x_interval(-0.2, 1.2)
    self.left_overlapping = x_interval(-0.2, 0.8)
    self.right_overlapping = x_interval(0.2, 1.2)

    # Same bounds as self.inner but a different dimension name.
    self.inner_wrongly_named = ContinuousDimension(name='y', min=0.2, max=0.8)

    self.one_to_five = x_interval(1, 5)
    self.six_to_ten = x_interval(6, 10)