def test_attach_to_indexes(self):
    """Check which combinations `_attach_to_indexes` passes on for evaluation.

    `_evaluate_combination` is replaced by a mock, so only its call
    arguments are inspected. A two-column candidate must raise
    `AssertionError`.
    """
    existing = [self.index_1, self.index_2]
    single_column_candidate = self.index_3
    self.algo.initial_cost = 10
    best = {"combination": [], "benefit_to_size_ratio": 0}
    evaluate_mock = MagicMock()
    self.algo._evaluate_combination = evaluate_mock

    self.algo._attach_to_indexes(
        existing, single_column_candidate, best, self.algo.initial_cost
    )

    # Candidate appended to the first index; the second index stays untouched.
    # NOTE(review): the trailing 5 presumably is the estimated size of the
    # extended index -- confirm against the fixture.
    extended_first = Index(existing[0].columns + single_column_candidate.columns)
    evaluate_mock.assert_any_call(
        [existing[1], extended_first], best, self.algo.initial_cost, 5
    )

    # Candidate appended to the second index; the first index stays untouched.
    extended_second = Index(existing[1].columns + single_column_candidate.columns)
    evaluate_mock.assert_any_call(
        [existing[0], extended_second], best, self.algo.initial_cost, 1
    )

    wide_candidate = Index([self.column_2, self.column_3])
    with self.assertRaises(AssertionError):
        self.algo._attach_to_indexes(
            existing, wide_candidate, best, self.algo.initial_cost
        )
def test_calculate_indexes_1MB_2column(self, get_utilized_indexes_mock):
    """With `budget_MB` set to 1, only `Index([column_A_0])` must be selected.

    `get_utilized_indexes_mock` is injected by the caller and supplies the
    two starting candidates; cost evaluation uses the mocked cache and the
    size estimators defined on this test case.
    """
    relaxation = RelaxationAlgorithm(
        database_connector=self.connector,
        parameters={"max_index_width": 2, "budget_MB": 1},
    )
    evaluation = relaxation.cost_evaluation
    evaluation.cache = mock_cache
    evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
    evaluation.estimate_size = self.set_estimated_index_size

    starting_candidates = {
        Index([column_A_0], 1000 * 1000),
        Index([column_A_0, column_A_1], 2000 * 1000),
    }
    get_utilized_indexes_mock.return_value = (starting_candidates, None)

    selected = relaxation.calculate_best_indexes(Workload([query_0, query_1]))

    # The single column index is dropped first, because of the lower penalty.
    # The multi column index is prefixed second.
    self.assertEqual(set(selected), {Index([column_A_0])})
def setUp(self):
    """Build the fixture: mock connector, algorithm, table, indexes, workload.

    Cost calculation on the algorithm is redirected to
    `self._calculate_cost_mock`.
    """
    self.connector = MockConnector()
    self.algo = DropHeuristicAlgorithm(database_connector=self.connector)

    self.column_0, self.column_1, self.column_2 = (
        Column(name) for name in ("Col0", "Col1", "Col2")
    )
    self.all_columns = [self.column_0, self.column_1, self.column_2]
    self.table = Table("TableA")
    self.table.add_columns(self.all_columns)

    # One single-column index per column.
    self.index_0, self.index_1, self.index_2 = (
        Index([column]) for column in self.all_columns
    )

    single_filter_query = Query(
        0, "SELECT * FROM TableA WHERE Col0 = 4;", [self.column_0]
    )
    triple_filter_query = Query(
        1,
        "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
        self.all_columns,
    )
    self.database_name = "test_DB"
    self.workload = Workload([single_filter_query, triple_filter_query])
    self.algo.workload = self.workload

    self.algo.cost_evaluation.calculate_cost = MagicMock(
        side_effect=self._calculate_cost_mock
    )
def test_calculate_indexes_1MB_2column(self):
    """With `budget` set to 1, only `Index([column_A_0])` must be selected.

    `_exploit_virtual_indexes` is stubbed to return the two starting
    candidates; cost evaluation uses the mocked cache and the size
    estimators defined on this test case.
    """

    def stub_exploit_virtual_indexes(workload):
        # A fresh candidate set is built on every call.
        return (
            None,
            {
                Index([column_A_0], 1000 * 1000),
                Index([column_A_0, column_A_1], 2000 * 1000),
            },
        )

    relaxation = RelaxationAlgorithm(
        database_connector=self.connector,
        parameters={"max_index_columns": 2, "budget": 1},
    )
    evaluation = relaxation.cost_evaluation
    evaluation.cache = mock_cache
    evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
    evaluation.estimate_size = self.set_estimated_index_size
    relaxation._exploit_virtual_indexes = stub_exploit_virtual_indexes

    workload = Workload([query_0, query_1], self.database_name)
    selected = relaxation.calculate_best_indexes(workload)

    # The single column index is dropped first, because of the lower penalty.
    # The multi column index is prefixed second.
    self.assertEqual(set(selected), {Index([column_A_0])})
def test_appendable_by_index_with_already_present_column(self):
    """An index on a column the target already contains is not appendable."""
    two_column_index = Index([self.column_0, self.column_1])
    duplicate_column_index = Index([self.column_0])

    self.assertFalse(two_column_index.appendable_by(duplicate_column_index))
def test_calculate_indexes_3000MB_2column(self):
    """With `budget` set to 3, both starting candidates must be selected.

    `_exploit_virtual_indexes` is stubbed to return the two starting
    candidates; cost evaluation uses the mocked cache and the size
    estimators defined on this test case.
    """

    def stub_exploit_virtual_indexes(workload):
        # A fresh candidate set is built on every call.
        return (
            None,
            {
                Index([column_A_0], 1000 * 1000),
                Index([column_A_0, column_A_1], 2000 * 1000),
            },
        )

    relaxation = RelaxationAlgorithm(
        database_connector=self.connector,
        parameters={"max_index_columns": 2, "budget": 3},
    )
    evaluation = relaxation.cost_evaluation
    evaluation.cache = mock_cache
    evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
    evaluation.estimate_size = self.set_estimated_index_size
    relaxation._exploit_virtual_indexes = stub_exploit_virtual_indexes

    workload = Workload([query_0, query_1], self.database_name)
    selected = relaxation.calculate_best_indexes(workload)

    self.assertEqual(
        set(selected),
        {Index([column_A_0]), Index([column_A_0, column_A_1])},
    )
def test_potential_indexes(self):
    """Compare `Workload.potential_indexes()` against the expected index sets.

    Three workloads are checked: `query_0` alone, `query_1` alone, and both
    queries together.
    """
    only_column_0 = {Index([column_A_0])}
    all_three_columns = {
        Index([column_A_0]),
        Index([column_A_1]),
        Index([column_A_2]),
    }
    cases = (
        ([query_0], only_column_0),
        ([query_1], all_three_columns),
        ([query_0, query_1], all_three_columns),
    )

    for queries, expected in cases:
        workload = Workload(queries, database_name="test_DB")
        self.assertEqual(set(workload.potential_indexes()), expected)
def setUp(self):
    """Build the fixture: mock connector, algorithm, table, indexes, workload.

    The three single-column indexes get fixed estimated sizes (5, 1 and 3).
    """
    self.connector = MockConnector()
    self.algo = EPICAlgorithm(database_connector=self.connector)

    self.column_1, self.column_2, self.column_3 = (
        Column(name) for name in ("ColA", "ColB", "ColC")
    )
    self.all_columns = [self.column_1, self.column_2, self.column_3]
    self.table = Table("TableA")
    self.table.add_columns(self.all_columns)

    self.index_1 = Index([self.column_1])
    self.index_2 = Index([self.column_2])
    self.index_3 = Index([self.column_3])
    sized_indexes = ((self.index_1, 5), (self.index_2, 1), (self.index_3, 3))
    for index, size in sized_indexes:
        index.estimated_size = size

    # Query ids start at 0 even though the local names start at 1.
    single_filter_query = Query(
        0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1]
    )
    triple_filter_query = Query(
        1,
        "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;",
        self.all_columns,
    )
    self.database_name = "test_DB"
    self.workload = Workload(
        [single_filter_query, triple_filter_query], self.database_name
    )
    self.algo.workload = self.workload
def test_calculate_indexes_3000MB_2column(self, get_utilized_indexes_mock):
    """With `budget_MB` set to 3, both starting candidates must be selected.

    `get_utilized_indexes_mock` is injected by the caller and supplies the
    two starting candidates; cost evaluation uses the mocked cache and the
    size estimators defined on this test case.
    """
    relaxation = RelaxationAlgorithm(
        database_connector=self.connector,
        parameters={"max_index_width": 2, "budget_MB": 3},
    )
    evaluation = relaxation.cost_evaluation
    evaluation.cache = mock_cache
    evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
    evaluation.estimate_size = self.set_estimated_index_size

    starting_candidates = {
        Index([column_A_0], 1000 * 1000),
        Index([column_A_0, column_A_1], 2000 * 1000),
    }
    get_utilized_indexes_mock.return_value = (starting_candidates, None)

    selected = relaxation.calculate_best_indexes(Workload([query_0, query_1]))

    self.assertEqual(
        set(selected),
        {Index([column_A_0]), Index([column_A_0, column_A_1])},
    )
def test_index(self):
    """Check the state of a freshly built `Index` and the empty-column guard.

    A new index must store its columns as a tuple and start with
    `estimated_size` and `hypopg_name` set to `None`. Building an index
    from an empty column list must raise `ValueError`.
    """
    columns = [self.column_0, self.column_1]
    index = Index(columns)

    self.assertEqual(index.columns, tuple(columns))
    # Identity checks: assertIsNone cannot be satisfied by a custom __eq__
    # and reports a clearer message than assertEqual(..., None).
    self.assertIsNone(index.estimated_size)
    self.assertIsNone(index.hypopg_name)

    with self.assertRaises(ValueError):
        Index([])
def test_appendable_by_other_table(self):
    """An index whose column belongs to another table is not appendable."""
    foreign_column = Column("ColZ")
    foreign_table = Table("TableZ")
    foreign_table.add_column(foreign_column)
    foreign_index = Index([foreign_column])

    local_index = Index([self.column_0])

    self.assertFalse(local_index.appendable_by(foreign_index))
def test_evaluate_workload(self):
    """`_evaluate_workload` must cost the workload with the benefits' indexes.

    `calculate_cost` is mocked and must be called exactly once with the
    workload and the indexes in the order of the given `IndexBenefit` list.
    """
    first_index = Index([self.column_0])
    second_index = Index([self.column_1])
    cost_mock = MagicMock()
    self.algo.cost_evaluation.calculate_cost = cost_mock

    benefits = [IndexBenefit(first_index, 10), IndexBenefit(second_index, 9)]
    self.algo._evaluate_workload(benefits, workload=[])

    cost_mock.assert_called_once_with([], [first_index, second_index])
def setUpClass(cls):
    """Create two tables with columns and three single-column indexes."""
    # TableA with two columns.
    cls.column_a_0, cls.column_a_1 = Column("Col0"), Column("Col1")
    cls.table_a = Table("TableA")
    cls.table_a.add_columns([cls.column_a_0, cls.column_a_1])

    # TableB reuses the column name "Col0" on a different table.
    cls.column_b_0 = Column("Col0")
    cls.table_b = Table("TableB")
    cls.table_b.add_columns([cls.column_b_0])

    # index_1 is the one on TableB; index_0 and index_2 are on TableA.
    cls.index_0 = Index([cls.column_a_0])
    cls.index_1 = Index([cls.column_b_0])
    cls.index_2 = Index([cls.column_a_1])
def test_index_eq(self):
    """Exercise `Index.__eq__` for equal, unequal and foreign operands."""
    on_column_0 = Index([self.column_0])
    on_column_1 = Index([self.column_1])
    also_on_column_0 = Index([self.column_0])

    # `==` is spelled out on purpose: the operator itself is under test.
    self.assertFalse(on_column_0 == on_column_1)
    self.assertTrue(on_column_0 == also_on_column_0)

    two_columns = Index([self.column_0, self.column_1])
    self.assertTrue(two_columns == Index([self.column_0, self.column_1]))

    # Check comparing object of different class
    self.assertFalse(two_columns == 3)
def test_exploit_virtual_indexes(self):
    """Check per-query results and candidates of `_exploit_virtual_indexes`.

    Index simulation and plan retrieval are mocked. The fake plan for the
    Table0 query names only the index on the second column, so that index
    is the single expected candidate.
    """

    def fake_simulate_index(index, store_size):
        index.hypopg_name = f"<1337>btree_{index.columns}"

    # For some reason, the database decides to only use an index for one of
    # the filters
    def fake_get_plan(query):
        if "Table0" not in query.text:
            return {
                "Total Cost": 5,
                "Plans": [{"Simple Table Retrieve": "table1"}],
            }
        return {
            "Total Cost": 17,
            "Plans": [{"Index Name": "<1337>btree_(C table0.col1,)"}],
        }

    table0_query = Query(
        0,
        "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;",
        [self.column_0, self.column_1],
    )
    table1_query = Query(1, "SELECT * FROM Table1;", [])
    workload = Workload([table0_query, table1_query], "database_name")

    self.algo.database_connector.get_plan = MagicMock(side_effect=fake_get_plan)
    self.algo.what_if.simulate_index = MagicMock(side_effect=fake_simulate_index)
    self.algo.what_if.drop_all_simulated_indexes = MagicMock()

    query_results, index_candidates = self.algo._exploit_virtual_indexes(workload)

    self.assertEqual(len(query_results), len(workload.queries))
    self.assertEqual(
        query_results[table0_query],
        {
            "cost_without_indexes": 17,
            "cost_with_recommended_indexes": 17,
            "recommended_indexes": {Index([self.column_1])},
        },
    )
    self.assertEqual(
        query_results[table1_query],
        {
            "cost_without_indexes": 5,
            "cost_with_recommended_indexes": 5,
            "recommended_indexes": set(),
        },
    )
    self.assertEqual(index_candidates, {Index([self.column_1])})
def test_index_lt(self):
    """Exercise `Index.__lt__` in both directions for three index pairs."""
    col_0, col_1, col_2 = self.column_0, self.column_1, self.column_2

    # Two single-column indexes.
    on_0, on_1 = Index([col_0]), Index([col_1])
    self.assertLess(on_0, on_1)
    self.assertFalse(on_1 < on_0)

    # Single-column index versus a three-column index starting with it.
    on_0_1_2 = Index([col_0, col_1, col_2])
    self.assertLess(on_0, on_0_1_2)
    self.assertFalse(on_0_1_2 < on_0)

    # Same width, differing only in the second column.
    on_0_1, on_0_2 = Index([col_0, col_1]), Index([col_0, col_2])
    self.assertLess(on_0_1, on_0_2)
    self.assertFalse(on_0_2 < on_0_1)
def test_cache_hit_different_index_same_columns(self):
    """A second, separately built index on the same column must hit the cache.

    After the repeated request `cache_hits` is 1 and the connector's
    `get_cost` call count is still 1.
    """
    evaluation = self.cost_evaluation
    self.assertEqual(evaluation.cost_requests, 0)
    self.assertEqual(evaluation.cache_hits, 0)

    workload = Workload([self.queries[0]])

    # First request: goes to the connector.
    evaluation.calculate_cost(workload, {Index([self.columns[0]])})
    self.assertEqual(evaluation.cost_requests, 1)
    self.assertEqual(evaluation.cache_hits, 0)
    self.assertEqual(self.connector.get_cost.call_count, 1)

    # Second request with a new Index object on the same column.
    evaluation.calculate_cost(workload, {Index([self.columns[0]])})
    self.assertEqual(evaluation.cost_requests, 2)
    self.assertEqual(evaluation.cache_hits, 1)
    self.assertEqual(self.connector.get_cost.call_count, 1)
def setUpClass(cls):
    """Create one table with five columns and indexes on the first three."""
    cls.db_name = "TestDB"
    cls.table = Table("TestTableA")
    # Columns are named Col0 .. Col4.
    cls.columns = [Column(f"Col{number}") for number in range(5)]
    cls.table.add_columns(cls.columns)

    cls.index_0, cls.index_1, cls.index_2 = (
        Index([column]) for column in cls.columns[:3]
    )
def test_calculate_best_indexes_scenario_2(self):
    """Check the selected indexes for budgets 1, 3, 5 and 9.

    Costs come from `self._calculate_cost_mock_2`.
    """
    self.algo.cost_evaluation.calculate_cost = MagicMock(
        side_effect=self._calculate_cost_mock_2
    )
    col_1, col_2, col_3 = self.column_1, self.column_2, self.column_3

    budget_cases = (
        # There is only one index fitting the budget
        (1, [Index([col_1])]),
        # Theoretically, two indexes fit, but one has a better benefit/cost ratio
        (3, [Index([col_1])]),
        # The two indexes with the best ratio should be chosen
        (5, [Index([col_1]), Index([col_2])]),
        # All single column indexes are chosen
        (9, [Index([col_1]), Index([col_2]), Index([col_3])]),
    )

    for budget, expected_indexes in budget_cases:
        self.algo.budget = budget
        chosen = self.algo._calculate_best_indexes(self.workload)
        self.assertEqual(chosen, expected_indexes)
def test_relevant_indexes(self):
    """Check `_relevant_indexes` for an empty set and one or two indexes.

    The first query is expected to keep only the index on the first
    column; the third query is expected to keep both indexes.
    """
    on_first_column = Index([self.columns[0]])
    on_second_column = Index([self.columns[1]])
    evaluation = self.cost_evaluation
    first_query, third_query = self.queries[0], self.queries[2]

    relevant = evaluation._relevant_indexes(first_query, indexes=set())
    self.assertEqual(relevant, frozenset())

    relevant = evaluation._relevant_indexes(first_query, {on_first_column})
    self.assertEqual(relevant, frozenset([on_first_column]))

    relevant = evaluation._relevant_indexes(
        first_query, {on_second_column, on_first_column}
    )
    self.assertEqual(relevant, frozenset([on_first_column]))

    relevant = evaluation._relevant_indexes(
        third_query, {on_second_column, on_first_column}
    )
    self.assertEqual(relevant, frozenset([on_second_column, on_first_column]))
def test_calculate_best_indexes_scenario_3(self):
    """Check single- versus multi-column selection for budgets 2 and 4.

    Costs come from `self._calculate_cost_mock_3`; the workload holds one
    query filtering on two columns.
    """
    two_filter_query = Query(
        0,
        "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2;",
        [self.column_1, self.column_2],
    )
    workload = Workload([two_filter_query])
    self.algo.cost_evaluation.calculate_cost = MagicMock(
        side_effect=self._calculate_cost_mock_3
    )

    # Budget too small for multi
    self.algo.budget = 2
    chosen = self.algo._calculate_best_indexes(workload)
    self.assertEqual(chosen, [Index([self.column_2])])

    # Picks multi with best ratio
    self.algo.budget = 4
    chosen = self.algo._calculate_best_indexes(workload)
    self.assertEqual(chosen, [Index([self.column_2, self.column_1])])
def test_calculate_indexes_2indexes_2columns(self):
    """With at most 2 indexes of width 2, one two-column index is expected.

    Cost evaluation uses the mocked cache and cost preparation is replaced
    by a no-op.
    """

    def skip_cost_preparation(indexes, store_size=False):
        return None

    auto_admin = AutoAdminAlgorithm(
        database_connector=self.connector,
        parameters={"max_indexes": 2, "max_index_width": 2},
    )
    auto_admin.cost_evaluation.cache = mock_cache
    auto_admin.cost_evaluation._prepare_cost_calculation = skip_cost_preparation

    selected = auto_admin.calculate_best_indexes(Workload([query_0, query_1]))

    self.assertEqual(set(selected), {Index([column_A_0, column_A_1])})
def test_calculate_index_benefits(self):
    """Check the `IndexBenefit` list computed from four query results.

    Three sized indexes and four hand-written query results are passed to
    `_calculate_index_benefits`; the returned list is compared with the
    expected benefits in the expected order.
    """
    index_0 = Index([self.column_0])
    index_1 = Index([self.column_1])
    index_2 = Index([self.column_2])
    index_0.estimated_size = 5
    index_1.estimated_size = 1
    index_2.estimated_size = 3

    query_results = {
        "q0": {
            "cost_without_indexes": 100,
            "cost_with_recommended_indexes": 50,
            "recommended_indexes": [index_0, index_1],
        },
        # Yes, negative benefit is possible
        "q1": {
            "cost_without_indexes": 50,
            "cost_with_recommended_indexes": 60,
            "recommended_indexes": [index_1],
        },
        "q2": {
            "cost_without_indexes": 60,
            "cost_with_recommended_indexes": 57,
            "recommended_indexes": [index_2],
        },
        "q3": {
            "cost_without_indexes": 60,
            "cost_with_recommended_indexes": 60,
            "recommended_indexes": [],
        },
    }

    computed = self.algo._calculate_index_benefits(
        [index_0, index_1, index_2], query_results
    )

    self.assertEqual(
        computed,
        [
            IndexBenefit(index_1, 40),
            IndexBenefit(index_0, 50),
            IndexBenefit(index_2, 3),
        ],
    )
def test_calculate_best_indexes_scenario_1(self):
    """Check the selected indexes for budgets 1, 2 and 3.

    Costs come from `self._calculate_cost_mock_1`.
    """
    self.algo.cost_evaluation.calculate_cost = MagicMock(
        side_effect=self._calculate_cost_mock_1
    )
    col_1, col_2, col_3 = self.column_1, self.column_2, self.column_3

    budget_cases = (
        # Each one alone of the single column indexes would fit,
        # but the one with the best benefit/cost ratio is chosen
        (1, [Index([col_3])]),
        # Two single column indexes would fit, but the two best ones are chosen
        (2, [Index([col_3]), Index([col_2])]),
        # All single column indexes are chosen
        (3, [Index([col_3]), Index([col_2]), Index([col_1])]),
    )

    for budget, expected_indexes in budget_cases:
        self.algo.budget = budget
        chosen = self.algo._calculate_best_indexes(self.workload)
        self.assertEqual(chosen, expected_indexes)
def test_prefixes(self):
    """`Index.prefixes()` must list the proper prefixes, longest first."""
    three_columns = Index([self.column_0, self.column_1, self.column_2])
    self.assertEqual(
        three_columns.prefixes(),
        [Index([self.column_0, self.column_1]), Index([self.column_0])],
    )

    # A single-column index has no prefixes.
    one_column = Index([self.column_0])
    self.assertEqual(one_column.prefixes(), [])
def test_no_cache_hit_unseen(self):
    """Two different index sets for one workload must both miss the cache.

    Each request increments `cost_requests` and the connector's `get_cost`
    call count while `cache_hits` stays at 0. The index of the second
    request must have been passed to `simulate_index`.
    """
    evaluation = self.cost_evaluation
    self.assertEqual(evaluation.cost_requests, 0)
    self.assertEqual(evaluation.cache_hits, 0)

    workload = Workload([self.queries[0]], self.db_name)
    first_column_index = Index([self.columns[0]])

    # Request without any index.
    evaluation.calculate_cost(workload, indexes=set())
    self.assertEqual(evaluation.cost_requests, 1)
    self.assertEqual(evaluation.cache_hits, 0)
    self.assertEqual(self.connector.get_cost.call_count, 1)

    # Request with an index that has not been seen before.
    evaluation.calculate_cost(workload, {first_column_index})
    self.assertEqual(evaluation.cost_requests, 2)
    self.assertEqual(evaluation.cache_hits, 0)
    self.assertEqual(self.connector.get_cost.call_count, 2)

    self.connector.simulate_index.assert_called_with(first_column_index)
def test_cache_hit_non_relevant_index(self):
    """Adding the second-column index must be answered from the cache.

    After the second request `cache_hits` is 1 and the connector's
    `get_cost` call count is still 1. The index must nevertheless have
    been passed to `simulate_index`.
    """
    evaluation = self.cost_evaluation
    self.assertEqual(evaluation.cost_requests, 0)
    self.assertEqual(evaluation.cache_hits, 0)

    workload = Workload([self.queries[0]])
    second_column_index = Index([self.columns[1]])

    # Request without any index.
    evaluation.calculate_cost(workload, indexes=set())
    self.assertEqual(evaluation.cost_requests, 1)
    self.assertEqual(evaluation.cache_hits, 0)
    self.assertEqual(self.connector.get_cost.call_count, 1)

    # Same workload plus the index named non-relevant by this test.
    evaluation.calculate_cost(workload, {second_column_index})
    self.assertEqual(evaluation.cost_requests, 2)
    self.assertEqual(evaluation.cache_hits, 1)
    self.assertEqual(self.connector.get_cost.call_count, 1)

    self.connector.simulate_index.assert_called_with(second_column_index)
def test_runtime_data_logging(self):
    """Check the connector's counters and durations after three operations.

    One cost request, one index simulation and one drop of the simulated
    index are issued against a `PostgresDatabaseConnector`.
    NOTE(review): presumably needs a reachable database containing a
    `nation` table -- confirm against the class fixture.
    """
    connector = PostgresDatabaseConnector(self.db_name, "postgres")

    connector.get_cost(Query(17, "SELECT count(*) FROM nation;"))
    self.assertEqual(connector.cost_estimations, 1)
    self.assertGreater(connector.cost_estimation_duration, 0)

    name_column = Column("n_name")
    nation = Table("nation")
    nation.add_column(name_column)
    name_index = Index([name_column])

    # The first element of the result is used as the handle for dropping.
    simulated_oid = connector.simulate_index(name_index)[0]
    self.assertGreater(connector.index_simulation_duration, 0)
    self.assertEqual(connector.simulated_indexes, 1)

    # Dropping must add to the same duration counter.
    duration_before_drop = connector.index_simulation_duration
    connector.drop_simulated_index(simulated_oid)
    self.assertGreater(connector.index_simulation_duration, duration_before_drop)
def test_index_benefit__lt__(self):
    """Exercise `IndexBenefit.__lt__` for unequal and for equal ratios."""
    small_index = Index([self.column_0])
    small_index.estimated_size = 1
    large_index = Index([self.column_1])
    large_index.estimated_size = 2

    # Due to its size, index_0 has the better ratio
    small_benefit = IndexBenefit(small_index, 10)
    large_benefit = IndexBenefit(large_index, 10)
    self.assertLess(large_benefit, small_benefit)

    # The ratios are equal, the columns are taken into consideration
    large_benefit = IndexBenefit(large_index, 20)
    self.assertLess(small_benefit, large_benefit)
def test_which_indexes_utilized_and_cost(self):
    """Check the result and side effects of `which_indexes_utilized_and_cost`.

    Index simulation and plan retrieval are mocked. The fake plan names
    only the index on the second column, so that index and the plan's
    total cost (17) are the expected result. Every candidate must have
    been simulated, and the plan must have been fetched exactly once.
    """

    def fake_simulate_index(index, store_size):
        index.hypopg_name = f"<1337>btree_{index.columns}"

    # For some reason, the database decides to only use an index for one of
    # the filters
    def fake_get_plan(query):
        return {
            "Total Cost": 17,
            "Plans": [
                {
                    "Index Name": "<1337>btree_(C testtablea.col1,)",
                    "Filter": "(Col0 = 14)",
                }
            ],
        }

    evaluation = self.cost_evaluation
    query = self.queries[2]
    evaluation.db_connector.get_plan = MagicMock(side_effect=fake_get_plan)
    evaluation.what_if.simulate_index = MagicMock(side_effect=fake_simulate_index)

    candidates = syntactically_relevant_indexes(query, max_index_width=2)
    utilized, total_cost = evaluation.which_indexes_utilized_and_cost(
        query, candidates
    )

    self.assertEqual(total_cost, 17)
    self.assertEqual(utilized, {Index([self.columns[1]])})
    self.assertEqual(evaluation.what_if.simulate_index.call_count, len(candidates))
    evaluation.db_connector.get_plan.assert_called_once_with(query)
    self.assertCountEqual(evaluation.current_indexes, candidates)