def test_workload_indexable_columns(self): table = Table("TableA") column_1 = Column(name="ColA") column_2 = Column(name="ColB") column_3 = Column(name="ColC") table.add_column(column_1) table.add_column(column_2) table.add_column(column_3) query_1 = Query( 17, "SELECT * FROM TableA WHERE ColA = 4 AND ColB = 5;", columns=[column_1, column_2], ) query_2 = Query( 18, "SELECT * FROM TableA WHERE ColA = 3 AND ColC = 2;", columns=[column_1, column_3], ) database_name = "test_DB" workload = Workload([query_1, query_2], database_name) indexable_columns = workload.indexable_columns() self.assertEqual(sorted(indexable_columns), sorted([column_1, column_2, column_3]))
def setUpClass(cls): cls.db_name = "TestDB" cls.table = Table("TestTableA") cls.columns = [ Column("Col0"), Column("Col1"), Column("Col2"), Column("Col3"), Column("Col4"), ] cls.table.add_columns(cls.columns) cls.queries = [ Query(0, "SELECT * FROM TestTableA WHERE Col0 = 4", [cls.columns[0]]), Query(1, "SELECT * FROM TestTableA WHERE Col1 = 3", [cls.columns[1]]), Query( 2, "SELECT * FROM TestTableA WHERE Col0 = 14 AND Col1 = 13", [cls.columns[0], cls.columns[1]], ), ] cls.workload = Workload(cls.queries, cls.db_name)
def setUp(self): self.connector = MockConnector() self.algo = DropHeuristicAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.all_columns = [self.column_0, self.column_1, self.column_2] self.table = Table("TableA") self.table.add_columns(self.all_columns) self.index_0 = Index([self.column_0]) self.index_1 = Index([self.column_1]) self.index_2 = Index([self.column_2]) query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;", [self.column_0]) query_1 = Query( 1, "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, ) self.database_name = "test_DB" self.workload = Workload([query_0, query_1]) self.algo.workload = self.workload self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock)
def setUp(self): self.connector = MockConnector() self.algo = EPICAlgorithm(database_connector=self.connector) self.column_1 = Column("ColA") self.column_2 = Column("ColB") self.column_3 = Column("ColC") self.all_columns = [self.column_1, self.column_2, self.column_3] self.table = Table("TableA") self.table.add_columns(self.all_columns) self.index_1 = Index([self.column_1]) self.index_1.estimated_size = 5 self.index_2 = Index([self.column_2]) self.index_2.estimated_size = 1 self.index_3 = Index([self.column_3]) self.index_3.estimated_size = 3 query_1 = Query(0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1]) query_2 = Query( 1, "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;", self.all_columns, ) self.database_name = "test_DB" self.workload = Workload([query_1, query_2], self.database_name) self.algo.workload = self.workload
def test_get_utilized_indexes(self): class CostEvaluationMock: def which_indexes_utilized_and_cost(_, query, indexes): if query.nr == 0: return [{self.index_0}, 17] if query.nr == 1: return [{self.index_0, self.index_2}, 14] def calculate_cost(_, workload, indexes): assert len(workload.queries) == 1, ( "get_utilized_indexes' calculate_cost_mock should not be " "called with workloads that contain more than one query" ) assert indexes == [], ( "get_utilized_indexes' calculate_cost_mock should not be " "called with indexes" ) query = workload.queries[0] if query.nr == 0: return 170 if query.nr == 1: return 140 query_0 = Query(0, "SELECT * FROM tablea WHERE col0 = 4;", [self.column_a_0]) query_1 = Query( 1, ( "SELECT * FROM tablea as a, tableb as b WHERE a.col0 = 4 AND " "a.col1 = 17AND b.col0 = 3;" ), [self.column_a_0, self.column_a_1, self.column_b_0], ) workload = Workload([query_0, query_1]) candidates = candidates_per_query(workload, 2, syntactically_relevant_indexes) utilized_indexes, query_details = get_utilized_indexes( workload, candidates, CostEvaluationMock() ) self.assertEqual(query_details, {}) self.assertEqual(utilized_indexes, {self.index_0, self.index_2}) expected_first_result = { "cost_without_indexes": 170, "cost_with_indexes": 17, "utilized_indexes": {self.index_0}, } expected_second_result = { "cost_without_indexes": 140, "cost_with_indexes": 14, "utilized_indexes": {self.index_0, self.index_2}, } utilized_indexes, query_details = get_utilized_indexes( workload, candidates, CostEvaluationMock(), detailed_query_information=True ) self.assertEqual(query_details[query_0], expected_first_result) self.assertEqual(query_details[query_1], expected_second_result) self.assertEqual(utilized_indexes, {self.index_0, self.index_2})
def test_workload(self): query_1 = Query(17, "SELECT * FROM TableA;") query_2 = Query(18, "SELECT * FROM nation;") database_name = "test_DB" workload = Workload([query_1, query_2], database_name) self.assertEqual(workload.queries, [query_1, query_2]) self.assertEqual(workload.database_name, database_name)
def test_exploit_virtual_indexes(self): def _simulate_index_mock(index, store_size): index.hypopg_name = f"<1337>btree_{index.columns}" # For some reason, the database decides to only use an index for one of # the filters def _simulate_get_plan(query): if "Table0" in query.text: return { "Total Cost": 17, "Plans": [{ "Index Name": "<1337>btree_(C table0.col1,)" }], } return { "Total Cost": 5, "Plans": [{ "Simple Table Retrieve": "table1" }] } query_0 = Query( 0, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;", [self.column_0, self.column_1], ) query_1 = Query(1, "SELECT * FROM Table1;", []) workload = Workload([query_0, query_1], "database_name") self.algo.database_connector.get_plan = MagicMock( side_effect=_simulate_get_plan) self.algo.what_if.simulate_index = MagicMock( side_effect=_simulate_index_mock) self.algo.what_if.drop_all_simulated_indexes = MagicMock() query_results, index_candidates = self.algo._exploit_virtual_indexes( workload) self.assertEqual(len(query_results), len(workload.queries)) expected_first_result = { "cost_without_indexes": 17, "cost_with_recommended_indexes": 17, "recommended_indexes": set([Index([self.column_1])]), } expected_second_result = { "cost_without_indexes": 5, "cost_with_recommended_indexes": 5, "recommended_indexes": set(), } self.assertEqual(query_results[query_0], expected_first_result) self.assertEqual(query_results[query_1], expected_second_result) self.assertEqual(index_candidates, set([Index([self.column_1])]))
def test_query(self): query = Query(17, "SELECT * FROM lineitem;") self.assertEqual(query.nr, 17) self.assertEqual(query.text, "SELECT * FROM lineitem;") self.assertEqual(query.columns, []) column_1 = Column(name="ColA") column_2 = Column(name="ColB") query_2 = Query(18, "SELECT * FROM nation;", columns=[column_1, column_2]) self.assertEqual(query_2.nr, 18) self.assertEqual(query_2.text, "SELECT * FROM nation;") self.assertEqual(query_2.columns, [column_1, column_2])
def test_possible_indexes(self): column_0_table_1 = Column("Col0") table_1 = Table("Table1") table_1.add_column(column_0_table_1) query = Query( 17, """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1" AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""", [self.column_0, self.column_1, self.column_2, column_0_table_1], ) indexes = self.algo._possible_indexes(query) self.assertIn(Index([column_0_table_1]), indexes) self.assertIn(Index([self.column_0]), indexes) self.assertIn(Index([self.column_1]), indexes) self.assertIn(Index([self.column_2]), indexes) self.assertIn(Index([self.column_0, self.column_1]), indexes) self.assertIn(Index([self.column_0, self.column_2]), indexes) self.assertIn(Index([self.column_1, self.column_0]), indexes) self.assertIn(Index([self.column_1, self.column_2]), indexes) self.assertIn(Index([self.column_2, self.column_0]), indexes) self.assertIn(Index([self.column_2, self.column_1]), indexes) self.assertIn(Index([self.column_0, self.column_1, self.column_2]), indexes) self.assertIn(Index([self.column_0, self.column_2, self.column_1]), indexes) self.assertIn(Index([self.column_1, self.column_0, self.column_2]), indexes) self.assertIn(Index([self.column_1, self.column_2, self.column_0]), indexes) self.assertIn(Index([self.column_2, self.column_0, self.column_1]), indexes) self.assertIn(Index([self.column_2, self.column_1, self.column_0]), indexes)
def setUp(self): self.connector = MockConnector() self.algo = IBMAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.column_3 = Column("Col3") self.column_4 = Column("Col4") self.column_5 = Column("Col5") self.column_6 = Column("Col6") self.column_7 = Column("Col7") self.all_columns = [ self.column_0, self.column_1, self.column_2, self.column_3, self.column_4, self.column_5, self.column_6, self.column_7, ] self.table = Table("Table0") self.table.add_columns(self.all_columns) self.query_0 = Query( 0, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, )
def setUpClass(cls): cls.column_0 = Column("Col0") cls.column_1 = Column("Col1") cls.column_2 = Column("Col2") cls.table = Table("Table0") cls.table.add_columns([cls.column_0, cls.column_1, cls.column_2]) cls.column_0_table_1 = Column("Col0") cls.table_1 = Table("Table1") cls.table_1.add_column(cls.column_0_table_1) cls.query_0 = Query( 17, """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1" AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""", [cls.column_0, cls.column_1, cls.column_2, cls.column_0_table_1], )
def test_runtime_data_logging(self): db = PostgresDatabaseConnector(self.db_name, "postgres") query = Query(17, "SELECT count(*) FROM nation;") db.get_cost(query) self.assertEqual(db.cost_estimations, 1) self.assertGreater(db.cost_estimation_duration, 0) column_n_name = Column("n_name") nation_table = Table("nation") nation_table.add_column(column_n_name) index = Index([column_n_name]) index_oid = db.simulate_index(index)[0] self.assertGreater(db.index_simulation_duration, 0) self.assertEqual(db.simulated_indexes, 1) previou_simulation_duration = db.index_simulation_duration db.drop_simulated_index(index_oid) self.assertGreater(db.index_simulation_duration, previou_simulation_duration)
def test_candidates_per_query(self): MAX_INDEX_WIDTH = 2 query_1 = Query(18, """SELECT * FROM 1;""") workload = Workload([self.query_0, query_1]) syntactically_relevant_indexes_mock = MagicMock( return_value=syntactically_relevant_indexes) result = candidates_per_query( workload, max_index_width=MAX_INDEX_WIDTH, candidate_generator=syntactically_relevant_indexes_mock, ) self.assertEqual(len(result), len(workload.queries)) syntactically_relevant_indexes_mock.assert_called_with( query_1, MAX_INDEX_WIDTH) syntactically_relevant_indexes_mock.assert_any_call( self.query_0, MAX_INDEX_WIDTH)
def test_calculate_best_indexes_scenario_3(self): query_1 = Query( 0, "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2;", [self.column_1, self.column_2], ) workload = Workload([query_1]) self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock_3) # Budget too small for multi self.algo.budget = 2 indexes = self.algo._calculate_best_indexes(workload) expected_indexes = [Index([self.column_2])] self.assertEqual(indexes, expected_indexes) # Picks multi with best ratio self.algo.budget = 4 indexes = self.algo._calculate_best_indexes(workload) expected_indexes = [Index([self.column_2, self.column_1])] self.assertEqual(indexes, expected_indexes)
def test_recommended_indexes(self): def _simulate_index_mock(index, store_size): index.hypopg_name = f"<1337>btree_{index.columns}" # For some reason, the database decides to only use an index for one of # the filters def _simulate_get_plan(query): plan = { "Total Cost": 17, "Plans": [{ "Index Name": "<1337>btree_(C table0.col1,)", "Filter": "(Col0 = 1)", }], } return plan query = Query( 17, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;", [self.column_0, self.column_1], ) self.algo.database_connector.get_plan = MagicMock( side_effect=_simulate_get_plan) self.algo.what_if.simulate_index = MagicMock( side_effect=_simulate_index_mock) self.algo.what_if.drop_all_simulated_indexes = MagicMock() indexes, cost = self.algo._recommended_indexes(query) self.assertEqual(cost, 17) self.assertEqual(indexes, {Index([self.column_1])}) self.assertEqual(self.algo.what_if.simulate_index.call_count, 4) self.algo.what_if.drop_all_simulated_indexes.assert_called_once() self.algo.database_connector.get_plan.assert_called_once_with(query)
from selection.cost_evaluation import CostEvaluation from selection.index import Index from selection.workload import Column, Query, Table import itertools table_A = Table("TableA") column_A_0 = Column("Col0") column_A_1 = Column("Col1") column_A_2 = Column("Col2") table_A.add_columns([column_A_0, column_A_1, column_A_2]) query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;", [column_A_0]) query_1 = Query( 1, "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", [column_A_0, column_A_1, column_A_2], ) mock_cache = {} table_A_potential_indexes = [] # Calculate potential indexes for TableA for number_of_columns in range(1, len(table_A.columns) + 1): for column_list in itertools.permutations(table_A.columns, number_of_columns): table_A_potential_indexes.append(Index(column_list)) # Calculate relevant indexes for query_0 based on potential indexes relevant_indexes_query1_table_A = CostEvaluation._relevant_indexes( query_0, table_A_potential_indexes )
def test_workload(self): query_1 = Query(17, "SELECT * FROM TableA;") query_2 = Query(18, "SELECT * FROM nation;") workload = Workload([query_1, query_2]) self.assertEqual(workload.queries, [query_1, query_2])
def test_query_repr(self): query = Query(17, "SELECT * FROM lineitem;") self.assertEqual(repr(query), "Q17")