def test_potential_indexes(self):
        """Check the index sets returned by `Workload.potential_indexes()`.

        `query_0`, `query_1` and the `column_A_*` objects are fixtures
        defined outside this method. A workload of only `query_0` is
        expected to yield the single index on `column_A_0`; a workload
        containing `query_1` is expected to yield one single-column index
        for each of `column_A_0`, `column_A_1` and `column_A_2`.
        """
        only_first_column = {Index([column_A_0])}
        every_column = {
            Index([column_A_0]),
            Index([column_A_1]),
            Index([column_A_2]),
        }

        # (queries of the workload, indexes expected from it)
        cases = (
            ([query_0], only_first_column),
            ([query_1], every_column),
            ([query_0, query_1], every_column),
        )
        for queries, expected in cases:
            workload = Workload(queries, database_name="test_DB")
            self.assertEqual(set(workload.potential_indexes()), expected)
Пример #2
0
    def setUpClass(cls):
        """Create the TPC-H test database and the workloads shared by the tests.

        Sets `cls.db_name`, `cls.index_selection`, `cls.small_tpch` (built
        with the query selection `[3, 14]`) and `cls.tpch_5_and_6` (built
        with the query selection `[5, 6]`).
        """
        cls.db_name = "tpch_test_db_index_selection"
        cls.index_selection = IndexSelection()
        db = PostgresDatabaseConnector(None, autocommit=True)

        SCALE_FACTOR = 0.001
        try:
            table_generator = TableGenerator("tpch",
                                             SCALE_FACTOR,
                                             db,
                                             explicit_database_name=cls.db_name)
        finally:
            # Release the bootstrap connection even when table generation
            # fails, so a broken setup does not leave it open.
            db.close()

        cls.index_selection.setup_db_connector(cls.db_name, "postgres")

        # Filter workload
        query_generator = QueryGenerator(
            "tpch",
            SCALE_FACTOR,
            cls.index_selection.db_connector,
            [3, 14],
            table_generator.columns,
        )
        cls.small_tpch = Workload(query_generator.queries)

        query_generator = QueryGenerator(
            "tpch",
            SCALE_FACTOR,
            cls.index_selection.db_connector,
            [5, 6],
            table_generator.columns,
        )
        cls.tpch_5_and_6 = Workload(query_generator.queries)
    def test_workload_indexable_columns(self):
        """Check that `Workload.indexable_columns()` covers every query's columns.

        Two queries share `ColA` and each add one further column; the
        sorted result is expected to equal the three columns sorted.
        """
        table = Table("TableA")
        columns = [Column(name=label) for label in ("ColA", "ColB", "ColC")]
        for column in columns:
            table.add_column(column)
        col_a, col_b, col_c = columns

        filters_a_and_b = Query(
            17,
            "SELECT * FROM TableA WHERE ColA = 4 AND ColB = 5;",
            columns=[col_a, col_b],
        )
        filters_a_and_c = Query(
            18,
            "SELECT * FROM TableA WHERE ColA = 3 AND ColC = 2;",
            columns=[col_a, col_c],
        )

        workload = Workload([filters_a_and_b, filters_a_and_c], "test_DB")
        self.assertEqual(sorted(workload.indexable_columns()),
                         sorted([col_a, col_b, col_c]))
Пример #4
0
    def setUp(self):
        """Prepare a `DropHeuristicAlgorithm` on a mock connector with a two-query workload.

        Creates three columns on `TableA`, one single-column index per
        column, and a workload of two queries. The algorithm's
        `cost_evaluation.calculate_cost` is replaced by a `MagicMock` that
        delegates to `self._calculate_cost_mock`.
        """
        self.connector = MockConnector()
        self.algo = DropHeuristicAlgorithm(database_connector=self.connector)

        self.all_columns = [Column(name) for name in ("Col0", "Col1", "Col2")]
        self.column_0, self.column_1, self.column_2 = self.all_columns

        self.table = Table("TableA")
        self.table.add_columns(self.all_columns)

        self.index_0, self.index_1, self.index_2 = (
            Index([column]) for column in self.all_columns
        )

        one_filter = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;",
                           [self.column_0])
        three_filters = Query(
            1,
            "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
            self.all_columns,
        )
        self.database_name = "test_DB"

        self.workload = Workload([one_filter, three_filters])
        self.algo.workload = self.workload
        # Route every cost request through the test's own cost model.
        cost_mock = MagicMock(side_effect=self._calculate_cost_mock)
        self.algo.cost_evaluation.calculate_cost = cost_mock
Пример #5
0
    def test_calculate_indexes_1MB_2column(self, get_utilized_indexes_mock):
        """Run `RelaxationAlgorithm` with `budget_MB` 1 and `max_index_width` 2.

        `get_utilized_indexes_mock` is supplied from outside this method
        (presumably by a patch decorator that is not visible here) and is
        primed with a one-column and a two-column index. Only the
        single-column index on `column_A_0` is expected in the result.
        """
        settings = {"max_index_width": 2, "budget_MB": 1}
        algorithm = RelaxationAlgorithm(database_connector=self.connector,
                                        parameters=settings)

        cost_evaluation = algorithm.cost_evaluation
        cost_evaluation.cache = mock_cache
        cost_evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        cost_evaluation.estimate_size = self.set_estimated_index_size

        narrow_index = Index([column_A_0], 1000 * 1000)
        wide_index = Index([column_A_0, column_A_1], 2000 * 1000)
        get_utilized_indexes_mock.return_value = ({narrow_index, wide_index}, None)

        workload = Workload([query_0, query_1])
        chosen = algorithm.calculate_best_indexes(workload)
        # The single column index is dropped first, because of the lower penalty.
        # The multi column index is prefixed second.
        self.assertEqual(set(chosen), {Index([column_A_0])})
Пример #6
0
    def setUpClass(cls):
        """Create the table, columns, queries and workload shared by the tests.

        Sets `cls.db_name`, `cls.table`, `cls.columns` (five columns),
        `cls.queries` (three queries over the first two columns) and
        `cls.workload`.
        """
        cls.db_name = "TestDB"

        cls.table = Table("TestTableA")
        column_names = ("Col0", "Col1", "Col2", "Col3", "Col4")
        cls.columns = [Column(name) for name in column_names]
        cls.table.add_columns(cls.columns)

        query_on_col0 = Query(0, "SELECT * FROM TestTableA WHERE Col0 = 4",
                              [cls.columns[0]])
        query_on_col1 = Query(1, "SELECT * FROM TestTableA WHERE Col1 = 3",
                              [cls.columns[1]])
        query_on_both = Query(
            2,
            "SELECT * FROM TestTableA WHERE Col0 = 14 AND Col1 = 13",
            [cls.columns[0], cls.columns[1]],
        )
        cls.queries = [query_on_col0, query_on_col1, query_on_both]

        cls.workload = Workload(cls.queries, cls.db_name)
Пример #7
0
    def test_calculate_indexes_1MB_2column(self):
        """Run `RelaxationAlgorithm` with `budget` 1 and `max_index_columns` 2.

        `_exploit_virtual_indexes` is stubbed to return a one-column and a
        two-column index as candidates. Only the single-column index on
        `column_A_0` is expected in the result.
        """
        def fake_exploit_virtual_indexes(workload):
            # Fresh Index objects on every call, like the stubbed original.
            candidates = {
                Index([column_A_0], 1000 * 1000),
                Index([column_A_0, column_A_1], 2000 * 1000),
            }
            return None, candidates

        algorithm = RelaxationAlgorithm(
            database_connector=self.connector,
            parameters={"max_index_columns": 2, "budget": 1},
        )
        cost_evaluation = algorithm.cost_evaluation
        cost_evaluation.cache = mock_cache
        cost_evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        cost_evaluation.estimate_size = self.set_estimated_index_size
        algorithm._exploit_virtual_indexes = fake_exploit_virtual_indexes

        workload = Workload([query_0, query_1], self.database_name)
        chosen = algorithm.calculate_best_indexes(workload)
        # The single column index is dropped first, because of the lower penalty.
        # The multi column index is prefixed second.
        self.assertEqual(set(chosen), {Index([column_A_0])})
Пример #8
0
    def test_calculate_indexes_3000MB_2column(self):
        """Run `RelaxationAlgorithm` with `budget` 3 and `max_index_columns` 2.

        `_exploit_virtual_indexes` is stubbed to return a one-column and a
        two-column index as candidates. Both are expected to be selected.
        """
        def fake_exploit_virtual_indexes(workload):
            # Fresh Index objects on every call, like the stubbed original.
            candidates = {
                Index([column_A_0], 1000 * 1000),
                Index([column_A_0, column_A_1], 2000 * 1000),
            }
            return None, candidates

        algorithm = RelaxationAlgorithm(
            database_connector=self.connector,
            parameters={"max_index_columns": 2, "budget": 3},
        )
        cost_evaluation = algorithm.cost_evaluation
        cost_evaluation.cache = mock_cache
        cost_evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        cost_evaluation.estimate_size = self.set_estimated_index_size
        algorithm._exploit_virtual_indexes = fake_exploit_virtual_indexes

        workload = Workload([query_0, query_1], self.database_name)
        chosen = algorithm.calculate_best_indexes(workload)
        expected = {Index([column_A_0]), Index([column_A_0, column_A_1])}
        self.assertEqual(set(chosen), expected)
    def setUp(self):
        """Prepare an `EPICAlgorithm` on a mock connector with a two-query workload.

        Creates three columns on `TableA` and one single-column index per
        column with `estimated_size` 5, 1 and 3 respectively, then attaches
        a workload of two queries to the algorithm.
        """
        self.connector = MockConnector()
        self.algo = EPICAlgorithm(database_connector=self.connector)

        self.all_columns = [Column(name) for name in ("ColA", "ColB", "ColC")]
        self.column_1, self.column_2, self.column_3 = self.all_columns

        self.table = Table("TableA")
        self.table.add_columns(self.all_columns)

        self.index_1, self.index_2, self.index_3 = (
            Index([column]) for column in self.all_columns
        )
        sized_indexes = ((self.index_1, 5), (self.index_2, 1), (self.index_3, 3))
        for index, size in sized_indexes:
            index.estimated_size = size

        one_filter = Query(0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1])
        three_filters = Query(
            1,
            "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;",
            self.all_columns,
        )
        self.database_name = "test_DB"

        self.workload = Workload([one_filter, three_filters], self.database_name)
        self.algo.workload = self.workload
Пример #10
0
    def test_calculate_indexes_3000MB_2column(self, get_utilized_indexes_mock):
        """Run `RelaxationAlgorithm` with `budget_MB` 3 and `max_index_width` 2.

        `get_utilized_indexes_mock` is supplied from outside this method
        (presumably by a patch decorator that is not visible here) and is
        primed with a one-column and a two-column index. Both are expected
        to be selected.
        """
        settings = {"max_index_width": 2, "budget_MB": 3}
        algorithm = RelaxationAlgorithm(database_connector=self.connector,
                                        parameters=settings)

        cost_evaluation = algorithm.cost_evaluation
        cost_evaluation.cache = mock_cache
        cost_evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        cost_evaluation.estimate_size = self.set_estimated_index_size

        narrow_index = Index([column_A_0], 1000 * 1000)
        wide_index = Index([column_A_0, column_A_1], 2000 * 1000)
        get_utilized_indexes_mock.return_value = ({narrow_index, wide_index}, None)

        workload = Workload([query_0, query_1])
        chosen = algorithm.calculate_best_indexes(workload)
        expected = {Index([column_A_0]), Index([column_A_0, column_A_1])}
        self.assertEqual(set(chosen), expected)
Пример #11
0
    def test_get_utilized_indexes(self):
        """Check `get_utilized_indexes()` with and without detailed query information.

        A mocked cost evaluation returns canned utilized indexes and costs
        keyed by `query.nr`. Without `detailed_query_information` the
        returned details are expected to be empty; with it, each query is
        expected to map to its costs and utilized indexes.
        """
        class CostEvaluationMock:
            """Cost evaluation stand-in that answers from fixed values per `query.nr`."""

            # The instance parameter is named `_` so that `self` inside the
            # methods keeps referring to the enclosing test case.
            def which_indexes_utilized_and_cost(_, query, indexes):
                if query.nr == 0:
                    return [{self.index_0}, 17]
                if query.nr == 1:
                    return [{self.index_0, self.index_2}, 14]

            def calculate_cost(_, workload, indexes):
                assert len(workload.queries) == 1, (
                    "get_utilized_indexes' calculate_cost_mock should not be "
                    "called with workloads that contain more than one query"
                )
                assert indexes == [], (
                    "get_utilized_indexes' calculate_cost_mock should not be "
                    "called with indexes"
                )

                query = workload.queries[0]

                if query.nr == 0:
                    return 170
                if query.nr == 1:
                    return 140

        query_0 = Query(0, "SELECT * FROM tablea WHERE col0 = 4;", [self.column_a_0])
        # The text previously read "17AND" (no space before AND), which is
        # not well-formed SQL. Costs come from the mock above, so the text
        # itself is never executed in this test.
        query_1 = Query(
            1,
            (
                "SELECT * FROM tablea as a, tableb as b WHERE a.col0 = 4 AND "
                "a.col1 = 17 AND b.col0 = 3;"
            ),
            [self.column_a_0, self.column_a_1, self.column_b_0],
        )
        workload = Workload([query_0, query_1])
        candidates = candidates_per_query(workload, 2, syntactically_relevant_indexes)

        # Default call: no per-query details are collected.
        utilized_indexes, query_details = get_utilized_indexes(
            workload, candidates, CostEvaluationMock()
        )
        self.assertEqual(query_details, {})
        self.assertEqual(utilized_indexes, {self.index_0, self.index_2})

        expected_first_result = {
            "cost_without_indexes": 170,
            "cost_with_indexes": 17,
            "utilized_indexes": {self.index_0},
        }
        expected_second_result = {
            "cost_without_indexes": 140,
            "cost_with_indexes": 14,
            "utilized_indexes": {self.index_0, self.index_2},
        }
        # Detailed call: every query maps to its costs and utilized indexes.
        utilized_indexes, query_details = get_utilized_indexes(
            workload, candidates, CostEvaluationMock(), detailed_query_information=True
        )
        self.assertEqual(query_details[query_0], expected_first_result)
        self.assertEqual(query_details[query_1], expected_second_result)
        self.assertEqual(utilized_indexes, {self.index_0, self.index_2})
    def test_workload(self):
        """Check that `Workload` exposes the queries and database name it was given."""
        first = Query(17, "SELECT * FROM TableA;")
        second = Query(18, "SELECT * FROM nation;")
        name = "test_DB"

        workload = Workload([first, second], name)

        self.assertEqual(workload.queries, [first, second])
        self.assertEqual(workload.database_name, name)
Пример #13
0
    def test_calculate_best_only_executable_once(self):
        """Check that `calculate_best_indexes()` may only run once per algorithm object.

        The first call is expected to set `did_run`; a second call on the
        same object is expected to raise `AssertionError`.
        """
        workload = Workload([])
        connector = PostgresDatabaseConnector(None, autocommit=True)
        # Close the connection once the test is over, whether it passes or
        # fails, so it is not left open.
        self.addCleanup(connector.close)
        selection_algorithm = NoIndexAlgorithm(connector)
        self.assertFalse(selection_algorithm.did_run)

        selection_algorithm.calculate_best_indexes(workload)
        self.assertTrue(selection_algorithm.did_run)

        with self.assertRaises(AssertionError):
            selection_algorithm.calculate_best_indexes(workload)
    def test_calculate_indexes_2indexes_2columns(self):
        """Run `AutoAdminAlgorithm` with `max_indexes` 2 and `max_index_width` 2.

        Costs come from `mock_cache` and cost preparation is replaced by a
        no-op. The two-column index on `column_A_0`, `column_A_1` is
        expected to be the only selected index.
        """
        def skip_cost_preparation(indexes, store_size=False):
            return None

        settings = {"max_indexes": 2, "max_index_width": 2}
        algorithm = AutoAdminAlgorithm(database_connector=self.connector,
                                       parameters=settings)
        algorithm.cost_evaluation.cache = mock_cache
        algorithm.cost_evaluation._prepare_cost_calculation = skip_cost_preparation

        workload = Workload([query_0, query_1])
        chosen = algorithm.calculate_best_indexes(workload)
        self.assertEqual(set(chosen), {Index([column_A_0, column_A_1])})
Пример #15
0
    def test_db2advis_algorithm(self):
        """Check that db2advis selects no more indexes than there are candidates.

        Uses a workload of the first `small_tpch` query and compares the
        number of selected indexes with the number of syntactically
        relevant candidates (width up to 3) for that query.
        """
        parameters = {}
        db2advis_algorithm = self.index_selection.create_algorithm_object(
            "db2advis", parameters)
        workload = Workload([self.small_tpch.queries[0]])

        # The workload holds a single query, so only the first entry of the
        # per-query candidate list is needed.
        possible = candidates_per_query(
            workload,
            max_index_width=3,
            candidate_generator=syntactically_relevant_indexes,
        )[0]
        indexes = db2advis_algorithm.calculate_best_indexes(workload)
        # assertGreaterEqual reports both counts when the check fails.
        self.assertGreaterEqual(len(possible), len(indexes))
Пример #16
0
    def test_exploit_virtual_indexes(self):
        """Check `_exploit_virtual_indexes()` against mocked plans and simulated indexes.

        The plan mock reports an index name for the `Table0` query and a
        plain retrieval otherwise. The result is expected to hold one entry
        per query and a single candidate index on `self.column_1`.
        """
        def _simulate_index_mock(index, store_size):
            index.hypopg_name = f"<1337>btree_{index.columns}"

        # For some reason, the database decides to only use an index for one of
        # the filters
        def _simulate_get_plan(query):
            if "Table0" not in query.text:
                return {
                    "Total Cost": 5,
                    "Plans": [{"Simple Table Retrieve": "table1"}],
                }

            return {
                "Total Cost": 17,
                "Plans": [{"Index Name": "<1337>btree_(C table0.col1,)"}],
            }

        filtered_query = Query(
            0,
            "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;",
            [self.column_0, self.column_1],
        )
        plain_query = Query(1, "SELECT * FROM Table1;", [])
        workload = Workload([filtered_query, plain_query], "database_name")

        plan_mock = MagicMock(side_effect=_simulate_get_plan)
        self.algo.database_connector.get_plan = plan_mock
        simulate_mock = MagicMock(side_effect=_simulate_index_mock)
        self.algo.what_if.simulate_index = simulate_mock
        self.algo.what_if.drop_all_simulated_indexes = MagicMock()

        query_results, index_candidates = self.algo._exploit_virtual_indexes(
            workload)
        self.assertEqual(len(query_results), len(workload.queries))

        expected_for_filtered = {
            "cost_without_indexes": 17,
            "cost_with_recommended_indexes": 17,
            "recommended_indexes": {Index([self.column_1])},
        }
        expected_for_plain = {
            "cost_without_indexes": 5,
            "cost_with_recommended_indexes": 5,
            "recommended_indexes": set(),
        }
        self.assertEqual(query_results[filtered_query], expected_for_filtered)
        self.assertEqual(query_results[plain_query], expected_for_plain)
        self.assertEqual(index_candidates, {Index([self.column_1])})
    def test_cache_hit(self):
        """Check that repeating an identical cost request is counted as a cache hit.

        After the second request with the same workload and no indexes,
        one cache hit is expected and `connector.get_cost` is expected to
        have been called only once.
        """
        self.assertEqual(self.cost_evaluation.cost_requests, 0)
        self.assertEqual(self.cost_evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]])

        # (cost_requests, cache_hits) expected after the first and second call
        expectations = ((1, 0), (2, 1))
        for expected_requests, expected_hits in expectations:
            self.cost_evaluation.calculate_cost(workload, indexes=set())
            self.assertEqual(self.cost_evaluation.cost_requests, expected_requests)
            self.assertEqual(self.cost_evaluation.cache_hits, expected_hits)
            self.assertEqual(self.connector.get_cost.call_count, 1)
    def test_cache_hit_different_index_same_columns(self):
        """Check that a distinct `Index` object on the same column still hits the cache.

        Each request builds a new `Index` on `self.columns[0]`; the second
        request is expected to be a cache hit without another
        `connector.get_cost` call.
        """
        self.assertEqual(self.cost_evaluation.cost_requests, 0)
        self.assertEqual(self.cost_evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]])

        # (cost_requests, cache_hits) expected after the first and second call
        expectations = ((1, 0), (2, 1))
        for expected_requests, expected_hits in expectations:
            # A fresh Index object per request, on the same column.
            self.cost_evaluation.calculate_cost(workload, {Index([self.columns[0]])})
            self.assertEqual(self.cost_evaluation.cost_requests, expected_requests)
            self.assertEqual(self.cost_evaluation.cache_hits, expected_hits)
            self.assertEqual(self.connector.get_cost.call_count, 1)
Пример #19
0
    def test_no_cache_hit_unseen(self):
        """Check that a request with a not yet seen index is not served from the cache.

        The second request adds an index on the query's own column; no
        cache hit is expected, `connector.get_cost` is expected to be
        called a second time and `simulate_index` to be called with that index.
        """
        def check_counters(cost_requests, cache_hits, get_cost_calls):
            self.assertEqual(self.cost_evaluation.cost_requests, cost_requests)
            self.assertEqual(self.cost_evaluation.cache_hits, cache_hits)
            self.assertEqual(self.connector.get_cost.call_count, get_cost_calls)

        self.assertEqual(self.cost_evaluation.cost_requests, 0)
        self.assertEqual(self.cost_evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]], self.db_name)
        index_0 = Index([self.columns[0]])

        self.cost_evaluation.calculate_cost(workload, indexes=set())
        check_counters(1, 0, 1)

        self.cost_evaluation.calculate_cost(workload, {index_0})
        check_counters(2, 0, 2)
        self.connector.simulate_index.assert_called_with(index_0)
    def test_cache_hit_non_relevant_index(self):
        """Check that an index on a column the query does not use still yields a cache hit.

        The second request adds an index on `self.columns[1]` while the
        workload only contains `self.queries[0]`; a cache hit is expected,
        no further `connector.get_cost` call, and `simulate_index` is
        expected to have been called with that index.
        """
        def check_counters(cost_requests, cache_hits, get_cost_calls):
            self.assertEqual(self.cost_evaluation.cost_requests, cost_requests)
            self.assertEqual(self.cost_evaluation.cache_hits, cache_hits)
            self.assertEqual(self.connector.get_cost.call_count, get_cost_calls)

        self.assertEqual(self.cost_evaluation.cost_requests, 0)
        self.assertEqual(self.cost_evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]])
        index_1 = Index([self.columns[1]])

        self.cost_evaluation.calculate_cost(workload, indexes=set())
        check_counters(1, 0, 1)

        self.cost_evaluation.calculate_cost(workload, {index_1})
        check_counters(2, 1, 1)
        self.connector.simulate_index.assert_called_with(index_1)
    def setUpClass(cls):
        """Create the TPC-H test database, a connector to it and a shared workload.

        Sets `cls.db_name`, `cls.scale_factor`, `cls.db` (a connector to
        the generated database) and `cls.workload` (built with the query
        selection `[5, 6]`).
        """
        cls.db_name = "tpch_test_db_database"
        cls.scale_factor = 0.001
        generating_connector = PostgresDatabaseConnector(None, autocommit=True)

        try:
            table_generator = TableGenerator(
                "tpch",
                cls.scale_factor,
                generating_connector,
                explicit_database_name=cls.db_name,
            )

            cls.db = PostgresDatabaseConnector(cls.db_name)
            query_generator = QueryGenerator("tpch", cls.scale_factor, cls.db,
                                             [5, 6], table_generator.columns)
            cls.workload = Workload(query_generator.queries)
        finally:
            # Close the generating connection even when a setup step
            # raises, so a failed setup does not leave it open.
            generating_connector.close()
    def test_candidates_per_query(self):
        """Check that `candidates_per_query()` calls the generator once per query.

        The candidate generator is replaced by a mock; the result is
        expected to have one entry per query and the mock to have been
        called with each query and the maximum index width.
        """
        max_width = 2
        query_1 = Query(18, """SELECT * FROM 1;""")
        workload = Workload([self.query_0, query_1])

        generator_mock = MagicMock(return_value=syntactically_relevant_indexes)

        result = candidates_per_query(
            workload,
            max_index_width=max_width,
            candidate_generator=generator_mock,
        )

        self.assertEqual(len(result), len(workload.queries))
        # assert_called_with checks the most recent call, assert_any_call any call.
        generator_mock.assert_called_with(query_1, max_width)
        generator_mock.assert_any_call(self.query_0, max_width)
Пример #23
0
    def test_calculate_best_indexes_scenario_3(self):
        """Check `_calculate_best_indexes()` for budgets 2 and 4 on one query.

        Costs come from `self._calculate_cost_mock_3`. With budget 2 the
        single-column index on `column_2` is expected; with budget 4 the
        two-column index on `column_2`, `column_1` is expected.
        """
        two_filter_query = Query(
            0,
            "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2;",
            [self.column_1, self.column_2],
        )
        workload = Workload([two_filter_query])
        cost_mock = MagicMock(side_effect=self._calculate_cost_mock_3)
        self.algo.cost_evaluation.calculate_cost = cost_mock

        scenarios = (
            # Budget too small for multi
            (2, [self.column_2]),
            # Picks multi with best ratio
            (4, [self.column_2, self.column_1]),
        )
        for budget, expected_columns in scenarios:
            self.algo.budget = budget
            indexes = self.algo._calculate_best_indexes(workload)
            self.assertEqual(indexes, [Index(expected_columns)])
 def test_calculate_best(self):
     """Expect `calculate_best_indexes()` to raise `NotImplementedError` for an empty workload."""
     empty_workload = Workload([], self.db_name)
     self.assertRaises(
         NotImplementedError,
         self.selection_algorithm.calculate_best_indexes,
         empty_workload,
     )
 def test_cost_eval_cost_empty_workload(self):
     """Expect a cost of 0 for a workload without queries and without indexes."""
     empty_workload = Workload([], self.db_name)
     evaluation = self.selection_algorithm.cost_evaluation
     self.assertEqual(evaluation.calculate_cost(empty_workload, []), 0)
Пример #26
0
    def test_workload(self):
        """Check that `Workload` exposes the queries it was constructed with."""
        first = Query(17, "SELECT * FROM TableA;")
        second = Query(18, "SELECT * FROM nation;")

        workload = Workload([first, second])

        self.assertEqual(workload.queries, [first, second])