class TestDiskCostIndexesWithProjection(CostModelTestCase):
    """Compare the disk cost of designs for the single test collection when
    its one index covers no, one, two and three fields.

    ``setUp`` sets ``no_index_insertion_penalty`` to True on the component.
    """

    def setUp(self):
        """Run the shared fixture setup and create the disk cost component."""
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.cm.no_index_insertion_penalty = True

    ## DEF
    def _collectionName(self):
        """Return the 'name' entry of the record for the test collection."""
        return self.collections[CostModelTestCase.COLLECTION_NAME]['name']

    ## DEF
    def _designWithIndex(self, index_fields=None):
        """Build a new Design that contains the test collection.

        index_fields -- list of field names for a single index; when None
                        the collection is added without any index.
        """
        col_name = self._collectionName()
        design = Design()
        design.addCollection(col_name)
        if index_fields is not None:
            design.addIndex(col_name, index_fields)
        return design

    ## DEF
    def _costAfterReset(self, design):
        """Call state.invalidateCache() for the test collection, reset the
        cost component and its state, then return getCost(design)."""
        self.state.invalidateCache(self._collectionName())
        self.cm.reset()
        self.cm.state.reset()
        return self.cm.getCost(design)

    ## DEF
    def testDiskCostIndexes(self):
        """Check whether disk cost calculations work correctly"""
        # First get the disk cost when there are no indexes
        cost0 = self.cm.getCost(self._designWithIndex())
        # print(<single expression>) gives the same output on Python 2 and 3
        print("diskCost0: %s" % (cost0,))
        # The cost should be exactly equal to one, which means that every operation
        # has to perform a full sequential scan on the collection
        self.assertEqual(cost0, 1.0)

        # A new design with a single-field index. The disk cost should be lower
        cost1 = self._costAfterReset(self._designWithIndex(["field01"]))
        print("diskCost1: %s" % (cost1,))
        self.assertGreater(cost0, cost1)

        # A new design with a two-field index. This cost is only printed and
        # used as the baseline for the next step; it is not compared to cost1.
        cost2 = self._costAfterReset(
            self._designWithIndex(["field01", "field00"]))
        print("diskCost2: %s" % (cost2,))

        # A new design with a three-field index. The disk cost should be
        # lower than with the two-field index
        cost3 = self._costAfterReset(
            self._designWithIndex(["field01", "field00", "field02"]))
        print("diskCost3: %s" % (cost3,))
        self.assertGreater(cost2, cost3)
class TestDiskCostIndexesWithProjection(CostModelTestCase):
    """Compare the disk cost of designs for the single test collection when
    its one index covers no, one, two and three fields.

    ``setUp`` sets ``no_index_insertion_penalty`` to True on the component.
    """

    def setUp(self):
        """Run the shared fixture setup and create the disk cost component."""
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.cm.no_index_insertion_penalty = True

    ## DEF
    def _collectionName(self):
        """Return the 'name' entry of the record for the test collection."""
        return self.collections[CostModelTestCase.COLLECTION_NAME]['name']

    ## DEF
    def _designWithIndex(self, index_fields=None):
        """Build a new Design that contains the test collection.

        index_fields -- list of field names for a single index; when None
                        the collection is added without any index.
        """
        col_name = self._collectionName()
        design = Design()
        design.addCollection(col_name)
        if index_fields is not None:
            design.addIndex(col_name, index_fields)
        return design

    ## DEF
    def _costAfterReset(self, design):
        """Call state.invalidateCache() for the test collection, reset the
        cost component and its state, then return getCost(design)."""
        self.state.invalidateCache(self._collectionName())
        self.cm.reset()
        self.cm.state.reset()
        return self.cm.getCost(design)

    ## DEF
    def testDiskCostIndexes(self):
        """Check whether disk cost calculations work correctly"""
        # First get the disk cost when there are no indexes
        cost0 = self.cm.getCost(self._designWithIndex())
        # print(<single expression>) gives the same output on Python 2 and 3
        print("diskCost0: %s" % (cost0,))
        # The cost should be exactly equal to one, which means that every operation
        # has to perform a full sequential scan on the collection
        self.assertEqual(cost0, 1.0)

        # A new design with a single-field index. The disk cost should be lower
        cost1 = self._costAfterReset(self._designWithIndex(["field01"]))
        print("diskCost1: %s" % (cost1,))
        self.assertGreater(cost0, cost1)

        # A new design with a two-field index. This cost is only printed and
        # used as the baseline for the next step; it is not compared to cost1.
        cost2 = self._costAfterReset(
            self._designWithIndex(["field01", "field00"]))
        print("diskCost2: %s" % (cost2,))

        # A new design with a three-field index. The disk cost should be
        # lower than with the two-field index
        cost3 = self._costAfterReset(
            self._designWithIndex(["field01", "field00", "field02"]))
        print("diskCost3: %s" % (cost3,))
        self.assertGreater(cost2, cost3)
class TestWorkloadCombiner(CostModelTestCase):
    """Tests for WorkloadCombiner and for how the combined workload changes
    the values reported by the disk and network cost components."""

    def setUp(self):
        """Run the shared fixture setup, create both cost components and
        collect the keys of self.collections."""
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.cmn = NetworkCostComponent(self.state)
        self.col_names = list(self.collections)
    ## DEF

    @staticmethod
    def _countOperations(workload):
        """Return the number of operations summed over every session."""
        return sum(1 for sess in workload for _ in sess["operations"])
    ## DEF

    def _designForAllCollections(self, indexed=False, invalidate=False):
        """Build a Design containing every collection in COLLECTION_NAMES.

        indexed    -- also add an index on ['field00', 'field02'] to each one
        invalidate -- also call state.invalidateCache() for each one
        """
        design = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            col_name = self.collections[key]['name']
            design.addCollection(col_name)
            if indexed:
                design.addIndex(col_name, ['field00', 'field02'])
            if invalidate:
                self.state.invalidateCache(col_name)
        return design
    ## DEF

    @staticmethod
    def _costAfterReset(component, design):
        """Reset `component` and its state, then return getCost(design)."""
        component.reset()
        component.state.reset()
        return component.getCost(design)
    ## DEF

    def testQueriesCombination(self):
        """Test if the total number of queries are reduced"""
        original_number_of_queries = self._countOperations(self.workload)
        # print(<single expression>) gives the same output on Python 2 and 3
        print("orignal number of queries: " + str(original_number_of_queries))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d = self._designForAllCollections()
        d.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d)

        number_of_queries_from_combined_workload = self._countOperations(combinedWorkload)
        print("number of queries after query combination: " + str(number_of_queries_from_combined_workload))

        self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload)

    ## DEF

    def testDiskCostChangesAfterQueryCombination(self):
        """
            Assume we have collection A, B, C and we want to embed C to A
            If we build index on field00 of A and field02 of C
            The cost after query combination should be lower
        """
        d0 = self._designForAllCollections(indexed=True)
        cost0 = self.cm.getCost(d0)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d1 = self._designForAllCollections(indexed=True, invalidate=True)
        d1.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)

        cost1 = self._costAfterReset(self.cm, d1)
        print("cost1 " + str(cost1))

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        self.cm.reset()
        # Printed between the two resets, so the value shown is the one
        # present right after the component reset
        print("child collection  %s" % (self.cm.child_collections,))
        self.cm.state.reset()
        cost2 = self.cm.getCost(d0)
        print("cost2 " + str(cost2))

        self.assertEqual(cost2, cost0)
    ## def

    def testNetworkCostShouldReduceAfterQueryCombination(self):
        """
            Network cost should be reduced after embedding collections
        """
        d0 = self._designForAllCollections(indexed=True)
        cost0 = self.cmn.getCost(d0)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d1 = self._designForAllCollections(indexed=True, invalidate=True)
        d1.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)

        cost1 = self._costAfterReset(self.cmn, d1)
        print("cost1 " + str(cost1))

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        cost2 = self._costAfterReset(self.cmn, d0)
        print("cost2 " + str(cost2))

        self.assertEqual(cost2, cost0)
    ## def

    def testNotCollectionEmbeddingProcessShouldReturnNone(self):
        """
            If the given design has no collection embedding, we should return right away
        """
        d0 = self._designForAllCollections(indexed=True)

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        combinedWorkload = combiner.process(d0)
        self.assertIsNone(combinedWorkload)
class TestWorkloadCombiner(CostModelTestCase):
    """Tests for WorkloadCombiner using a design on which
    setDenormalizationParent(TABLENAME_ORDER_LINE, TABLENAME_ORDERS) is set."""

    def setUp(self):
        """Run the shared fixture setup, create the disk cost component and
        collect the keys of self.collections."""
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.col_names = list(self.collections)
    ## DEF

    @staticmethod
    def _countOperations(workload):
        """Return the number of operations summed over every session."""
        return sum(1 for sess in workload for _ in sess["operations"])
    ## DEF

    def _designForAllCollections(self, invalidate=False):
        """Build a Design containing every key of self.collections.

        invalidate -- also call state.invalidateCache() for each collection
        """
        design = Design()
        for col_name in self.collections:
            design.addCollection(col_name)
            if invalidate:
                self.state.invalidateCache(col_name)
        return design
    ## DEF

    def testQueriesCombination(self):
        """Test if the total number of queries are reduced"""
        original_number_of_queries = self._countOperations(self.workload)
        # print(<single expression>) gives the same output on Python 2 and 3
        print("orignal number of queries: " + str(original_number_of_queries))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d = self._designForAllCollections()
        d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)

        combinedWorkload = combiner.process(d)

        number_of_queries_from_combined_workload = self._countOperations(combinedWorkload)
        print("number of queries after query combination: " + str(number_of_queries_from_combined_workload))

        self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload)
    ## DEF

    def testDiskCostNotChangedAfterQueryCombination(self):
        """Disk cost should not be changed after query combination"""
        d = self._designForAllCollections()

        cost0 = self.cm.getCost(d)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d = self._designForAllCollections(invalidate=True)
        d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)

        combinedWorkload = combiner.process(d)
        self.state.updateWorkload(combinedWorkload)

        # Recompute the cost after resetting the component and its state
        self.cm.reset()
        self.cm.state.reset()
        cost1 = self.cm.getCost(d)
        print("cost1 " + str(cost1))

        self.assertEqual(cost0, cost1)
class TestDiskCostIndexes(CostModelTestCase):
    """Disk cost checks for different index choices on the test collection.

    ``setUp`` sets ``no_index_insertion_penalty`` to True on the component.
    """

    def setUp(self):
        """Run the shared fixture setup and create the disk cost component."""
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.cm.no_index_insertion_penalty = True
    # DEF

    def _costAfterReset(self, design, invalidate_collection=None):
        """Reset the cost component and its state, then return getCost(design).

        invalidate_collection -- when given, state.invalidateCache() is
                                 called with this name before the resets.
        """
        if invalidate_collection is not None:
            self.state.invalidateCache(invalidate_collection)
        self.cm.reset()
        self.cm.state.reset()
        return self.cm.getCost(design)
    # DEF

    def testDiskCostIndexes(self):
        """Check whether disk cost calculations work correctly"""
        col_info = self.collections[CostModelTestCase.COLLECTION_NAME]

        # First get the disk cost when there are no indexes
        d = Design()
        d.addCollection(col_info['name'])

        cost0 = self.cm.getCost(d)
        # print(<single expression>) gives the same output on Python 2 and 3
        print("diskCost0: %s" % (cost0,))
        # The cost should be exactly equal to one, which means that every operation
        # has to perform a full sequential scan on the collection
        self.assertEqual(cost0, 1.0)

        # Now index all the 'interesting' fields. The disk cost should be lower
        d = Design()
        d.addCollection(col_info['name'])
        d.addIndex(col_info['name'], col_info['interesting'])

        cost1 = self._costAfterReset(d, invalidate_collection=col_info['name'])
        print("diskCost1: %s" % (cost1,))
        self.assertGreater(cost0, cost1)
    # DEF

    def testDiskCostOnDifferentIndexes(self):
        """Check how indexes will affect the disk cost"""
        # 1. Put an index on each of the two fields separately
        d = Design()
        d.addCollection(CostModelTestCase.COLLECTION_NAME)
        d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field00"])
        d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field01"])

        cost0 = self._costAfterReset(d)
        print("diskCost0: %s" % (cost0,))

        # 2. Put one index on both fields together
        d = Design()
        col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
        d.addCollection(CostModelTestCase.COLLECTION_NAME)
        d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field01", "field00"])

        cost1 = self._costAfterReset(d, invalidate_collection=col_info['name'])
        print("diskCost1: %s" % (cost1,))

        self.assertGreater(cost0, cost1)
    # DEF

    def testDiskCostCaching(self):
        """Check whether disk cost calculations work correctly with caching enabled"""
        self.cm.cache_enable = True

        # Give the cost component a full Design with indexes
        d = Design()
        col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
        d.addCollection(col_info['name'])
        d.addIndex(col_info['name'], col_info['interesting'])

        cost0 = self.cm.getCost(d)
        print("diskCost0: %s" % (cost0,))
        # FIXME self.assertGreater(cost0, 0.0)

        # We should get the same cost back after we execute it a second time
        # NOTE(review): nothing is asserted in this test yet; both costs are
        # only printed. Confirm whether cost0 == cost1 is meant to be checked.
        cost1 = self.cm.getCost(d)
        print("diskCost1: %s" % (cost1,))
# Example no. 6
# 0
class TestWorkloadCombiner(CostModelTestCase):
    """Tests for WorkloadCombiner and for how the combined workload changes
    the values reported by the disk and network cost components."""

    def setUp(self):
        """Run the shared fixture setup, create both cost components and
        collect the keys of self.collections."""
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.cmn = NetworkCostComponent(self.state)
        self.col_names = list(self.collections)

    ## DEF

    @staticmethod
    def _countOperations(workload):
        """Return the number of operations summed over every session."""
        return sum(1 for sess in workload for _ in sess["operations"])

    ## DEF

    def _designForAllCollections(self, indexed=False, invalidate=False):
        """Build a Design containing every collection in COLLECTION_NAMES.

        indexed    -- also add an index on ['field00', 'field02'] to each one
        invalidate -- also call state.invalidateCache() for each one
        """
        design = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            col_name = self.collections[key]['name']
            design.addCollection(col_name)
            if indexed:
                design.addIndex(col_name, ['field00', 'field02'])
            if invalidate:
                self.state.invalidateCache(col_name)
        return design

    ## DEF

    @staticmethod
    def _costAfterReset(component, design):
        """Reset `component` and its state, then return getCost(design)."""
        component.reset()
        component.state.reset()
        return component.getCost(design)

    ## DEF

    def testQueriesCombination(self):
        """Test if the total number of queries are reduced"""
        original_number_of_queries = self._countOperations(self.workload)
        # print(<single expression>) gives the same output on Python 2 and 3
        print("orignal number of queries: " + str(original_number_of_queries))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d = self._designForAllCollections()
        d.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d)

        number_of_queries_from_combined_workload = self._countOperations(
            combinedWorkload)
        print("number of queries after query combination: " + str(
            number_of_queries_from_combined_workload))

        self.assertGreater(original_number_of_queries,
                           number_of_queries_from_combined_workload)

    ## DEF

    def testDiskCostChangesAfterQueryCombination(self):
        """
            Assume we have collection A, B, C and we want to embed C to A
            If we build index on field00 of A and field02 of C
            The cost after query combination should be lower
        """
        d0 = self._designForAllCollections(indexed=True)
        cost0 = self.cm.getCost(d0)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d1 = self._designForAllCollections(indexed=True, invalidate=True)
        d1.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)

        cost1 = self._costAfterReset(self.cm, d1)
        print("cost1 " + str(cost1))

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        self.cm.reset()
        # Printed between the two resets, so the value shown is the one
        # present right after the component reset
        print("child collection  %s" % (self.cm.child_collections,))
        self.cm.state.reset()
        cost2 = self.cm.getCost(d0)
        print("cost2 " + str(cost2))

        self.assertEqual(cost2, cost0)

    ## def

    def testNetworkCostShouldReduceAfterQueryCombination(self):
        """
            Network cost should be reduced after embedding collections
        """
        d0 = self._designForAllCollections(indexed=True)
        cost0 = self.cmn.getCost(d0)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d1 = self._designForAllCollections(indexed=True, invalidate=True)
        d1.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)

        cost1 = self._costAfterReset(self.cmn, d1)
        print("cost1 " + str(cost1))

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        cost2 = self._costAfterReset(self.cmn, d0)
        print("cost2 " + str(cost2))

        self.assertEqual(cost2, cost0)

    ## def

    def testNotCollectionEmbeddingProcessShouldReturnNone(self):
        """
            If the given design has no collection embedding, we should return right away
        """
        d0 = self._designForAllCollections(indexed=True)

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        combinedWorkload = combiner.process(d0)
        self.assertIsNone(combinedWorkload)
class TestDiskCost_IndexInsertionPenalty(CostModelTestCase):

    def setUp(self):
        """Run the shared fixture setup, then attach a disk cost component
        built on the shared state as ``self.cm``."""
        CostModelTestCase.setUp(self)
        component = DiskCostComponent(self.state)
        self.cm = component
    # DEF

    def testDiskCost_IndexInsertionPenalty(self):
        """
            Compare total_index_insertion_penalty across index choices.

            The penalty measured with an index on field00 is the baseline.
            Indexes on field01 and on (field00, field01) must give an equal
            penalty; every index that includes field02 must give a strictly
            greater one.
        """
        def measure_penalty(index_fields):
            # Put the same index on every collection, reset the component and
            # its state, run getCost() and read the resulting penalty.
            design = Design()
            for name in CostModelTestCase.COLLECTION_NAMES:
                design.addCollection(name)
                # A fresh list per call so no two indexes share a list object
                design.addIndex(name, list(index_fields))
            ## FOR
            self.cm.reset()
            self.cm.state.reset()
            self.cm.getCost(design)
            return self.cm.total_index_insertion_penalty

        baseline = measure_penalty(("field00",))

        # (index fields, True when the penalty must exceed the baseline)
        scenarios = (
            (("field01",), False),
            (("field00", "field01"), False),
            (("field00", "field02"), True),
            (("field01", "field02"), True),
            (("field00", "field01", "field02"), True),
        )
        # Measure and check one scenario at a time so that a failing check
        # stops the test before any later design is costed.
        for index_fields, expect_higher in scenarios:
            penalty = measure_penalty(index_fields)
            if expect_higher:
                self.assertGreater(penalty, baseline)
            else:
                self.assertEqual(baseline, penalty)
    ## DEF
    
    def testDiskCost_IndexInsertionPenalty_integrated_to_cost_component(self):
        """