def testDiskCostNotChangedAfterQueryCombination(self):
        """Disk cost should not be changed after query combination.

        Costs a plain design that holds every collection, then costs a second
        design that additionally sets ORDERS as the denormalization parent of
        ORDER_LINE after the workload has been rewritten by WorkloadCombiner,
        and asserts that the two costs are equal.
        """
        # Baseline design: every collection, no denormalization
        d0 = Design()
        for col_name in self.collections.iterkeys():
            d0.addCollection(col_name)

        cost0 = self.cm.getCost(d0)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # Initialize a design with denormalization; the state's cache is
        # invalidated for each collection as it is added
        d1 = Design()
        for col_name in self.collections.iterkeys():
            d1.addCollection(col_name)
            self.state.invalidateCache(col_name)

        d1.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)

        # Install the combined workload, then reset the cost model and its
        # state before costing the denormalized design
        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)

        self.cm.reset()
        self.cm.state.reset()
        cost1 = self.cm.getCost(d1)

        print("cost1 " + str(cost1))

        self.assertEqual(cost0, cost1)
    def testQueriesCombination(self):
        """The combined workload must hold fewer operations than the original.

        Counts the operations over all sessions of self.workload, runs
        WorkloadCombiner against a design in which "apples" is the
        denormalization parent of "koalas", and asserts that the resulting
        workload contains strictly fewer operations.
        """
        # One tick per operation, over every session
        num_before = sum(1 for sess in self.workload for _ in sess["operations"])
        print("orignal number of queries: " + str(num_before))

        # The combiner is created before the design is assembled
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # Design holding every test collection, with koalas embedded in apples
        design = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            design.addCollection(self.collections[key]['name'])
        design.setDenormalizationParent("koalas", "apples")

        combined = combiner.process(design)

        num_after = sum(1 for sess in combined for _ in sess["operations"])
        print("number of queries after query combination: " + str(num_after))

        self.assertGreater(num_before, num_after)
Пример #3
0
    def testQueriesCombination(self):
        """Query combination should reduce the total number of operations.

        The operation count of self.workload is compared against the count of
        the workload returned by WorkloadCombiner.process() for a design that
        sets "apples" as the denormalization parent of "koalas".
        """
        def tally(sessions):
            # Walk every session and count its operations one by one
            total = 0
            for session in sessions:
                for _ in session["operations"]:
                    total += 1
            return total

        before = tally(self.workload)
        print("orignal number of queries: " + str(before))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # Build the denormalized design from the fixed list of test collections
        denormalized = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            entry = self.collections[key]
            denormalized.addCollection(entry['name'])
        denormalized.setDenormalizationParent("koalas", "apples")

        after = tally(combiner.process(denormalized))
        print("number of queries after query combination: " + str(after))

        self.assertGreater(before, after)
    def testQueriesCombination(self):
        """Embedding ORDER_LINE in ORDERS should shrink the workload.

        Compares the number of operations in self.workload with the number in
        the workload produced by WorkloadCombiner.process() for a design in
        which ORDERS is the denormalization parent of ORDER_LINE.
        """
        # Flatten the sessions to count every operation
        ops_before = [op for sess in self.workload for op in sess["operations"]]
        count_before = len(ops_before)
        print("orignal number of queries: " + str(count_before))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # Design with every known collection plus the denormalization
        design = Design()
        for col_name in self.collections.iterkeys():
            design.addCollection(col_name)
        design.setDenormalizationParent(
            tpccConstants.TABLENAME_ORDER_LINE,
            tpccConstants.TABLENAME_ORDERS,
        )

        combined = combiner.process(design)

        ops_after = [op for sess in combined for op in sess["operations"]]
        count_after = len(ops_after)
        print("number of queries after query combination: " + str(count_after))

        self.assertGreater(count_before, count_after)
Пример #5
0
 def testNetworkCostDenormalization(self):
     """Check network cost for queries that reference denormalized collections"""
     # Get the "base" design cost when all of the collections
     # are sharded on their "interesting" fields
     d = Design()
     i = 0
     for col_info in self.collections.itervalues():
         d.addCollection(col_info['name'])
         if i == 0:
             d.addShardKey(col_info['name'], col_info['interesting'])
         else:
             d.addShardKey(col_info['name'], ["_id"])
         
         self.cm.invalidateCache(d, col_info['name'])
         i += 1
     ## FOR
     self.cm.reset()
     self.state.reset()
     cost0 = self.cm.getCost(d)
     
     print "cost0:", cost0
     
     # Now get the network cost for when we denormalize the
     # second collection inside of the first one
     # We should have a lower cost because there should now be fewer queries
     d = Design()
     i = 0
     for col_info in self.collections.itervalues():
         self.assertTrue(col_info['interesting'])
         d.addCollection(col_info['name'])
         if i == 0:
             d.addShardKey(col_info['name'], col_info['interesting'])
         else:
             d.addShardKey(col_info['name'], ["_id"])
         self.cm.invalidateCache(d, col_info['name'])
         i += 1
         
     d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)
        
     combiner = WorkloadCombiner(self.collections, self.workload)
     combinedWorkload = combiner.process(d)
     self.state.updateWorkload(combinedWorkload)
     
     self.cm.reset()
     self.state.reset()
     cost1 = self.cm.getCost(d)
     print "cost1:", cost1
    
     self.assertLess(cost1, cost0)
    def testDiskCostChangesAfterQueryCombination(self):
        """
            Assume we have collection A, B, C and we want to embed C to A
            If we build index on field00 of A and field02 of C
            The cost after query combination should be lower
        """
        d0 = Design()
        for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)):
            col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            d0.addCollection(col_info['name'])
            d0.addIndex(col_info['name'], ['field00', 'field02'])
        
        cost0 = self.cm.getCost(d0)
        print "cost0 " + str(cost0)

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d1 = Design()
        for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)):
            col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            d1.addCollection(col_info['name'])
            d1.addIndex(col_info['name'], ['field00', 'field02'])
            self.state.invalidateCache(col_info['name'])
            
        d1.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)
                
        self.cm.reset()
        self.cm.state.reset()
        cost1 = self.cm.getCost(d1)

        print "cost1 " + str(cost1)
        
        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        self.cm.reset()
        print "child collection ", self.cm.child_collections
        self.cm.state.reset()
        cost2 = self.cm.getCost(d0)

        print "cost2 " + str(cost2)

        self.assertEqual(cost2, cost0)
Пример #7
0
    def testDiskCostChangesAfterQueryCombination(self):
        """
            Assume we have collection A, B, C and we want to embed C to A
            If we build index on field00 of A and field02 of C
            The cost after query combination should be lower
        """
        d0 = Design()
        for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)):
            col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            d0.addCollection(col_info['name'])
            d0.addIndex(col_info['name'], ['field00', 'field02'])

        cost0 = self.cm.getCost(d0)
        print "cost0 " + str(cost0)

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # initialize a design with denormalization
        d1 = Design()
        for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)):
            col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            d1.addCollection(col_info['name'])
            d1.addIndex(col_info['name'], ['field00', 'field02'])
            self.state.invalidateCache(col_info['name'])

        d1.setDenormalizationParent("koalas", "apples")

        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)

        self.cm.reset()
        self.cm.state.reset()
        cost1 = self.cm.getCost(d1)

        print "cost1 " + str(cost1)

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        self.cm.reset()
        print "child collection ", self.cm.child_collections
        self.cm.state.reset()
        cost2 = self.cm.getCost(d0)

        print "cost2 " + str(cost2)

        self.assertEqual(cost2, cost0)
    def testNotCollectionEmbeddingProcessShouldReturnNone(self):
        """
            WorkloadCombiner.process() is expected to return None for a
            design in which no collection is embedded in another
        """
        plain_design = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            name = self.collections[key]['name']
            plain_design.addCollection(name)
            plain_design.addIndex(name, ['field00', 'field02'])

        # Initialize a combiner and run it on the design without embedding
        combiner = WorkloadCombiner(self.col_names, self.workload)
        result = combiner.process(plain_design)

        self.assertEqual(None, result)
Пример #9
0
    def testNotCollectionEmbeddingProcessShouldReturnNone(self):
        """
            A design without any denormalization parent gives the combiner
            nothing to combine, so process() should hand back None
        """
        # Resolve the collection names first, then populate the design
        names = [self.collections[key]['name']
                 for key in CostModelTestCase.COLLECTION_NAMES]

        design = Design()
        for name in names:
            design.addCollection(name)
            design.addIndex(name, ['field00', 'field02'])

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        self.assertEqual(None, combiner.process(design))
Пример #10
0
    def overallCost(self, design):
        """Return the weighted overall cost of the given design.

        The workload is first passed through a WorkloadCombiner. If that
        yields a truthy combined workload, it replaces the workload held by
        self.state (and the skew component re-splits it) for the duration of
        the computation; the original workload is put back before returning.

        The disk, network and skew component costs are each multiplied by
        their weight from self.state, summed, and divided by
        self.weights_sum. Components whose weight is not positive are
        skipped.

        Side effects: sets self.new_design, self.last_design and
        self.last_cost, invalidates the cache for every item of
        design.getDelta(self.last_design), resets the disk component and
        calls self.finish().
        """
        # TODO: We should reset any cache entries for only those collections
        #       that were changed in this new design from the last design
        self.new_design = design

        combiner = WorkloadCombiner(self.col_names, self.workload, self.collections)
        combinedWorkload = combiner.process(design)
        if combinedWorkload:
            self.state.updateWorkload(combinedWorkload)
            self.skewComponent.splitWorkload()

        try:
            num_nodes = self.state.calcNumNodes(design, self.maxCardinality)

            # This is meant to apply to all components
            # but it only works with network component
            # for disk component, we have to use reset now
            # TODO yang: make this beautiful
            # An explicit loop is used (not map()) because the calls are made
            # purely for their side effect and must run eagerly.
            for delta_item in design.getDelta(self.last_design):
                self.invalidateCache(delta_item)
            self.diskComponent.reset()

            if self.debug:
                LOG.debug("New Design:\n%s", design)
                self.state.cache_hit_ctr.clear()
                self.state.cache_miss_ctr.clear()

            cost = 0.0
            start = time.time()
            if self.state.weight_disk > 0:
                cost += self.state.weight_disk * self.diskComponent.getCost(design, num_nodes)
            if self.state.weight_network > 0:
                cost += self.state.weight_network * self.networkComponent.getCost(design, num_nodes)
            if self.state.weight_skew > 0:
                cost += self.state.weight_skew * self.skewComponent.getCost(design, num_nodes)
            stop = time.time()

            self.last_cost = cost / self.weights_sum
            self.last_design = design

            # Report the normalized cost and how long the components took
            LOG.info("Overall Cost %f / Computed in %.2f seconds, design\n %s",
                     self.last_cost, (stop - start), design)

            self.finish()
        finally:
            # Put the original workload back even when costing raised, so a
            # failure does not leave the combined workload installed.
            if combinedWorkload:
                self.state.restoreOriginalWorkload()
                self.skewComponent.splitWorkload()

        return self.last_cost
Пример #11
0
    def testNetworkCostShouldReduceAfterQueryCombination(self):
        """
            The cost reported by self.cmn must go down once "koalas" is
            embedded in "apples" and the workload is combined, and must
            return to its initial value after the original workload is
            restored
        """
        original = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            name = self.collections[key]['name']
            original.addCollection(name)
            original.addIndex(name, ['field00', 'field02'])
        cost0 = self.cmn.getCost(original)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # Same collections and indexes with the denormalization on top; the
        # state cache is invalidated for each collection as it is added
        embedded = Design()
        for key in CostModelTestCase.COLLECTION_NAMES:
            name = self.collections[key]['name']
            embedded.addCollection(name)
            embedded.addIndex(name, ['field00', 'field02'])
            self.state.invalidateCache(name)

        embedded.setDenormalizationParent("koalas", "apples")

        self.state.updateWorkload(combiner.process(embedded))

        self.cmn.reset()
        self.cmn.state.reset()
        cost1 = self.cmn.getCost(embedded)

        print("cost1 " + str(cost1))

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        self.cmn.reset()
        self.cmn.state.reset()
        cost2 = self.cmn.getCost(original)

        print("cost2 " + str(cost2))

        self.assertEqual(cost2, cost0)
Пример #12
0
    def overallCost(self, design):
        """Return the weighted overall cost of the given design.

        The workload is first passed through a WorkloadCombiner. If that
        yields a truthy combined workload, it replaces the workload held by
        self.state for the duration of the computation; the original
        workload is put back before returning.

        The disk, network and skew component costs are each multiplied by
        their weight from self.state, summed, and divided by
        self.weights_sum. Components whose weight is not positive are
        skipped.

        Side effects: sets self.new_design, self.last_design and
        self.last_cost, invalidates the cache for every item of
        design.getDelta(self.last_design), resets the disk component and
        calls self.finish().
        """
        # TODO: We should reset any cache entries for only those collections
        #       that were changed in this new design from the last design
        self.new_design = design

        combiner = WorkloadCombiner(self.col_names, self.workload)
        combinedWorkload = combiner.process(design)
        if combinedWorkload:
            self.state.updateWorkload(combinedWorkload)

        try:
            # This is meant to apply to all components
            # but it only works with network component
            # for disk component, we have to use reset now
            # TODO yang: make this beautiful
            # An explicit loop is used (not map()) because the calls are made
            # purely for their side effect and must run eagerly.
            for delta_item in design.getDelta(self.last_design):
                self.invalidateCache(delta_item)
            self.diskComponent.reset()

            if self.debug:
                LOG.debug("New Design:\n%s", design)
                self.state.cache_hit_ctr.clear()
                self.state.cache_miss_ctr.clear()

            cost = 0.0
            start = time.time()
            if self.state.weight_disk > 0:
                cost += self.state.weight_disk * self.diskComponent.getCost(design)
            if self.state.weight_network > 0:
                cost += self.state.weight_network * self.networkComponent.getCost(
                    design)
            if self.state.weight_skew > 0:
                cost += self.state.weight_skew * self.skewComponent.getCost(design)
            stop = time.time()

            self.last_cost = cost / self.weights_sum
            self.last_design = design

            # Report the normalized cost and how long the components took
            LOG.info("Overall Cost %.3f / Computed in %.2f seconds",
                     self.last_cost, (stop - start))

            self.finish()
        finally:
            # Put the original workload back even when costing raised, so a
            # failure does not leave the combined workload installed.
            if combinedWorkload:
                self.state.restoreOriginalWorkload()

        return self.last_cost
Пример #13
0
    def testNetworkCostShouldReduceAfterQueryCombination(self):
        """
            Embedding "koalas" in "apples" and combining the workload should
            lower the cost computed by self.cmn; restoring the original
            workload should reproduce the initial cost
        """
        def make_design(invalidate):
            # Add every test collection with an index on (field00, field02);
            # optionally invalidate the state cache for each one on the way
            design = Design()
            for key in CostModelTestCase.COLLECTION_NAMES:
                name = self.collections[key]['name']
                design.addCollection(name)
                design.addIndex(name, ['field00', 'field02'])
                if invalidate:
                    self.state.invalidateCache(name)
            return design

        baseline = make_design(False)
        cost0 = self.cmn.getCost(baseline)
        print("cost0 " + str(cost0))

        # Initialize a combiner
        combiner = WorkloadCombiner(self.col_names, self.workload)

        # Initialize a design with denormalization
        denormalized = make_design(True)
        denormalized.setDenormalizationParent("koalas", "apples")

        combined = combiner.process(denormalized)
        self.state.updateWorkload(combined)

        self.cmn.reset()
        self.cmn.state.reset()
        cost1 = self.cmn.getCost(denormalized)

        print("cost1 " + str(cost1))

        self.assertGreater(cost0, cost1)

        # Cost should remain the same after restoring the original workload
        self.state.restoreOriginalWorkload()
        self.cmn.reset()
        self.cmn.state.reset()
        cost2 = self.cmn.getCost(baseline)

        print("cost2 " + str(cost2))

        self.assertEqual(cost2, cost0)
Пример #14
0
    def testNetworkCostDenormalization(self):
        """Check network cost for queries that reference denormalized collections"""
        # Get the "base" design cost when all of the collections
        # are sharded on their "interesting" fields
        d0 = Design()
        for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)):
            col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            d0.addCollection(col_info['name'])
            if i == 0:
                d0.addShardKey(col_info['name'], col_info['interesting'])
            else:
                d0.addShardKey(col_info['name'], ["_id"])
            self.cm.invalidateCache(d0, col_info['name'])
        ## FOR
        self.cm.reset()
        self.state.reset()
        cost0 = self.cm.getCost(d0)
        
        print "cost0:", cost0
        
        # Now get the network cost for when we denormalize the
        # second collection inside of the first one
        # We should have a lower cost because there should now be fewer queries
        d1 = Design()
        for i in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)):
            col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            self.assertTrue(col_info['interesting'])
            d1.addCollection(col_info['name'])
            if i == 0:
                d1.addShardKey(col_info['name'], col_info['interesting'])
            else:
                parent = self.collections[CostModelTestCase.COLLECTION_NAMES[0]]
                self.assertIsNotNone(parent)
                d1.setDenormalizationParent(col_info['name'], parent['name'])
                self.assertTrue(d1.isDenormalized(col_info['name']), col_info['name'])
                self.assertIsNotNone(d1.getDenormalizationParent(col_info['name']))
            
            self.cm.invalidateCache(d1, col_info['name'])

        combiner = WorkloadCombiner(self.collections, self.workload)
        combinedWorkload = combiner.process(d1)
        self.state.updateWorkload(combinedWorkload)
        
        self.cm.reset()
        self.state.reset()
        cost1 = self.cm.getCost(d1)
        print "cost1:", cost1
       
        self.assertLess(cost1, cost0)

        # The denormalization cost should also be the same as the cost
        # when we remove all of the ops one the second collection
        backup_collections = copy.deepcopy(self.collections)

        for sess in self.state.workload:
            for op in sess["operations"]:
                if op["collection"] <> CostModelTestCase.COLLECTION_NAMES[0]:
                    sess["operations"].remove(op)
            ## FOR (op)
        ## FOR (sess)
        for i in xrange(1, len(CostModelTestCase.COLLECTION_NAMES)):
            del self.collections[CostModelTestCase.COLLECTION_NAMES[i]]
            print "deleted name: ", CostModelTestCase.COLLECTION_NAMES[i]

        self.cm.reset()
        self.state.reset()
        cost2 = self.cm.getCost(d1)
        print "cost2:", cost2

        self.assertEqual(cost1, cost2)

        # Restore the original workload and see if the cost remains the same with the original one
        self.state.restoreOriginalWorkload()
        self.state.collections = backup_collections
        
        self.cm.reset()
        self.state.reset()
        cost3 = self.cm.getCost(d0)
        print "cost3:", cost3
        
        self.assertEqual(cost3, cost0)