def testDiskCostNotChangedAfterQueryCombination(self): """Disk cost should not be changed after query combination""" d = Design() d = Design() for col_name in self.collections.iterkeys(): d.addCollection(col_name) cost0 = self.cm.getCost(d) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() d = Design() for col_name in self.collections.iterkeys(): d.addCollection(col_name) self.state.invalidateCache(col_name) d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS) combinedWorkload = combiner.process(d) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d) print "cost1 " + str(cost1) self.assertEqual(cost0, cost1)
def testQueriesCombination(self): """Test if the total number of queries are reduced""" original_number_of_queries = 0 for sess in self.workload: for op in sess["operations"]: original_number_of_queries += 1 print "orignal number of queries: " + str(original_number_of_queries) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d.addCollection(col_info['name']) d.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d) number_of_queries_from_combined_workload = 0 for sess in combinedWorkload: for op in sess["operations"]: number_of_queries_from_combined_workload += 1 print "number of queries after query combination: " + str(number_of_queries_from_combined_workload) self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload)
def testQueriesCombination(self): """Test if the total number of queries are reduced""" original_number_of_queries = 0 for sess in self.workload: for op in sess["operations"]: original_number_of_queries += 1 print "orignal number of queries: " + str(original_number_of_queries) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d.addCollection(col_info['name']) d.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d) number_of_queries_from_combined_workload = 0 for sess in combinedWorkload: for op in sess["operations"]: number_of_queries_from_combined_workload += 1 print "number of queries after query combination: " + str( number_of_queries_from_combined_workload) self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload)
def testQueriesCombination(self): """Test if the total number of queries are reduced""" original_number_of_queries = 0 for sess in self.workload: for op in sess["operations"]: original_number_of_queries += 1 print "orignal number of queries: " + str(original_number_of_queries) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() for col_name in self.collections.iterkeys(): d.addCollection(col_name) d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS) combinedWorkload = combiner.process(d) number_of_queries_from_combined_workload = 0 for sess in combinedWorkload: for op in sess["operations"]: number_of_queries_from_combined_workload += 1 print "number of queries after query combination: " + str(number_of_queries_from_combined_workload) self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload)
def testNetworkCostDenormalization(self): """Check network cost for queries that reference denormalized collections""" # Get the "base" design cost when all of the collections # are sharded on their "interesting" fields d = Design() i = 0 for col_info in self.collections.itervalues(): d.addCollection(col_info['name']) if i == 0: d.addShardKey(col_info['name'], col_info['interesting']) else: d.addShardKey(col_info['name'], ["_id"]) self.cm.invalidateCache(d, col_info['name']) i += 1 ## FOR self.cm.reset() self.state.reset() cost0 = self.cm.getCost(d) print "cost0:", cost0 # Now get the network cost for when we denormalize the # second collection inside of the first one # We should have a lower cost because there should now be fewer queries d = Design() i = 0 for col_info in self.collections.itervalues(): self.assertTrue(col_info['interesting']) d.addCollection(col_info['name']) if i == 0: d.addShardKey(col_info['name'], col_info['interesting']) else: d.addShardKey(col_info['name'], ["_id"]) self.cm.invalidateCache(d, col_info['name']) i += 1 d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS) combiner = WorkloadCombiner(self.collections, self.workload) combinedWorkload = combiner.process(d) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.state.reset() cost1 = self.cm.getCost(d) print "cost1:", cost1 self.assertLess(cost1, cost0)
def testDiskCostChangesAfterQueryCombination(self): """ Assume we have collection A, B, C and we want to embed C to A If we build index on field00 of A and field02 of C The cost after query combination should be lower """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) cost0 = self.cm.getCost(d0) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d1 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d1.addCollection(col_info['name']) d1.addIndex(col_info['name'], ['field00', 'field02']) self.state.invalidateCache(col_info['name']) d1.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d1) print "cost1 " + str(cost1) self.assertGreater(cost0, cost1) # Cost should remain the same after restoring the original workload self.state.restoreOriginalWorkload() self.cm.reset() print "child collection ", self.cm.child_collections self.cm.state.reset() cost2 = self.cm.getCost(d0) print "cost2 " + str(cost2) self.assertEqual(cost2, cost0)
def testNotCollectionEmbeddingProcessShouldReturnNone(self):
    """
        If the given design has no collection embedding,
        we should return right away
    """
    # A design with indexes but without any denormalization parent
    d0 = Design()
    for name in CostModelTestCase.COLLECTION_NAMES:
        info = self.collections[name]
        d0.addCollection(info['name'])
        d0.addIndex(info['name'], ['field00', 'field02'])

    # Initialize a combiner
    combiner = WorkloadCombiner(self.col_names, self.workload)
    combinedWorkload = combiner.process(d0)

    # No embedding -> the combiner bails out with None
    self.assertEqual(None, combinedWorkload)
def overallCost(self, design):
    """Compute the weighted overall cost of *design*.

    Combines the disk, network, and skew component costs (each scaled by
    its weight from ``self.state``), normalized by ``self.weights_sum``.
    If the design embeds collections, the workload is temporarily replaced
    by a combined workload for the computation and restored afterwards.

    Side effects: updates ``self.new_design``, ``self.last_cost``, and
    ``self.last_design``; resets the disk component and clears cache
    counters on every call.
    """
    # TODO: We should reset any cache entries for only those collections
    # that were changed in this new design from the last design
    self.new_design = design

    # If the design denormalizes collections, process() returns a combined
    # workload; otherwise it returns a falsy value and we keep the original.
    combiner = WorkloadCombiner(self.col_names, self.workload, self.collections)
    combinedWorkload = combiner.process(design)
    if combinedWorkload:
        self.state.updateWorkload(combinedWorkload)
        # Re-split because the session/op layout just changed
        self.skewComponent.splitWorkload()

    num_nodes = self.state.calcNumNodes(design, self.maxCardinality)

    # This is meant to apply to all components
    # but it only works with network component
    # for disk component, we have to use reset now
    # TODO yang: make this beautiful
    map(self.invalidateCache, design.getDelta(self.last_design))
    self.diskComponent.reset()

    if self.debug:
        LOG.debug("New Design:\n%s", design)
    self.state.cache_hit_ctr.clear()
    self.state.cache_miss_ctr.clear()

    cost = 0.0
    start = time.time()
    # Skip any component whose weight is zero — its getCost() may be expensive
    if self.state.weight_disk > 0:
        cost += self.state.weight_disk * self.diskComponent.getCost(design, num_nodes)
    if self.state.weight_network > 0:
        cost += self.state.weight_network * self.networkComponent.getCost(design, num_nodes)
    if self.state.weight_skew > 0:
        cost += self.state.weight_skew * self.skewComponent.getCost(design, num_nodes)
    stop = time.time()

    self.last_cost = cost / self.weights_sum
    self.last_design = design

    # Calculate cache hit/miss ratio
    LOG.info("Overall Cost %f / Computed in %.2f seconds, design\n %s", \
             self.last_cost, (stop - start), design)

    self.finish()

    # Undo the temporary workload swap so later calls see the original
    if combinedWorkload:
        self.state.restoreOriginalWorkload()
        self.skewComponent.splitWorkload()

    return self.last_cost
def testNetworkCostShouldReduceAfterQueryCombination(self): """ Network cost should be reduce after embedding collections """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) cost0 = self.cmn.getCost(d0) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d1 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d1.addCollection(col_info['name']) d1.addIndex(col_info['name'], ['field00', 'field02']) self.state.invalidateCache(col_info['name']) d1.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cmn.reset() self.cmn.state.reset() cost1 = self.cmn.getCost(d1) print "cost1 " + str(cost1) self.assertGreater(cost0, cost1) # Cost should remain the same after restoring the original workload self.state.restoreOriginalWorkload() self.cmn.reset() self.cmn.state.reset() cost2 = self.cmn.getCost(d0) print "cost2 " + str(cost2) self.assertEqual(cost2, cost0)
def overallCost(self, design):
    """Compute the weighted overall cost of *design*.

    Sums the disk, network, and skew component costs, each scaled by its
    weight from ``self.state`` and skipped entirely when the weight is
    zero, then normalizes by ``self.weights_sum``. When the design embeds
    collections, the workload is temporarily replaced with the combined
    workload and restored before returning.

    Side effects: updates ``self.new_design``, ``self.last_cost``, and
    ``self.last_design``; resets the disk component and clears the
    cache-hit/miss counters on every call.
    """
    # TODO: We should reset any cache entries for only those collections
    # that were changed in this new design from the last design
    self.new_design = design

    # process() yields a combined workload only for denormalized designs;
    # otherwise it is falsy and the original workload stays in place.
    combiner = WorkloadCombiner(self.col_names, self.workload)
    combinedWorkload = combiner.process(design)
    if combinedWorkload:
        self.state.updateWorkload(combinedWorkload)

    # This is meant to apply to all components
    # but it only works with network component
    # for disk component, we have to use reset now
    # TODO yang: make this beautiful
    map(self.invalidateCache, design.getDelta(self.last_design))
    self.diskComponent.reset()

    if self.debug:
        LOG.debug("New Design:\n%s", design)
    self.state.cache_hit_ctr.clear()
    self.state.cache_miss_ctr.clear()

    cost = 0.0
    start = time.time()
    # Zero-weight components are skipped to avoid their getCost() expense
    if self.state.weight_disk > 0:
        cost += self.state.weight_disk * self.diskComponent.getCost(design)
    if self.state.weight_network > 0:
        cost += self.state.weight_network * self.networkComponent.getCost(design)
    if self.state.weight_skew > 0:
        cost += self.state.weight_skew * self.skewComponent.getCost(design)
    stop = time.time()

    self.last_cost = cost / self.weights_sum
    self.last_design = design

    # Calculate cache hit/miss ratio
    LOG.info("Overall Cost %.3f / Computed in %.2f seconds", \
             self.last_cost, (stop - start))

    self.finish()

    # Put the original workload back so subsequent calls start clean
    if combinedWorkload:
        self.state.restoreOriginalWorkload()

    return self.last_cost
def testNetworkCostDenormalization(self): """Check network cost for queries that reference denormalized collections""" # Get the "base" design cost when all of the collections # are sharded on their "interesting" fields d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) if i == 0: d0.addShardKey(col_info['name'], col_info['interesting']) else: d0.addShardKey(col_info['name'], ["_id"]) self.cm.invalidateCache(d0, col_info['name']) ## FOR self.cm.reset() self.state.reset() cost0 = self.cm.getCost(d0) print "cost0:", cost0 # Now get the network cost for when we denormalize the # second collection inside of the first one # We should have a lower cost because there should now be fewer queries d1 = Design() for i in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] self.assertTrue(col_info['interesting']) d1.addCollection(col_info['name']) if i == 0: d1.addShardKey(col_info['name'], col_info['interesting']) else: parent = self.collections[CostModelTestCase.COLLECTION_NAMES[0]] self.assertIsNotNone(parent) d1.setDenormalizationParent(col_info['name'], parent['name']) self.assertTrue(d1.isDenormalized(col_info['name']), col_info['name']) self.assertIsNotNone(d1.getDenormalizationParent(col_info['name'])) self.cm.invalidateCache(d1, col_info['name']) combiner = WorkloadCombiner(self.collections, self.workload) combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.state.reset() cost1 = self.cm.getCost(d1) print "cost1:", cost1 self.assertLess(cost1, cost0) # The denormalization cost should also be the same as the cost # when we remove all of the ops one the second collection backup_collections = copy.deepcopy(self.collections) for sess in self.state.workload: for op in sess["operations"]: if op["collection"] <> 
CostModelTestCase.COLLECTION_NAMES[0]: sess["operations"].remove(op) ## FOR (op) ## FOR (sess) for i in xrange(1, len(CostModelTestCase.COLLECTION_NAMES)): del self.collections[CostModelTestCase.COLLECTION_NAMES[i]] print "deleted name: ", CostModelTestCase.COLLECTION_NAMES[i] self.cm.reset() self.state.reset() cost2 = self.cm.getCost(d1) print "cost2:", cost2 self.assertEqual(cost1, cost2) # Restore the original workload and see if the cost remains the same with the original one self.state.restoreOriginalWorkload() self.state.collections = backup_collections self.cm.reset() self.state.reset() cost3 = self.cm.getCost(d0) print "cost3:", cost3 self.assertEqual(cost3, cost0)