class TestNetworkCostTPCC(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = NetworkCostComponent(self.state) ## DEF def testNetworkCostDenormalization(self): """Check network cost for queries that reference denormalized collections""" # Get the "base" design cost when all of the collections # are sharded on their "interesting" fields d = Design() i = 0 for col_info in self.collections.itervalues(): d.addCollection(col_info['name']) if i == 0: d.addShardKey(col_info['name'], col_info['interesting']) else: d.addShardKey(col_info['name'], ["_id"]) self.cm.invalidateCache(d, col_info['name']) i += 1 ## FOR self.cm.reset() self.state.reset() cost0 = self.cm.getCost(d) print "cost0:", cost0 # Now get the network cost for when we denormalize the # second collection inside of the first one # We should have a lower cost because there should now be fewer queries d = Design() i = 0 for col_info in self.collections.itervalues(): self.assertTrue(col_info['interesting']) d.addCollection(col_info['name']) if i == 0: d.addShardKey(col_info['name'], col_info['interesting']) else: d.addShardKey(col_info['name'], ["_id"]) self.cm.invalidateCache(d, col_info['name']) i += 1 d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS) combiner = WorkloadCombiner(self.collections, self.workload) combinedWorkload = combiner.process(d) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.state.reset() cost1 = self.cm.getCost(d) print "cost1:", cost1 self.assertLess(cost1, cost0)
class TestNetworkCost(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = NetworkCostComponent(self.state) ## DEF def testNetworkCost(self): """Check network cost for equality predicate queries""" col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[0]] self.assertTrue(col_info['interesting']) # If we shard the collection on the interesting fields, then # each query should only need to touch one node d = Design() d.addCollection(col_info['name']) d.addShardKey(col_info['name'], col_info['interesting']) cost0 = self.cm.getCost(d) print "cost0: ", cost0 # If we now shard the collection on just '_id', then every query # should have to touch every node. The cost of this design # should be greater than the first one d = Design() d.addCollection(col_info['name']) d.addShardKey(col_info['name'], ['_id']) self.cm.invalidateCache(d, col_info['name']) self.state.reset() cost1 = self.cm.getCost(d) print "cost1: ", cost1 self.assertLess(cost0, cost1) ## DEF def testNetworkCostDenormalization(self): """Check network cost for queries that reference denormalized collections""" # Get the "base" design cost when all of the collections # are sharded on their "interesting" fields d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) if i == 0: d0.addShardKey(col_info['name'], col_info['interesting']) else: d0.addShardKey(col_info['name'], ["_id"]) self.cm.invalidateCache(d0, col_info['name']) ## FOR self.cm.reset() self.state.reset() cost0 = self.cm.getCost(d0) print "cost0:", cost0 # Now get the network cost for when we denormalize the # second collection inside of the first one # We should have a lower cost because there should now be fewer queries d1 = Design() for i in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] self.assertTrue(col_info['interesting']) d1.addCollection(col_info['name']) if i == 0: d1.addShardKey(col_info['name'], col_info['interesting']) else: parent = self.collections[CostModelTestCase.COLLECTION_NAMES[0]] self.assertIsNotNone(parent) d1.setDenormalizationParent(col_info['name'], parent['name']) self.assertTrue(d1.isDenormalized(col_info['name']), col_info['name']) self.assertIsNotNone(d1.getDenormalizationParent(col_info['name'])) self.cm.invalidateCache(d1, col_info['name']) combiner = WorkloadCombiner(self.collections, self.workload) combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.state.reset() cost1 = self.cm.getCost(d1) print "cost1:", cost1 self.assertLess(cost1, cost0) # The denormalization cost should also be the same as the cost # when we remove all of the ops one the second collection backup_collections = copy.deepcopy(self.collections) for sess in self.state.workload: for op in sess["operations"]: if op["collection"] <> CostModelTestCase.COLLECTION_NAMES[0]: sess["operations"].remove(op) ## FOR (op) ## FOR (sess) for i in xrange(1, len(CostModelTestCase.COLLECTION_NAMES)): del self.collections[CostModelTestCase.COLLECTION_NAMES[i]] print "deleted name: ", CostModelTestCase.COLLECTION_NAMES[i] self.cm.reset() self.state.reset() cost2 = self.cm.getCost(d1) print "cost2:", cost2 self.assertEqual(cost1, cost2) # Restore the original workload and see if the cost remains the same with the original one self.state.restoreOriginalWorkload() self.state.collections = backup_collections self.cm.reset() self.state.reset() cost3 = self.cm.getCost(d0) print "cost3:", cost3 self.assertEqual(cost3, cost0)