def createOrderStatus(self, params):
    """Generate the synthetic operations for one order-status transaction.

    Three read queries are produced, in this order: a CUSTOMER lookup, an
    ORDERS lookup and an ORDER_LINE lookup. The predicates use the "w_id",
    "d_id" and "c_id" entries of `params` plus a randomly drawn order id;
    every response document is filled with random placeholder values.

    Returns the list of operation records in execution order.
    """
    operations = []
    warehouse = params["w_id"]
    district = params["d_id"]
    customer = params["c_id"]
    # Not used below; the lookup is kept so a missing key still fails here.
    c_last = params["c_last"]
    order_id = self.rng.randint(0, 10000)

    # (1) CUSTOMER: fetch name and balance for the given customer
    customer_op = Session.operationFactory()
    customer_op['resp_content'] = [{
        "C_W_ID": self.rng.randint(0, 100),
        "C_D_ID": self.rng.randint(0, 100),
        "C_ID": self.rng.randint(0, 100),
    }]
    customer_op['collection'] = tpccConstants.TABLENAME_CUSTOMER
    customer_op['type'] = constants.OP_TYPE_QUERY
    customer_op['query_id'] = self.nextQueryId()
    customer_op['query_content'] = [
        {"#query": {"C_W_ID": warehouse, "C_D_ID": district, "C_ID": customer}}
    ]
    customer_op['query_fields'] = {
        "C_ID": 1,
        "C_FIRST": 1,
        "C_MIDDLE": 1,
        "C_LAST": 1,
        "C_BALANCE": 1,
    }
    customer_op['resp_id'] = self.nextResponseId()
    customer_op['query_time'] = self.nextTimestamp()
    customer_op['resp_time'] = self.nextTimestamp()
    operations.append(customer_op)

    # (2) ORDERS: fetch the orders that belong to that customer
    orders_op = Session.operationFactory()
    orders_op['resp_content'] = [{
        "O_W_ID": self.rng.randint(0, 100),
        "O_D_ID": self.rng.randint(0, 100),
        "O_C_ID": self.rng.randint(0, 100),
    }]
    orders_op['collection'] = tpccConstants.TABLENAME_ORDERS
    orders_op['type'] = constants.OP_TYPE_QUERY
    orders_op['query_id'] = self.nextQueryId()
    orders_op['query_content'] = [
        {"#query": {"O_W_ID": warehouse, "O_D_ID": district, "O_C_ID": customer}}
    ]
    orders_op['query_fields'] = {"O_ID": 1, "O_CARRIER_ID": 1, "O_ENTRY_D": 1}
    orders_op['resp_id'] = self.nextResponseId()
    orders_op['query_time'] = self.nextTimestamp()
    orders_op['resp_time'] = self.nextTimestamp()
    operations.append(orders_op)

    # (3) ORDER_LINE: fetch the lines of the randomly chosen order
    lines_op = Session.operationFactory()
    lines_op['resp_content'] = [{
        "OL_W_ID": self.rng.randint(0, 100),
        "OL_D_ID": self.rng.randint(0, 100),
        "OL_O_ID": self.rng.randint(0, 100),
    }]
    lines_op['collection'] = tpccConstants.TABLENAME_ORDER_LINE
    lines_op['type'] = constants.OP_TYPE_QUERY
    lines_op['query_id'] = self.nextQueryId()
    lines_op['query_content'] = [
        {"#query": {"OL_W_ID": warehouse, "OL_D_ID": district, "OL_O_ID": order_id}}
    ]
    lines_op['query_fields'] = {
        "OL_SUPPLY_W_ID": 1,
        "OL_I_ID": 1,
        "OL_QUANTITY": 1,
        "OL_AMOUNT": 1,
        "OL_DELIVERY_D": 1,
    }
    lines_op['resp_id'] = self.nextResponseId()
    lines_op['query_time'] = self.nextTimestamp()
    lines_op['resp_time'] = self.nextTimestamp()
    operations.append(lines_op)

    return operations
def createStockLevel(self, params):
    """Generate the synthetic operations for one stock-level transaction.

    Emits three read queries, in this order: DISTRICT (projecting
    D_NEXT_O_ID), ORDER_LINE (order ids in the 20-wide range ending just
    below a random order id) and STOCK (a random list of item ids whose
    quantity is below params["threshold"]). The response documents are
    filled with random placeholder values.

    Returns the list of operation records in execution order.
    """
    operations = []
    warehouse = params["w_id"]
    district = params["d_id"]
    newest_order = self.rng.randint(0, 10000)
    item_ids = [self.rng.randint(0, 1000) for _ in xrange(10)]
    threshold = params["threshold"]

    # (1) DISTRICT: read the next order id of the district
    district_op = Session.operationFactory()
    district_op['resp_content'] = [{
        "D_W_ID": self.rng.randint(0, 100),
        "D_ID": self.rng.randint(0, 100),
    }]
    district_op['collection'] = tpccConstants.TABLENAME_DISTRICT
    district_op['type'] = constants.OP_TYPE_QUERY
    district_op['query_id'] = self.nextQueryId()
    district_op['query_content'] = [
        {"#query": {"D_W_ID": warehouse, "D_ID": district}}
    ]
    district_op['query_fields'] = {"D_NEXT_O_ID": 1}
    district_op['resp_id'] = self.nextResponseId()
    district_op['query_time'] = self.nextTimestamp()
    district_op['resp_time'] = self.nextTimestamp()
    operations.append(district_op)

    # (2) ORDER_LINE: items of the 20 orders preceding the chosen order id
    order_range = {"#lt": newest_order, "#gte": newest_order - 20}
    lines_op = Session.operationFactory()
    lines_op['resp_content'] = [{
        "OL_W_ID": self.rng.randint(0, 100),
        "OL_D_ID": self.rng.randint(0, 100),
        "OL_O_ID": self.rng.randint(0, 100),
    }]
    lines_op['collection'] = tpccConstants.TABLENAME_ORDER_LINE
    lines_op['type'] = constants.OP_TYPE_QUERY
    lines_op['query_id'] = self.nextQueryId()
    lines_op['query_content'] = [
        {"#query": {"OL_W_ID": warehouse, "OL_D_ID": district, "OL_O_ID": order_range}}
    ]
    lines_op['query_fields'] = {"OL_I_ID": 1}
    lines_op['resp_id'] = self.nextResponseId()
    lines_op['query_time'] = self.nextTimestamp()
    lines_op['resp_time'] = self.nextTimestamp()
    operations.append(lines_op)

    # (3) STOCK: which of those items are below the threshold
    stock_op = Session.operationFactory()
    stock_op['resp_content'] = [{
        "S_W_ID": self.rng.randint(0, 100),
        "S_I_ID": self.rng.randint(0, 100),
        "S_QUANTITY": self.rng.randint(0, 100),
    }]
    stock_op['collection'] = tpccConstants.TABLENAME_STOCK
    stock_op['type'] = constants.OP_TYPE_QUERY
    stock_op['query_id'] = self.nextQueryId()
    stock_op['query_content'] = [{
        "#query": {
            "S_W_ID": warehouse,
            "S_I_ID": {"#in": list(item_ids)},
            "S_QUANTITY": {"#lt": threshold},
        }
    }]
    stock_op['query_fields'] = None
    stock_op['resp_id'] = self.nextResponseId()
    stock_op['query_time'] = self.nextTimestamp()
    stock_op['resp_time'] = self.nextTimestamp()
    operations.append(stock_op)

    return operations
def testEstimateNodesNullValue(self):
    """Check the estimating touched nodes when the sharding key value is null"""
    design = Design()
    for name in COLLECTION_NAMES:
        catalog_entry = self.collections[name]
        design.addCollection(catalog_entry['name'])
        # Shard on a key that none of the operations carry; the estimator
        # is still expected to come back with an answer.
        design.addShardKey(catalog_entry['name'], ['XXXX'])

    # Step 1: a lookup that does not use the sharding key is expected to
    # be broadcast to every node.
    session = self.metadata_db.Session.fetch_one()
    lookup_op = session['operations'][0]
    broadcast_nodes = list(self.estimator.estimateNodes(design, lookup_op))
    all_nodes = range(NUM_NODES)
    self.assertListEqual(all_nodes, broadcast_nodes)

    # Step 2: turn the same operation into an insert whose document lacks
    # the sharding key; it is expected to land on exactly one node.
    lookup_op['type'] = constants.OP_TYPE_INSERT
    lookup_op['query_content'] = lookup_op['resp_content']
    lookup_op['predicates'] = []
    first_insert_nodes = list(self.estimator.estimateNodes(design, lookup_op))
    self.assertEqual(1, len(first_insert_nodes))

    # Step 3: a second, unrelated insert without the sharding key is
    # expected to be routed to that very same node.
    insert_op = Session.operationFactory()
    insert_op['collection'] = COLLECTION_NAMES[0]
    insert_op['type'] = constants.OP_TYPE_INSERT
    insert_op['query_id'] = 10000
    insert_op['query_content'] = [{"parkinglot": 1234}]
    insert_op['resp_content'] = [{"ok": 1}]
    insert_op['resp_id'] = 10001
    second_insert_nodes = list(self.estimator.estimateNodes(design, insert_op))
    self.assertEqual(1, len(second_insert_nodes))
    self.assertListEqual(first_insert_nodes, second_insert_nodes)
def testEstimateNodesNullValue(self):
    """Check the estimating touched nodes when the sharding key value is null"""
    null_key_design = Design()
    for collection_name in COLLECTION_NAMES:
        info = self.collections[collection_name]
        null_key_design.addCollection(info["name"])
        # "XXXX" is not a field of any operation, so the sharding key value
        # is always missing; an estimate must still be produced.
        null_key_design.addShardKey(info["name"], ["XXXX"])

    estimate = self.estimator.estimateNodes

    # A query on a non-sharding key: expected to touch every node
    stored_session = self.metadata_db.Session.fetch_one()
    query = stored_session["operations"][0]
    touched_by_query = list(estimate(null_key_design, query))
    self.assertListEqual(range(NUM_NODES), touched_by_query)

    # The same operation rewritten as an insert of a document without the
    # sharding key: expected to touch exactly one node
    query["type"] = constants.OP_TYPE_INSERT
    query["query_content"] = query["resp_content"]
    query["predicates"] = []
    touched_by_insert = list(estimate(null_key_design, query))
    self.assertEqual(1, len(touched_by_insert))

    # A brand-new insert without the sharding key: expected to touch the
    # same single node as the previous insert
    fresh_insert = Session.operationFactory()
    fresh_insert["collection"] = COLLECTION_NAMES[0]
    fresh_insert["type"] = constants.OP_TYPE_INSERT
    fresh_insert["query_id"] = 10000
    fresh_insert["query_content"] = [{"parkinglot": 1234}]
    fresh_insert["resp_content"] = [{"ok": 1}]
    fresh_insert["resp_id"] = 10001
    touched_by_fresh_insert = list(estimate(null_key_design, fresh_insert))
    self.assertEqual(1, len(touched_by_fresh_insert))
    self.assertListEqual(touched_by_insert, touched_by_fresh_insert)
def createNewOrder(self, params):
    """Generate the synthetic operations for one new-order transaction.

    `params` must provide "w_id", "d_id", "c_id", "o_entry_d" and the
    parallel lists "i_ids", "i_w_ids" and "i_qtys" (one entry per ordered
    item).

    The operations are emitted in this order: ITEM query, WAREHOUSE query,
    DISTRICT query, DISTRICT update, CUSTOMER query, NEW_ORDER insert,
    ORDERS insert, STOCK query, and then one STOCK update per ordered item.
    Response documents are filled with random placeholder values.

    Returns the list of operation records in execution order.
    """
    ops = []
    w_id = params["w_id"]
    d_id = params["d_id"]
    c_id = params["c_id"]
    o_entry_d = params["o_entry_d"]
    i_ids = params["i_ids"]
    i_w_ids = params["i_w_ids"]
    i_qtys = params["i_qtys"]
    s_dist_col = "S_DIST_%02d" % d_id

    # w_tax, d_tax and c_discount are not used below. The draws are kept on
    # purpose: dropping them would shift every later value taken from
    # self.rng and thereby change the generated workload.
    w_tax = self.rng.random()
    d_tax = self.rng.random()
    d_next_o_id = self.rng.randint(0, 1000)
    c_discount = self.rng.randint(0, 10)
    ol_cnt = len(i_ids)
    o_carrier_id = tpccConstants.NULL_CARRIER_ID
    # True when there are no supplying warehouses or all of them are w_id
    all_local = (not i_w_ids or [w_id] * len(i_w_ids) == i_w_ids)

    # ITEM: look up all ordered items
    op = Session.operationFactory()
    responseContent = {"I_ID": self.rng.randint(0, 100)}
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_ITEM
    op['type'] = constants.OP_TYPE_QUERY
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [{"#query": {"I_ID": {"#in": i_ids}}}]
    op['query_fields'] = {"I_ID": 1, "I_PRICE": 1, "I_NAME": 1, "I_DATA": 1}
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # WAREHOUSE: read the warehouse tax
    op = Session.operationFactory()
    responseContent = {}
    responseContent["W_ID"] = self.rng.randint(0, 100)
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_WAREHOUSE
    op['type'] = constants.OP_TYPE_QUERY
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [{"#query": {"W_ID": w_id}}]
    op['query_fields'] = {"W_TAX": 1}
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # DISTRICT: read the district tax and next order id
    op = Session.operationFactory()
    responseContent = {}
    responseContent["D_ID"] = self.rng.randint(0, 100)
    responseContent["D_W_ID"] = self.rng.randint(0, 100)
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_DISTRICT
    op['type'] = constants.OP_TYPE_QUERY
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [{"#query": {"D_ID": d_id, "D_W_ID": w_id}}]
    op['query_fields'] = {"D_TAX": 1, "D_NEXT_O_ID": 1}
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # DISTRICT: bump the next order id
    op = Session.operationFactory()
    responseContent = {}
    responseContent["D_ID"] = self.rng.randint(0, 100)
    responseContent["D_W_ID"] = self.rng.randint(0, 100)
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_DISTRICT
    op['type'] = constants.OP_TYPE_UPDATE
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [
        {"D_ID": d_id, "D_W_ID": w_id},
        {"#inc": {"D_NEXT_O_ID": 1}},
    ]
    op['query_fields'] = None
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    op['update_multi'] = False
    op['update_upsert'] = True
    ops.append(op)

    # CUSTOMER: read discount, last name and credit
    op = Session.operationFactory()
    responseContent = {}
    responseContent["C_ID"] = self.rng.randint(0, 100)
    responseContent["C_D_ID"] = self.rng.randint(0, 100)
    responseContent["C_W_ID"] = self.rng.randint(0, 100)
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_CUSTOMER
    op['type'] = constants.OP_TYPE_QUERY
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [
        {"#query": {"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id}}
    ]
    op['query_fields'] = {"C_DISCOUNT": 1, "C_LAST": 1, "C_CREDIT": 1}
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # NEW_ORDER: insert the new-order marker
    op = Session.operationFactory()
    responseContent = {}
    responseContent["NO_O_ID"] = self.rng.randint(0, 100)
    responseContent["NO_D_ID"] = self.rng.randint(0, 100)
    responseContent["NO_W_ID"] = self.rng.randint(0, 100)
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_NEW_ORDER
    op['type'] = constants.OP_TYPE_INSERT
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [
        {"NO_O_ID": d_next_o_id, "NO_D_ID": d_id, "NO_W_ID": w_id}
    ]
    op['query_fields'] = None
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # ORDERS: insert the order header.
    # A fresh operation record is required here. Without it the assignments
    # below would overwrite the NEW_ORDER insert that is already in `ops`
    # and the same dict would be appended a second time.
    op = Session.operationFactory()
    o = {
        "O_D_ID": d_id,
        "O_W_ID": w_id,
        "O_C_ID": c_id,
        "O_ID": d_next_o_id,
        "O_ENTRY_D": o_entry_d,
        "O_CARRIER_ID": o_carrier_id,
        "O_OL_CNT": ol_cnt,
        "O_ALL_LOCAL": all_local,
    }
    responseContent = {
        "O_D_ID": self.rng.randint(0, 100),
        "O_W_ID": self.rng.randint(0, 100),
        "O_C_ID": self.rng.randint(0, 100),
        "O_ID": self.rng.randint(0, 100),
        "O_ENTRY_D": self.rng.randint(0, 100),
        "O_CARRIER_ID": self.rng.randint(0, 100),
        "O_OL_CNT": self.rng.randint(0, 100),
        "O_ALL_LOCAL": self.rng.randint(0, 100),
    }
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_ORDERS
    op['type'] = constants.OP_TYPE_INSERT
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [o]
    op['query_fields'] = None
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # STOCK: read the stock rows of all ordered items
    op = Session.operationFactory()
    responseContent = {}
    responseContent["S_I_ID"] = self.rng.randint(0, 100)
    responseContent["S_W_ID"] = self.rng.randint(0, 100)
    op['resp_content'] = [responseContent]
    op['collection'] = tpccConstants.TABLENAME_STOCK
    op['type'] = constants.OP_TYPE_QUERY
    op['query_id'] = self.nextQueryId()
    op['query_content'] = [
        {"#query": {"S_I_ID": {"#in": i_ids}, "S_W_ID": w_id}}
    ]
    op['query_fields'] = {
        "S_I_ID": 1,
        "S_QUANTITY": 1,
        "S_DATA": 1,
        "S_YTD": 1,
        "S_ORDER_CNT": 1,
        "S_REMOTE_CNT": 1,
        s_dist_col: 1,
    }
    op['resp_id'] = self.nextResponseId()
    op['query_time'] = self.nextTimestamp()
    op['resp_time'] = self.nextTimestamp()
    ops.append(op)

    # STOCK: one update per ordered item
    for i in range(ol_cnt):
        s = {"S_I_ID": i_ids[i], "S_W_ID": w_id}
        # NOTE(review): this order-line document is built but never emitted
        # as an operation -- an ORDER_LINE insert may be missing here;
        # confirm. It is kept because building it consumes values from
        # self.rng, which affects everything generated afterwards.
        ol = {
            "OL_D_ID": d_id,
            "OL_W_ID": w_id,
            "OL_O_ID": d_next_o_id,
            "OL_NUMBER": i + 1,
            "OL_I_ID": i_ids[i],
            "OL_SUPPLY_W_ID": i_w_ids[i],
            "OL_DELIVERY_D": o_entry_d,
            "OL_QUANTITY": i_qtys[i],
            "OL_AMOUNT": self.rng.random() * 100,
            "OL_DIST_INFO": ''.join(
                self.rng.choice(string.ascii_uppercase) for x in range(24)),
        }
        s_remote_cnt = self.rng.randint(0, 10)
        s_order_cnt = self.rng.randint(0, 10)
        s_quantity = self.rng.randint(0, 10)
        s_ytd = self.rng.random()

        op = Session.operationFactory()
        responseContent = {}
        responseContent["S_I_ID"] = self.rng.randint(0, 100)
        responseContent["S_W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_STOCK
        op['type'] = constants.OP_TYPE_UPDATE
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [
            s,
            {
                "#set": {
                    "S_QUANTITY": s_quantity,
                    "S_YTD": s_ytd,
                    "S_ORDER_CNT": s_order_cnt,
                    "S_REMOTE_CNT": s_remote_cnt,
                }
            },
        ]
        op['query_fields'] = None
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        op['update_upsert'] = True
        ops.append(op)
    ## FOR

    return ops
def setUp(self):
    """Build a synthetic query workload and the cost-model state on top of it.

    Every session holds one equality query per collection. field00 and
    field01 take values that count up from 0 across all operations; every
    other field takes values counting down from 9999999.
    """
    MongoDBTestCase.setUp(self)

    rising_00 = 0
    rising_01 = 0
    falling = 9999999

    # WORKLOAD
    clock = time.time()
    for i in xrange(CostModelTestCase.NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = i
        sess['ip_client'] = "client:%d" % (1234 + i)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = clock

        for j, collection in enumerate(CostModelTestCase.COLLECTION_NAMES):
            _id = str(random.random())
            queryId = long((i << 16) + j)
            responseId = (queryId << 8)
            predicates = {}
            query_doc = {}
            response_doc = {"_id": _id}

            for f in xrange(0, CostModelTestCase.NUM_FIELDS):
                # Pick the counter that feeds this field and advance it
                if f == 0:
                    field_value = rising_00
                    rising_00 += 1
                elif f == 1:
                    field_value = rising_01
                    rising_01 += 1
                else:
                    field_value = falling
                    falling -= 1
                f_name = "field%02d" % f
                response_doc[f_name] = field_value
                query_doc[f_name] = response_doc[f_name]
                predicates[f_name] = constants.PRED_TYPE_EQUALITY
            ## FOR (fields)

            wrapped_query = {
                constants.REPLACE_KEY_DOLLAR_PREFIX + "query": query_doc
            }

            op = Session.operationFactory()
            op['collection'] = collection
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = queryId
            op['query_content'] = [wrapped_query]
            op['resp_content'] = [response_doc]
            op['resp_id'] = responseId
            op['predicates'] = predicates
            # The response arrives one tick after the query
            op['query_time'] = clock
            clock += 1
            op['resp_time'] = clock
            sess['operations'].append(op)
        ## FOR (ops)

        sess['end_time'] = clock
        clock += 2
        sess.save()
    ## FOR (sess)

    # Let the MongoSniffConverter derive the catalog from the sessions
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = {}
    for c in self.metadata_db.Collection.fetch():
        self.collections[c['name']] = c
    self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES),
                     len(self.collections))

    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Overwrite the sizes that the converter derived from the workload
    # with larger ones
    for col_info in self.collections.itervalues():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = (
            col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024))
        col_info.save()

    self.costModelConfig = {
        'max_memory': 1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size': 64,
        'nodes': CostModelTestCase.NUM_NODES,
        'window_size': 1024,
    }
    self.state = State(self.collections, populated_workload,
                       self.costModelConfig)
## DEF
## CLASS
def reconstructMetaData(self, changed_fields, fieldscol2col):
    """Rewrite the stored sessions after fields were moved to other collections.

    For every operation whose collection has moved fields (according to
    self.generateDict(changed_fields)), each moved field is popped out of
    the operation's query content and re-issued as a new operation against
    the collection given by fieldscol2col[(collection, field)]. The new
    operation is inserted into the session next to the original one, and
    the original is dropped once its payload becomes empty. Every session
    is saved afterwards.

    Does nothing (apart from logging) when `changed_fields` is empty.
    Errors are logged together with the offending operation/session and
    then re-raised.
    """
    LOG.info("Reconstructing metadata!")
    if len(changed_fields) == 0:
        return

    col2fields = self.generateDict(changed_fields)

    for sess in self.metadata_db.Session.fetch():
        # Walk backwards so that inserting/removing operations does not
        # disturb the indexes that are still to be visited
        for i in xrange(len(sess['operations']) - 1, -1, -1):
            # Where to insert the split-off operation relative to i; it
            # drops to 0 when the current operation itself gets removed
            offset = 1
            op = sess['operations'][i]
            col_name = op['collection']
            fields = col2fields.get(col_name, None)

            # If this op's collection has no moved fields, skip it
            try:
                if fields:
                    payload = op["query_content"]  # payload is a list type
                    changed_query = []
                    counter = 0
                    while counter < len(payload):
                        doc = payload[counter]  # doc is a dict type
                        for key, value in doc.iteritems():
                            if isinstance(value, dict):
                                for k in value.iterkeys():
                                    if k in fields:
                                        LOG.debug("counter: %d, key: %s, value: %s", counter, key, k)
                                        changed_query.append((counter, key, k))
                                    ## IF
                                ## FOR
                            ## IF
                            else:
                                if value in fields:
                                    # Log `value` here: `k` only exists in
                                    # the dict branch, so it is unbound (or
                                    # left over from another key) at this
                                    # point.
                                    LOG.info("counter: %d, key: %s, value: %s", counter, key, value)
                                    # NOTE(review): the split step below
                                    # calls .pop() on payload[counter][key],
                                    # which is this non-dict value -- that
                                    # looks like it cannot work; confirm the
                                    # intended handling of scalar values.
                                    changed_query.append((counter, key, value))
                                ## IF
                            ## ELSE
                        ## FOR
                        counter += 1
                    # WHILE

                    # If we have queries to split
                    if len(changed_query) > 0:
                        # construct new queries
                        # NOTE(review): removing a document from `payload`
                        # shifts the positions of the documents behind it,
                        # while the remaining tuples still carry the old
                        # positions -- verify for multi-document payloads.
                        for tup in changed_query:
                            old_query_content = payload[tup[0]][tup[1]].pop(tup[2])
                            # If the doc is empty after the pop, remove it from the payload
                            if len(payload[tup[0]][tup[1]]) == 0:
                                payload[tup[0]].pop(tup[1])
                                if len(payload[tup[0]]) == 0:
                                    payload.remove(payload[tup[0]])
                                    # If the payload is empty, we remove the op from the session queue
                                    if len(payload) == 0:
                                        sess['operations'].remove(op)
                                        offset -= 1
                                    ## IF
                                ## IF

                            new_op = Session.operationFactory()
                            new_col = fieldscol2col[(col_name, tup[2])]
                            LOG.debug("Creating a new operation to collection: %s", new_col)
                            new_op['collection'] = new_col
                            new_op['type'] = op['type']
                            new_op['query_id'] = long(hash(time.time()))
                            new_op['query_content'] = [{tup[1]: {tup[2]: old_query_content}}]
                            new_op['resp_content'] = new_op['query_content']
                            new_op['resp_id'] = new_op['query_id'] + 1
                            new_op['predicates'] = op['predicates']
                            new_op['query_time'] = op['query_time']
                            new_op['resp_time'] = op['resp_time']

                            # add the new query after the current one of the session queue
                            sess['operations'].insert(i + offset, new_op)
                        ## FOR
                    ## IF
                ## IF
            except Exception:
                LOG.error("Error happened when process op: %s", pformat(op))
                raise
        ## FOR

        try:
            sess.save()
        except Exception:
            LOG.error("Failed to save session!\n%s", pformat(sess))
            raise
def setUp(self):
    """Build a synthetic query workload and the cost-model state on top of it.

    Every session holds one equality query per collection. Each query
    targets a single field that is chosen by the position of the collection
    (field00 for the first, field01 for the second, field02 for all others)
    and that carries a random value.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD
    clock = time.time()
    for i in xrange(CostModelTestCase.NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = i
        sess['ip_client'] = "client:%d" % (1234 + i)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = clock

        for j, collection in enumerate(CostModelTestCase.COLLECTION_NAMES):
            _id = str(random.random())
            queryId = long((i << 16) + j)
            responseId = (queryId << 8)
            predicates = {}
            query_doc = {}
            response_doc = {"_id": _id}

            # The field only depends on the collection position, so it is
            # the same in every round of the loop below; each round draws a
            # new random value and the last one wins.
            f_name = "field%02d" % min(j, 2)
            for f in xrange(0, CostModelTestCase.NUM_FIELDS):
                response_doc[f_name] = random.randint(0, 100)
                query_doc[f_name] = response_doc[f_name]
                predicates[f_name] = constants.PRED_TYPE_EQUALITY

            wrapped_query = {
                constants.REPLACE_KEY_DOLLAR_PREFIX + "query": query_doc
            }

            op = Session.operationFactory()
            op['collection'] = collection
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = queryId
            op['query_content'] = [wrapped_query]
            op['resp_content'] = [response_doc]
            op['resp_id'] = responseId
            op['predicates'] = predicates
            # The response arrives one tick after the query
            op['query_time'] = clock
            clock += 1
            op['resp_time'] = clock
            sess['operations'].append(op)
        ## FOR (ops)

        sess['end_time'] = clock
        clock += 2
        sess.save()
    ## FOR (sess)

    # Let the MongoSniffConverter derive the catalog from the sessions
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = {}
    for c in self.metadata_db.Collection.fetch():
        self.collections[c['name']] = c
    self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES),
                     len(self.collections))

    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Overwrite the sizes that the converter derived from the workload
    # with larger ones
    for col_info in self.collections.itervalues():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = (
            col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024))
        col_info.save()

    self.costModelConfig = {
        'max_memory': 1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size': 64,
        'nodes': CostModelTestCase.NUM_NODES,
        'window_size': 3,
    }
    self.state = State(self.collections, populated_workload,
                       self.costModelConfig)
def setUp(self):
    """Build a synthetic query workload and a NodeEstimator on top of it.

    Every session holds one query per collection. Even-numbered fields get
    a random integer and take part in the query as equality predicates;
    odd-numbered fields get a random numeric string and only show up in the
    response document.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD
    self.workload = []
    clock = time.time()
    for i in xrange(0, NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = i
        sess['ip_client'] = "client:%d" % (1234 + i)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = clock

        for j, collection in enumerate(COLLECTION_NAMES):
            _id = str(random.random())
            queryId = long((i << 16) + j)
            responseId = (queryId << 8)
            predicates = {}
            query_doc = {}
            response_doc = {"_id": _id}

            for f in xrange(0, NUM_FIELDS):
                f_name = "field%02d" % f
                if f % 2 != 0:
                    # Odd field: response only
                    response_doc[f_name] = str(random.randint(1000, 100000))
                    continue
                # Even field: queried with an equality predicate
                response_doc[f_name] = random.randint(0, 100)
                query_doc[f_name] = response_doc[f_name]
                predicates[f_name] = constants.PRED_TYPE_EQUALITY
            ## FOR (fields)

            wrapped_query = {
                constants.REPLACE_KEY_DOLLAR_PREFIX + "query": query_doc
            }

            op = Session.operationFactory()
            op['collection'] = collection
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = queryId
            op['query_content'] = [wrapped_query]
            op['resp_content'] = [response_doc]
            op['resp_id'] = responseId
            op['predicates'] = predicates
            # The response arrives one tick after the query
            op['query_time'] = clock
            clock += 1
            op['resp_time'] = clock
            sess['operations'].append(op)
        ## FOR (ops)

        sess['end_time'] = clock
        clock += 2
        sess.save()
        self.workload.append(sess)
    ## FOR (sess)

    # Let the MongoSniffConverter derive the catalog from the sessions
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

    self.collections = {}
    for c in self.metadata_db.Collection.fetch():
        self.collections[c['name']] = c
    self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

    self.estimator = NodeEstimator(self.collections, NUM_NODES)
def storeCurrentOpInSession(self):
    """Store self.currentOp (with self.currentContent) in a session.

    The session is looked up, or created if it does not exist yet, from the
    client/server addresses of the operation.

    - query/insert/delete/update messages become a new operation record
      that is appended to the session and remembered in
      self.query_response_map under its query id;
    - reply messages (including any message without a 'type') are merged
      into the remembered operation matching their reply id, or counted as
      skipped when there is none;
    - getmore/killcursors messages are ignored.

    Raises:
        Exception: if the message type is none of the above.
    """
    # Check whether it has a busted collection name
    # For now we'll just change the name to our marker so that we can figure out
    # what it really should be after we recreate the schema
    try:
        self.currentOp['collection'].decode('ascii')
    except Exception:
        if self.debug:
            LOG.warn("Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]" % self.currentOp)
        self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
        self.bustedOps.append(self.currentOp)

    # Figure out whether this is a outgoing query from the client
    # Or an incoming response from the server
    if self.currentOp['arrow'] == '-->>':
        ip_client = self.currentOp['ip1']
        ip_server = self.currentOp['ip2']
    else:
        ip_client = self.currentOp['ip2']
        ip_server = self.currentOp['ip1']

    # If this doesn't have a type here, then we know that it's a reply.
    # (A second "missing 'type'" check used to follow; it could never fire
    # because the type is always set at this point, so it was removed.)
    if 'type' not in self.currentOp:
        self.currentOp['type'] = constants.OP_TYPE_REPLY
    ## IF

    # Get the session to store this operation in
    session = self.getOrCreateSession(ip_client, ip_server)
    if session["start_time"] is None and "timestamp" in self.currentOp:
        session["start_time"] = self.currentOp['timestamp']

    # Escape any invalid key names
    for i in xrange(0, len(self.currentContent)):
        # HACK: Rename the 'query' key to '$query'
        if 'query' in self.currentContent[i]:
            self.currentContent[i][constants.OP_TYPE_QUERY] = self.currentContent[i]['query']
            del self.currentContent[i]['query']
        self.currentContent[i] = util.escapeFieldNames(self.currentContent[i])
    ## FOR

    # QUERY: $query, $delete, $insert, $update:
    # Create the operation, add it to the session
    if self.currentOp['type'] in [constants.OP_TYPE_QUERY, constants.OP_TYPE_INSERT, constants.OP_TYPE_DELETE, constants.OP_TYPE_UPDATE]:
        # create the operation -- corresponds to current
        if self.debug:
            LOG.debug("Current Operation %d Content:\n%s" % (self.currentOp['query_id'], pformat(self.currentContent)))

        op = Session.operationFactory()
        op['collection'] = self.currentOp['collection']
        op['type'] = self.currentOp['type']
        op['query_time'] = self.currentOp['timestamp']
        op['query_size'] = self.currentOp['size']
        op['query_content'] = self.currentContent
        op['query_id'] = long(self.currentOp['query_id'])
        op['query_aggregate'] = False  # false -not aggregate- by default

        # UPDATE Flags
        if op['type'] == constants.OP_TYPE_UPDATE:
            op['update_upsert'] = self.currentOp['update_upsert']
            op['update_multi'] = self.currentOp['update_multi']

        # QUERY Flags
        elif op['type'] == constants.OP_TYPE_QUERY:
            # SKIP, LIMIT
            op['query_limit'] = self.currentOp['ntoreturn']
            op['query_offset'] = self.currentOp['ntoskip']
            if self.currentOp['hasfields']:
                # HACK: Convert dot notation into '*'
                # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                op['query_fields'] = dict([(k.replace(".", "*"), v) for k, v in self.currentOp['hasfields'].iteritems()])

            # check for aggregate
            # update collection name, set aggregate type
            if op['collection'].find("$cmd") > 0:
                op['query_aggregate'] = True
                # extract the real collection name
                ## --> This has to be done at the end after the first pass, because the collection name is hashed up

        # Keep track of operations by their ids so that we can add
        # the response to it later on
        self.query_response_map[self.currentOp['query_id']] = op

        # Append it to the current session
        # TODO: Large traces will cause the sessions to get too big.
        #       We need to split out the operations into a separate collection
        #       Or use multiple sessions
        session['operations'].append(op)
        self.op_ctr += 1

        if self.debug:
            LOG.debug("Added %s operation %d to session %s from line %d:\n%s" % (op['type'], self.currentOp['query_id'], session['session_id'], self.line_ctr, pformat(op)))

        # store the collection name in known_collections. This will be useful later.
        # see the comment at known_collections
        # HACK: We have to cut off the db name here. We may not want
        #       to do that if the application is querying multiple databases.
        full_name = op['collection']
        col_name = full_name[full_name.find(".") + 1:]  # cut off the db name
        self.known_collections.add(col_name)

    # RESPONSE - add information to the matching query
    elif self.currentOp['type'] == constants.OP_TYPE_REPLY:
        self.resp_ctr += 1
        reply_id = self.currentOp['reply_id']
        # see if the matching query is in the map
        if reply_id in self.query_response_map:
            # fill in missing information
            query_op = self.query_response_map[reply_id]
            query_op['resp_content'] = self.currentContent
            query_op['resp_size'] = self.currentOp['size']
            query_op['resp_time'] = self.currentOp['timestamp']
            query_op['resp_id'] = long(self.currentOp['query_id'])
            del self.query_response_map[reply_id]
        else:
            self.skip_ctr += 1
            if self.debug:
                LOG.warn("Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]" % (self.line_ctr, reply_id, self.skip_ctr, self.resp_ctr))

    # These can be safely ignored
    elif self.currentOp['type'] in [constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS]:
        if self.debug:
            LOG.warn("Skipping '%s' operation %d on line %d" % (self.currentOp['type'], self.currentOp['query_id'], self.line_ctr))

    # UNKNOWN
    else:
        raise Exception("Unexpected message type '%s'" % self.currentOp['type'])

    return
def createStockLevel(self, params):
    """Generate the synthetic operations for one stock-level transaction.

    The result holds three read queries, in this order:

    1. DISTRICT, projecting D_NEXT_O_ID for (params["w_id"], params["d_id"])
    2. ORDER_LINE, for order ids in [o_id - 20, o_id) with a random o_id
    3. STOCK, for ten random item ids with S_QUANTITY below
       params["threshold"]

    Response documents are filled with random placeholder values.
    """
    result = []
    w = params["w_id"]
    d = params["d_id"]
    upper_order_id = self.rng.randint(0, 10000)
    candidate_items = [self.rng.randint(0, 1000) for _ in xrange(10)]
    min_quantity = params["threshold"]

    # --- DISTRICT -------------------------------------------------------
    q_district = Session.operationFactory()
    fake_row = {
        "D_W_ID": self.rng.randint(0, 100),
        "D_ID": self.rng.randint(0, 100),
    }
    q_district['resp_content'] = [fake_row]
    q_district['collection'] = tpccConstants.TABLENAME_DISTRICT
    q_district['type'] = constants.OP_TYPE_QUERY
    q_district['query_id'] = self.nextQueryId()
    q_district['query_content'] = [{"#query": {"D_W_ID": w, "D_ID": d}}]
    q_district['query_fields'] = {"D_NEXT_O_ID": 1}
    q_district['resp_id'] = self.nextResponseId()
    q_district['query_time'] = self.nextTimestamp()
    q_district['resp_time'] = self.nextTimestamp()
    result.append(q_district)

    # --- ORDER_LINE -----------------------------------------------------
    q_lines = Session.operationFactory()
    fake_row = {
        "OL_W_ID": self.rng.randint(0, 100),
        "OL_D_ID": self.rng.randint(0, 100),
        "OL_O_ID": self.rng.randint(0, 100),
    }
    q_lines['resp_content'] = [fake_row]
    q_lines['collection'] = tpccConstants.TABLENAME_ORDER_LINE
    q_lines['type'] = constants.OP_TYPE_QUERY
    q_lines['query_id'] = self.nextQueryId()
    q_lines['query_content'] = [{
        "#query": {
            "OL_W_ID": w,
            "OL_D_ID": d,
            "OL_O_ID": {"#lt": upper_order_id, "#gte": upper_order_id - 20},
        }
    }]
    q_lines['query_fields'] = {"OL_I_ID": 1}
    q_lines['resp_id'] = self.nextResponseId()
    q_lines['query_time'] = self.nextTimestamp()
    q_lines['resp_time'] = self.nextTimestamp()
    result.append(q_lines)

    # --- STOCK ----------------------------------------------------------
    q_stock = Session.operationFactory()
    fake_row = {
        "S_W_ID": self.rng.randint(0, 100),
        "S_I_ID": self.rng.randint(0, 100),
        "S_QUANTITY": self.rng.randint(0, 100),
    }
    q_stock['resp_content'] = [fake_row]
    q_stock['collection'] = tpccConstants.TABLENAME_STOCK
    q_stock['type'] = constants.OP_TYPE_QUERY
    q_stock['query_id'] = self.nextQueryId()
    q_stock['query_content'] = [{
        "#query": {
            "S_W_ID": w,
            "S_I_ID": {"#in": list(candidate_items)},
            "S_QUANTITY": {"#lt": min_quantity},
        }
    }]
    q_stock['query_fields'] = None
    q_stock['resp_id'] = self.nextResponseId()
    q_stock['query_time'] = self.nextTimestamp()
    q_stock['resp_time'] = self.nextTimestamp()
    result.append(q_stock)

    return result
def createDelivery(self, params):
    """Build the operations for one delivery transaction.

    Reads "w_id", "o_carrier_id" and "ol_delivery_d" from params. For every
    district id from 1 to tpccConstants.DISTRICTS_PER_WAREHOUSE it appends
    seven operations: three queries (NEW_ORDER, ORDERS, ORDER_LINE), three
    updates (ORDERS, ORDER_LINE, CUSTOMER) and one delete (NEW_ORDER).
    Customer id, order id, order total and all response values come from
    self.rng. Returns the list of operations.
    """
    warehouse = params["w_id"]
    carrier = params["o_carrier_id"]
    delivered_at = params["ol_delivery_d"]

    trace = []

    def emit(table, kind, resp_keys, content, projection=None,
             multi=None, upsert=None):
        # Keep this statement order: rng draws and the query/response/
        # timestamp counters must advance in the same sequence for each op.
        entry = Session.operationFactory()
        fake_row = {}
        for key in resp_keys:
            fake_row[key] = self.rng.randint(0, 100)
        entry['resp_content'] = [fake_row]
        entry['collection'] = table
        entry['type'] = kind
        entry['query_id'] = self.nextQueryId()
        entry['query_content'] = content
        entry['query_fields'] = projection
        entry['resp_id'] = self.nextResponseId()
        entry['query_time'] = self.nextTimestamp()
        entry['resp_time'] = self.nextTimestamp()
        if multi is not None:
            entry['update_multi'] = multi
        if upsert is not None:
            entry['update_upsert'] = upsert
        trace.append(entry)

    order_keys = ("O_ID", "O_D_ID", "O_W_ID")
    line_keys = ("OL_O_ID", "OL_D_ID", "OL_W_ID")
    new_order_keys = ("NO_D_ID", "NO_W_ID")

    for district in xrange(1, tpccConstants.DISTRICTS_PER_WAREHOUSE + 1):
        customer = self.rng.randint(0, 10000)
        order = self.rng.randint(0, 10000)
        order_total = self.rng.random() * 100

        # --- reads ---
        emit(tpccConstants.TABLENAME_NEW_ORDER, constants.OP_TYPE_QUERY,
             new_order_keys,
             [{"#query": {"NO_D_ID": district, "NO_W_ID": warehouse}}],
             {"NO_O_ID": 1})
        emit(tpccConstants.TABLENAME_ORDERS, constants.OP_TYPE_QUERY,
             order_keys,
             [{"#query": {"O_ID": order, "O_D_ID": district,
                          "O_W_ID": warehouse}}],
             {"O_C_ID": 1})
        emit(tpccConstants.TABLENAME_ORDER_LINE, constants.OP_TYPE_QUERY,
             line_keys,
             [{"#query": {"OL_O_ID": order, "OL_D_ID": district,
                          "OL_W_ID": warehouse}}],
             {"OL_AMOUNT": 1})

        # --- writes ---
        emit(tpccConstants.TABLENAME_ORDERS, constants.OP_TYPE_UPDATE,
             order_keys,
             [{"O_ID": order, "O_D_ID": district, "O_W_ID": warehouse},
              {"#set": {"O_CARRIER_ID": carrier}}],
             multi=False, upsert=True)
        emit(tpccConstants.TABLENAME_ORDER_LINE, constants.OP_TYPE_UPDATE,
             line_keys,
             [{"OL_O_ID": order, "OL_D_ID": district, "OL_W_ID": warehouse},
              {"#set": {"OL_DELIVERY_D": delivered_at}}],
             multi=True, upsert=True)
        emit(tpccConstants.TABLENAME_CUSTOMER, constants.OP_TYPE_UPDATE,
             ("C_ID", "C_D_ID", "C_W_ID"),
             [{"C_ID": customer, "C_D_ID": district, "C_W_ID": warehouse},
              {"#inc": {"C_BALANCE": order_total}}],
             multi=False, upsert=True)
        emit(tpccConstants.TABLENAME_NEW_ORDER, constants.OP_TYPE_DELETE,
             new_order_keys,
             [{"NO_D_ID": district, "NO_W_ID": warehouse}])
    ## FOR

    return trace
def createDelivery(self, params):
    """Generate the operation list of a delivery transaction.

    params must provide "w_id", "o_carrier_id" and "ol_delivery_d". Each
    district id in 1..tpccConstants.DISTRICTS_PER_WAREHOUSE contributes, in
    order: queries on NEW_ORDER, ORDERS and ORDER_LINE; updates on ORDERS,
    ORDER_LINE and CUSTOMER; and a delete on NEW_ORDER. Ids, the balance
    increment and the response rows are produced by self.rng.
    """
    w_id = params["w_id"]
    o_carrier_id = params["o_carrier_id"]
    ol_delivery_d = params["ol_delivery_d"]

    QUERY = constants.OP_TYPE_QUERY
    UPDATE = constants.OP_TYPE_UPDATE
    DELETE = constants.OP_TYPE_DELETE

    result = []
    for d_id in xrange(1, tpccConstants.DISTRICTS_PER_WAREHOUSE + 1):
        c_id = self.rng.randint(0, 10000)
        o_id = self.rng.randint(0, 10000)
        ol_total = self.rng.random() * 100

        # One row per operation:
        # (collection, type, response keys, content, fields, (multi, upsert))
        steps = [
            (tpccConstants.TABLENAME_NEW_ORDER, QUERY,
             ["NO_D_ID", "NO_W_ID"],
             [{"#query": {"NO_D_ID": d_id, "NO_W_ID": w_id}}],
             {"NO_O_ID": 1}, None),
            (tpccConstants.TABLENAME_ORDERS, QUERY,
             ["O_ID", "O_D_ID", "O_W_ID"],
             [{"#query": {"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id}}],
             {"O_C_ID": 1}, None),
            (tpccConstants.TABLENAME_ORDER_LINE, QUERY,
             ["OL_O_ID", "OL_D_ID", "OL_W_ID"],
             [{"#query": {"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id}}],
             {"OL_AMOUNT": 1}, None),
            (tpccConstants.TABLENAME_ORDERS, UPDATE,
             ["O_ID", "O_D_ID", "O_W_ID"],
             [{"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id},
              {"#set": {"O_CARRIER_ID": o_carrier_id}}],
             None, (False, True)),
            (tpccConstants.TABLENAME_ORDER_LINE, UPDATE,
             ["OL_O_ID", "OL_D_ID", "OL_W_ID"],
             [{"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id},
              {"#set": {"OL_DELIVERY_D": ol_delivery_d}}],
             None, (True, True)),
            (tpccConstants.TABLENAME_CUSTOMER, UPDATE,
             ["C_ID", "C_D_ID", "C_W_ID"],
             [{"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id},
              {"#inc": {"C_BALANCE": ol_total}}],
             None, (False, True)),
            (tpccConstants.TABLENAME_NEW_ORDER, DELETE,
             ["NO_D_ID", "NO_W_ID"],
             [{"NO_D_ID": d_id, "NO_W_ID": w_id}],
             None, None),
        ]

        for collection, op_type, resp_keys, content, fields, flags in steps:
            op = Session.operationFactory()
            response_row = {}
            for key in resp_keys:
                response_row[key] = self.rng.randint(0, 100)
            op['resp_content'] = [response_row]
            op['collection'] = collection
            op['type'] = op_type
            op['query_id'] = self.nextQueryId()
            op['query_content'] = content
            op['query_fields'] = fields
            op['resp_id'] = self.nextResponseId()
            op['query_time'] = self.nextTimestamp()
            op['resp_time'] = self.nextTimestamp()
            if flags is not None:
                # Only the update operations carry multi/upsert flags
                op['update_multi'], op['update_upsert'] = flags
            result.append(op)
        ## FOR (steps)
    ## FOR (districts)

    return result
def createNewOrder(self, params):
    """Build the operations for one new-order transaction.

    Reads "w_id", "d_id", "c_id", "o_entry_d", "i_ids", "i_w_ids" and
    "i_qtys" from params and returns the list of generated operations:

      1. query ITEM for the ordered items
      2. query WAREHOUSE
      3. query DISTRICT
      4. update DISTRICT (increment D_NEXT_O_ID)
      5. query CUSTOMER
      6. insert into NEW_ORDER
      7. insert into ORDERS
      8. query STOCK for the ordered items
      9. one STOCK update per ordered item

    Every operation is a distinct object from Session.operationFactory().
    Previously the ORDERS insert reused and overwrote the NEW_ORDER insert,
    so that dict appeared twice in the result and the NEW_ORDER insert was
    lost.

    Response rows and the synthetic ids/amounts are drawn from self.rng.
    """
    ops = []

    w_id = params["w_id"]
    d_id = params["d_id"]
    c_id = params["c_id"]
    o_entry_d = params["o_entry_d"]
    i_ids = params["i_ids"]
    i_w_ids = params["i_w_ids"]
    i_qtys = params["i_qtys"]
    s_dist_col = "S_DIST_%02d" % d_id

    # w_tax, d_tax and c_discount are not used below. The draws are kept so
    # that a seeded rng yields the same sequence of values as before.
    w_tax = self.rng.random()
    d_tax = self.rng.random()
    d_next_o_id = self.rng.randint(0, 1000)
    c_discount = self.rng.randint(0, 10)

    ol_cnt = len(i_ids)
    o_carrier_id = tpccConstants.NULL_CARRIER_ID
    # An empty i_w_ids is treated as "every item is supplied locally"
    all_local = (not i_w_ids or [w_id] * len(i_w_ids) == i_w_ids)

    def add_op(collection, op_type, resp_keys, content, fields=None,
               upsert=None, multi=None):
        # Always starts from a fresh operation so that no two entries of
        # `ops` can alias each other. The statement order fixes the order
        # in which rng values and id/timestamp counters are consumed.
        op = Session.operationFactory()
        responseContent = {}
        for key in resp_keys:
            responseContent[key] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = collection
        op['type'] = op_type
        op['query_id'] = self.nextQueryId()
        op['query_content'] = content
        op['query_fields'] = fields
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        if multi is not None:
            op['update_multi'] = multi
        if upsert is not None:
            op['update_upsert'] = upsert
        ops.append(op)

    # ITEM lookup
    add_op(tpccConstants.TABLENAME_ITEM, constants.OP_TYPE_QUERY,
           ["I_ID"],
           [{"#query": {"I_ID": {"#in": i_ids}}}],
           {"I_ID": 1, "I_PRICE": 1, "I_NAME": 1, "I_DATA": 1})

    # WAREHOUSE tax
    add_op(tpccConstants.TABLENAME_WAREHOUSE, constants.OP_TYPE_QUERY,
           ["W_ID"],
           [{"#query": {"W_ID": w_id}}],
           {"W_TAX": 1})

    # DISTRICT tax / next order id
    add_op(tpccConstants.TABLENAME_DISTRICT, constants.OP_TYPE_QUERY,
           ["D_ID", "D_W_ID"],
           [{"#query": {"D_ID": d_id, "D_W_ID": w_id}}],
           {"D_TAX": 1, "D_NEXT_O_ID": 1})

    # DISTRICT: bump the next order id
    add_op(tpccConstants.TABLENAME_DISTRICT, constants.OP_TYPE_UPDATE,
           ["D_ID", "D_W_ID"],
           [{"D_ID": d_id, "D_W_ID": w_id}, {"#inc": {"D_NEXT_O_ID": 1}}],
           multi=False, upsert=True)

    # CUSTOMER lookup
    add_op(tpccConstants.TABLENAME_CUSTOMER, constants.OP_TYPE_QUERY,
           ["C_ID", "C_D_ID", "C_W_ID"],
           [{"#query": {"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id}}],
           {"C_DISCOUNT": 1, "C_LAST": 1, "C_CREDIT": 1})

    # NEW_ORDER insert
    add_op(tpccConstants.TABLENAME_NEW_ORDER, constants.OP_TYPE_INSERT,
           ["NO_O_ID", "NO_D_ID", "NO_W_ID"],
           [{"NO_O_ID": d_next_o_id, "NO_D_ID": d_id, "NO_W_ID": w_id}])

    # ORDERS insert (its own operation; see docstring)
    o = {
        "O_D_ID": d_id,
        "O_W_ID": w_id,
        "O_C_ID": c_id,
        "O_ID": d_next_o_id,
        "O_ENTRY_D": o_entry_d,
        "O_CARRIER_ID": o_carrier_id,
        "O_OL_CNT": ol_cnt,
        "O_ALL_LOCAL": all_local,
    }
    add_op(tpccConstants.TABLENAME_ORDERS, constants.OP_TYPE_INSERT,
           ["O_D_ID", "O_W_ID", "O_C_ID", "O_ID", "O_ENTRY_D",
            "O_CARRIER_ID", "O_OL_CNT", "O_ALL_LOCAL"],
           [o])

    # STOCK lookup for all ordered items
    add_op(tpccConstants.TABLENAME_STOCK, constants.OP_TYPE_QUERY,
           ["S_I_ID", "S_W_ID"],
           [{"#query": {"S_I_ID": {"#in": i_ids}, "S_W_ID": w_id}}],
           {"S_I_ID": 1, "S_QUANTITY": 1, "S_DATA": 1, "S_YTD": 1,
            "S_ORDER_CNT": 1, "S_REMOTE_CNT": 1, s_dist_col: 1})

    for i, i_id in enumerate(i_ids):
        s = {"S_I_ID": i_id, "S_W_ID": w_id}

        # NOTE(review): this order-line record is built (consuming rng
        # values) but no ORDER_LINE insert operation is emitted for it.
        # Confirm whether that operation is intentionally omitted.
        ol = {
            "OL_D_ID": d_id,
            "OL_W_ID": w_id,
            "OL_O_ID": d_next_o_id,
            "OL_NUMBER": i + 1,
            "OL_I_ID": i_id,
            # With an empty i_w_ids (all local, see all_local above) the
            # supplying warehouse is the home warehouse. Indexing the empty
            # list here used to raise IndexError.
            "OL_SUPPLY_W_ID": i_w_ids[i] if i_w_ids else w_id,
            "OL_DELIVERY_D": o_entry_d,
            "OL_QUANTITY": i_qtys[i],
            "OL_AMOUNT": self.rng.random() * 100,
            "OL_DIST_INFO": ''.join(self.rng.choice(string.ascii_uppercase) for x in range(24)),
        }

        s_remote_cnt = self.rng.randint(0, 10)
        s_order_cnt = self.rng.randint(0, 10)
        s_quantity = self.rng.randint(0, 10)
        s_ytd = self.rng.random()

        # STOCK update for this item (only the upsert flag is set here)
        add_op(tpccConstants.TABLENAME_STOCK, constants.OP_TYPE_UPDATE,
               ["S_I_ID", "S_W_ID"],
               [s, {"#set": {"S_QUANTITY": s_quantity, "S_YTD": s_ytd,
                             "S_ORDER_CNT": s_order_cnt,
                             "S_REMOTE_CNT": s_remote_cnt}}],
               upsert=True)
    ## FOR

    return ops
def setUp(self):
    """Create a synthetic workload and build the NodeEstimator under test.

    Saves NUM_SESSIONS sessions, each holding one equality query per entry
    of COLLECTION_NAMES, then runs MongoSniffConverter over them (parse and
    sessionizer steps disabled) and checks the resulting session and
    collection counts before constructing self.estimator.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD
    self.workload = []
    clock = time.time()
    query_key = constants.REPLACE_KEY_DOLLAR_PREFIX + "query"

    for sess_idx in xrange(0, NUM_SESSIONS):
        session = self.metadata_db.Session()
        session["session_id"] = sess_idx
        session["ip_client"] = "client:%d" % (1234 + sess_idx)
        session["ip_server"] = "server:5678"
        session["start_time"] = clock

        for col_idx, col_name in enumerate(COLLECTION_NAMES):
            doc_id = str(random.random())
            query_id = long((sess_idx << 16) + col_idx)
            response_id = query_id << 8

            criteria = {}
            predicates = {}
            response = {"_id": doc_id}
            for field_idx in xrange(0, NUM_FIELDS):
                field = "field%02d" % field_idx
                if field_idx % 2:
                    # Odd fields only appear in the response, as strings
                    response[field] = str(random.randint(1000, 100000))
                else:
                    # Even fields are queried by equality on the same value
                    value = random.randint(0, 100)
                    response[field] = value
                    criteria[field] = value
                    predicates[field] = constants.PRED_TYPE_EQUALITY
            ## FOR (fields)

            op = Session.operationFactory()
            op["collection"] = col_name
            op["type"] = constants.OP_TYPE_QUERY
            op["query_id"] = query_id
            op["query_content"] = [{query_key: criteria}]
            op["resp_content"] = [response]
            op["resp_id"] = response_id
            op["predicates"] = predicates
            op["query_time"] = clock
            clock += 1
            op["resp_time"] = clock
            session["operations"].append(op)
        ## FOR (ops)

        session["end_time"] = clock
        clock += 2
        session.save()
        self.workload.append(session)
    ## FOR (sess)

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

    by_name = {}
    for col_info in self.metadata_db.Collection.fetch():
        by_name[col_info["name"]] = col_info
    self.collections = by_name
    self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

    self.estimator = NodeEstimator(self.collections, NUM_NODES)
def storeCurrentOpInSession(self):
    """Stores the currentOp in a session.

    We will create a new session if one does not already exist.

    Query, insert, delete and update operations are turned into a new
    operation record, appended to the session and remembered in
    query_response_map under their query_id. A reply is matched to its
    pending query through 'reply_id' and fills in that query's response
    fields; replies without a matching query are counted and skipped.
    GETMORE and KILLCURSORS messages are ignored.

    Raises:
        Exception: if the operation type is none of the above.
    """
    # Check whether it has a busted collection name
    # For now we'll just change the name to our marker so that we can figure out
    # what it really should be after we recreate the schema
    try:
        self.currentOp['collection'].decode('ascii')
    except Exception:
        if self.debug:
            # The message references op_ctr/line_ctr, which are counters kept
            # on self. Supply them as fallbacks so formatting cannot fail
            # with a KeyError when currentOp does not carry them; any value
            # present in currentOp still takes precedence.
            details = {'op_ctr': self.op_ctr, 'line_ctr': self.line_ctr}
            details.update(self.currentOp)
            LOG.warn(
                "Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]"
                % details)
        self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
        self.bustedOps.append(self.currentOp)

    # Figure out whether this is a outgoing query from the client
    # Or an incoming response from the server
    if self.currentOp['arrow'] == '-->>':
        ip_client = self.currentOp['ip1']
        ip_server = self.currentOp['ip2']
    else:
        ip_client = self.currentOp['ip2']
        ip_server = self.currentOp['ip1']

    # If this doesn't have a type here, then we know that it's a reply.
    # (A second "missing type" check used to follow this one; it could never
    # trigger because the type is always set by this point.)
    if 'type' not in self.currentOp:
        self.currentOp['type'] = constants.OP_TYPE_REPLY
    ## IF

    # Get the session to store this operation in
    session = self.getOrCreateSession(ip_client, ip_server)
    if session["start_time"] is None and "timestamp" in self.currentOp:
        session["start_time"] = self.currentOp['timestamp']

    # Escape any invalid key names
    for i, content in enumerate(self.currentContent):
        # HACK: Rename the 'query' key to '$query'
        # NOTE(review): the new key is constants.OP_TYPE_QUERY, presumably
        # '$query' as the comment above says -- confirm against constants.
        if 'query' in content:
            content[constants.OP_TYPE_QUERY] = content['query']
            del content['query']
        self.currentContent[i] = util.escapeFieldNames(content)
    ## FOR

    op_type = self.currentOp['type']

    # QUERY: $query, $delete, $insert, $update:
    # Create the operation, add it to the session
    if op_type in [
            constants.OP_TYPE_QUERY, constants.OP_TYPE_INSERT,
            constants.OP_TYPE_DELETE, constants.OP_TYPE_UPDATE
    ]:
        # create the operation -- corresponds to current
        if self.debug:
            LOG.debug(
                "Current Operation %d Content:\n%s" %
                (self.currentOp['query_id'], pformat(self.currentContent)))

        op = Session.operationFactory()
        op['collection'] = self.currentOp['collection']
        op['type'] = self.currentOp['type']
        op['query_time'] = self.currentOp['timestamp']
        op['query_size'] = self.currentOp['size']
        op['query_content'] = self.currentContent
        op['query_id'] = long(self.currentOp['query_id'])
        op['query_aggregate'] = False  # false -not aggregate- by default

        # UPDATE Flags
        if op['type'] == constants.OP_TYPE_UPDATE:
            op['update_upsert'] = self.currentOp['update_upsert']
            op['update_multi'] = self.currentOp['update_multi']

        # QUERY Flags
        elif op['type'] == constants.OP_TYPE_QUERY:
            # SKIP, LIMIT
            op['query_limit'] = self.currentOp['ntoreturn']
            op['query_offset'] = self.currentOp['ntoskip']
            if self.currentOp['hasfields']:
                # HACK: Convert dot notation into '*'
                # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                op['query_fields'] = dict([
                    (k.replace(".", "*"), v)
                    for k, v in self.currentOp['hasfields'].iteritems()
                ])

            # check for aggregate
            # update collection name, set aggregate type
            # ("> 0" means a name that *starts* with "$cmd" is not flagged)
            if op['collection'].find("$cmd") > 0:
                op['query_aggregate'] = True
                # extract the real collection name
                ## --> This has to be done at the end after the first pass, because the collection name is hashed up

        # Keep track of operations by their ids so that we can add
        # the response to it later on
        self.query_response_map[self.currentOp['query_id']] = op

        # Append it to the current session
        # TODO: Large traces will cause the sessions to get too big.
        #       We need to split out the operations into a separate collection
        #       Or use multiple sessions
        session['operations'].append(op)
        self.op_ctr += 1

        if self.debug:
            LOG.debug(
                "Added %s operation %d to session %s from line %d:\n%s" %
                (op['type'], self.currentOp['query_id'],
                 session['session_id'], self.line_ctr, pformat(op)))

        # store the collection name in known_collections. This will be useful later.
        # see the comment at known_collections
        # HACK: We have to cut off the db name here. We may not want
        #       to do that if the application is querying multiple databases.
        full_name = op['collection']
        col_name = full_name[full_name.find(".") + 1:]  # cut off the db name
        self.known_collections.add(col_name)

    # RESPONSE - add information to the matching query
    elif op_type == constants.OP_TYPE_REPLY:
        self.resp_ctr += 1
        reply_id = self.currentOp['reply_id']
        # see if the matching query is in the map
        if reply_id in self.query_response_map:
            # fill in missing information
            query_op = self.query_response_map[reply_id]
            query_op['resp_content'] = self.currentContent
            query_op['resp_size'] = self.currentOp['size']
            query_op['resp_time'] = self.currentOp['timestamp']
            query_op['resp_id'] = long(self.currentOp['query_id'])
            del self.query_response_map[reply_id]
        else:
            self.skip_ctr += 1
            if self.debug:
                LOG.warn(
                    "Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]"
                    % (self.line_ctr, reply_id, self.skip_ctr,
                       self.resp_ctr))

    # These can be safely ignored
    elif op_type in [
            constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS
    ]:
        if self.debug:
            LOG.warn("Skipping '%s' operation %d on line %d" %
                     (self.currentOp['type'], self.currentOp['query_id'],
                      self.line_ctr))

    # UNKNOWN
    else:
        raise Exception("Unexpected message type '%s'" %
                        self.currentOp['type'])

    return
def setUp(self):
    """Create the single-session workload and State used by the cost model tests.

    One session with six equality queries is saved, converted with
    MongoSniffConverter (parse and sessionizer steps disabled), and the
    resulting collections are then enlarged (doc_count, avg_doc_size,
    max_pages) before self.costModelConfig and self.state are built.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD
    clock = time.time()
    session = self.metadata_db.Session()
    session['session_id'] = 0
    session['ip_client'] = "client:%d" % (1234 + 0)
    session['ip_server'] = "server:5678"
    session['start_time'] = clock

    query_key = constants.REPLACE_KEY_DOLLAR_PREFIX + "query"
    main_collection = CostModelTestCase.COLLECTION_NAME

    # One row per generated query:
    # (query id, predicate fields in generation order, collection,
    #  project field02?, update the session end_time afterwards?)
    plan = [
        # query 0: field00
        (long((0 << 16) + 0), ('field00',), main_collection, True, False),
        # query 1: field01
        (long((1 << 16) + 1), ('field01',), main_collection, True, False),
        # query 2: field01 + field00
        (long((2 << 16) + 2), ('field01', 'field00'), main_collection, True, True),
        # query 3: field01 + field00, without a projection field
        (long((2 << 16) + 3), ('field01', 'field00'), main_collection, False, True),
        # query 4: field00 only, against the second collection
        (long((2 << 16) + 4), ('field00',), CostModelTestCase.COLLECTION_NAME_2, False, True),
        # query 5: field00 only, against the third collection
        (long((2 << 16) + 5), ('field00',), CostModelTestCase.COLLECTION_NAME_3, False, True),
    ]

    for query_id, fields, collection, with_projection, closes_session in plan:
        response = {"_id": str(random.random())}
        response_id = (query_id << 8)
        criteria = {}
        predicates = {}
        for field in fields:
            value = random.randint(0, 100)
            response[field] = value
            criteria[field] = value
            predicates[field] = constants.PRED_TYPE_EQUALITY

        projection = {}
        if with_projection:
            projection['field02'] = random.randint(0, 100)

        op = Session.operationFactory()
        op['collection'] = collection
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = query_id
        op['query_content'] = [{query_key: criteria}]
        op['resp_content'] = [response]
        op['resp_id'] = response_id
        op['predicates'] = predicates
        op['query_time'] = clock
        op['query_fields'] = projection
        clock += 1
        op['resp_time'] = clock
        session['operations'].append(op)

        if closes_session:
            session['end_time'] = clock
            clock += 1
    ## FOR (queries)

    session.save()

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    by_name = {}
    for col_info in self.metadata_db.Collection.fetch():
        by_name[col_info['name']] = col_info
    self.collections = by_name

    populated_workload = list(self.metadata_db.Session.fetch())
    self.workload = populated_workload

    # Increase the database size beyond what the converter derived from the workload
    for col_info in self.collections.itervalues():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        col_info.save()

    self.costModelConfig = {
        'max_memory': 1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size': 64,
        'nodes': CostModelTestCase.NUM_NODES,
        'window_size': 10
    }

    self.state = State(self.collections, populated_workload, self.costModelConfig)
def createOrderStatus(self, params):
    """Build the three query operations of one order-status transaction.

    The operations target, in order, the customer, orders and order-line
    collections.  Each one carries a single response document whose key
    fields are filled with values drawn from self.rng.randint(0, 100).

    params must provide "w_id", "d_id", "c_id" and "c_last".
    Returns the list of operations in creation order.
    """
    w_id = params["w_id"]
    d_id = params["d_id"]
    c_id = params["c_id"]
    # Not used below; the lookup is kept so that a params mapping
    # without "c_last" still fails with KeyError.
    c_last = params["c_last"]
    # The order id is drawn before any operation is built, which keeps
    # the sequence of rng draws unchanged.
    o_id = self.rng.randint(0, 10000)

    def make_query(collection, resp_keys, predicate, projection):
        # Assemble one query operation: random response values first,
        # then query id, response id and the two timestamps.
        op = Session.operationFactory()
        response = {}
        for key in resp_keys:
            response[key] = self.rng.randint(0, 100)
        op['resp_content'] = [response]
        op['collection'] = collection
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{"#query": predicate}]
        op['query_fields'] = projection
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        return op

    return [
        # Customer lookup by warehouse, district and customer id.
        make_query(
            tpccConstants.TABLENAME_CUSTOMER,
            ("C_W_ID", "C_D_ID", "C_ID"),
            {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id},
            {"C_ID": 1, "C_FIRST": 1, "C_MIDDLE": 1, "C_LAST": 1,
             "C_BALANCE": 1},
        ),
        # Orders placed by that customer.
        make_query(
            tpccConstants.TABLENAME_ORDERS,
            ("O_W_ID", "O_D_ID", "O_C_ID"),
            {"O_W_ID": w_id, "O_D_ID": d_id, "O_C_ID": c_id},
            {"O_ID": 1, "O_CARRIER_ID": 1, "O_ENTRY_D": 1},
        ),
        # Order lines of the randomly drawn order id.
        make_query(
            tpccConstants.TABLENAME_ORDER_LINE,
            ("OL_W_ID", "OL_D_ID", "OL_O_ID"),
            {"OL_W_ID": w_id, "OL_D_ID": d_id, "OL_O_ID": o_id},
            {"OL_SUPPLY_W_ID": 1, "OL_I_ID": 1, "OL_QUANTITY": 1,
             "OL_AMOUNT": 1, "OL_DELIVERY_D": 1},
        ),
    ]
def createPayment(self, params):
    """Build the five operations of one payment transaction.

    In order: a customer query, a warehouse query, a warehouse update
    (flagged as upsert), a district query and a final district operation
    that carries an "#inc" clause.  Every operation gets one response
    document whose key fields come from self.rng.randint(0, 100).

    params must provide "w_id", "d_id", "h_amount", "c_w_id", "c_d_id",
    "c_id", "c_last" and "h_date".
    Returns the list of operations in creation order.
    """
    w_id = params["w_id"]
    d_id = params["d_id"]
    h_amount = params["h_amount"]
    # c_w_id, c_d_id, c_last and h_date are looked up but not used below
    # (the customer predicate uses w_id/d_id).  The lookups are kept so a
    # missing key still raises KeyError.
    c_w_id = params["c_w_id"]
    c_d_id = params["c_d_id"]
    c_id = params["c_id"]
    c_last = params["c_last"]
    h_date = params["h_date"]

    def make_op(collection, op_type, resp_keys, query_content, projection):
        # Assemble one operation: random response values first, then
        # query id, response id and the two timestamps.
        op = Session.operationFactory()
        response = {}
        for key in resp_keys:
            response[key] = self.rng.randint(0, 100)
        op['resp_content'] = [response]
        op['collection'] = collection
        op['type'] = op_type
        op['query_id'] = self.nextQueryId()
        op['query_content'] = query_content
        op['query_fields'] = projection
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        return op

    ops = []

    # Customer lookup; note the projection values are 0 here, not 1.
    ops.append(make_op(
        tpccConstants.TABLENAME_CUSTOMER,
        constants.OP_TYPE_QUERY,
        ("C_W_ID", "C_D_ID", "C_ID"),
        [{"#query": {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id}}],
        {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0},
    ))

    # Warehouse lookup.
    ops.append(make_op(
        tpccConstants.TABLENAME_WAREHOUSE,
        constants.OP_TYPE_QUERY,
        ("W_ID",),
        [{"#query": {"W_ID": w_id}}],
        {"W_NAME": 1, "W_STREET_1": 1, "W_STREET_2": 1, "W_CITY": 1,
         "W_STATE": 1, "W_ZIP": 1},
    ))

    # Warehouse update with hard-coded selector and increment.
    warehouse_update = make_op(
        tpccConstants.TABLENAME_WAREHOUSE,
        constants.OP_TYPE_UPDATE,
        ("W_NAME",),
        [{"W_NAME": "igmrhawo"}, {"#inc": {"W_YTD": 123}}],
        None,
    )
    warehouse_update['update_upsert'] = True
    ops.append(warehouse_update)

    # District lookup.
    ops.append(make_op(
        tpccConstants.TABLENAME_DISTRICT,
        constants.OP_TYPE_QUERY,
        ("D_W_ID", "D_ID"),
        [{"#query": {"D_W_ID": w_id, "D_ID": d_id}}],
        {"D_NAME": 1, "D_STREET_1": 1, "D_STREET_2": 1, "D_CITY": 1,
         "D_STATE": 1, "D_ZIP": 1},
    ))

    # NOTE(review): this operation carries an "#inc" clause but is typed
    # OP_TYPE_QUERY and sets no update_upsert -- confirm whether that is
    # intentional before changing it.
    ops.append(make_op(
        tpccConstants.TABLENAME_DISTRICT,
        constants.OP_TYPE_QUERY,
        ("D_ID",),
        [{"#query": {"D_ID": d_id}}, {"#inc": {"D_YTD": h_amount}}],
        None,
    ))

    return ops
def createPayment(self, params):
    """Return the five operations that make up one payment transaction.

    The operations are described by a small plan table and materialised
    in a loop: customer query, warehouse query, warehouse update (the
    only one flagged with update_upsert), district query, and a closing
    district operation that carries an "#inc" clause.

    params must provide "w_id", "d_id", "h_amount", "c_w_id", "c_d_id",
    "c_id", "c_last" and "h_date".
    """
    # All eight keys are read, in this order, so that a missing key
    # raises KeyError.  c_w_id, c_d_id, c_last and h_date are not used
    # afterwards.
    (w_id, d_id, h_amount, c_w_id, c_d_id, c_id, c_last, h_date) = [
        params[name] for name in (
            "w_id", "d_id", "h_amount", "c_w_id",
            "c_d_id", "c_id", "c_last", "h_date",
        )
    ]

    query = constants.OP_TYPE_QUERY
    # One row per operation:
    # (collection, type, response keys, query content, projection, upsert)
    plan = (
        (tpccConstants.TABLENAME_CUSTOMER, query,
         ("C_W_ID", "C_D_ID", "C_ID"),
         [{"#query": {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id}}],
         {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0},
         False),
        (tpccConstants.TABLENAME_WAREHOUSE, query,
         ("W_ID",),
         [{"#query": {"W_ID": w_id}}],
         {"W_NAME": 1, "W_STREET_1": 1, "W_STREET_2": 1, "W_CITY": 1,
          "W_STATE": 1, "W_ZIP": 1},
         False),
        (tpccConstants.TABLENAME_WAREHOUSE, constants.OP_TYPE_UPDATE,
         ("W_NAME",),
         [{"W_NAME": "igmrhawo"}, {"#inc": {"W_YTD": 123}}],
         None,
         True),
        (tpccConstants.TABLENAME_DISTRICT, query,
         ("D_W_ID", "D_ID"),
         [{"#query": {"D_W_ID": w_id, "D_ID": d_id}}],
         {"D_NAME": 1, "D_STREET_1": 1, "D_STREET_2": 1, "D_CITY": 1,
          "D_STATE": 1, "D_ZIP": 1},
         False),
        # NOTE(review): has an "#inc" clause yet is typed as a query and
        # not flagged as upsert -- verify this is the intended fixture.
        (tpccConstants.TABLENAME_DISTRICT, query,
         ("D_ID",),
         [{"#query": {"D_ID": d_id}}, {"#inc": {"D_YTD": h_amount}}],
         None,
         False),
    )

    ops = []
    for collection, op_type, resp_keys, content, projection, upsert in plan:
        op = Session.operationFactory()
        # Random response values are drawn before the ids and timestamps.
        response = {}
        for key in resp_keys:
            response[key] = self.rng.randint(0, 100)
        op['resp_content'] = [response]
        op['collection'] = collection
        op['type'] = op_type
        op['query_id'] = self.nextQueryId()
        op['query_content'] = content
        op['query_fields'] = projection
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        if upsert:
            op['update_upsert'] = True
        ops.append(op)
    return ops
def setUp(self):
    """Create a one-session workload and the State used by the tests.

    Steps:
      1. Run the base MongoDBTestCase set-up.
      2. Store a single session holding six equality queries over
         field00/field01 (queries 0-2 also project field02; queries 4
         and 5 go to COLLECTION_NAME_2 and COLLECTION_NAME_3).
      3. Run MongoSniffConverter with parsing and sessionizing turned
         off, then load the collections and sessions it produced.
      4. Overwrite each collection's size statistics and build
         self.costModelConfig and self.state.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD
    timestamp = time.time()

    sess = self.metadata_db.Session()
    sess['session_id'] = 0
    sess['ip_client'] = "client:%d" % (1234 + 0)
    sess['ip_server'] = "server:5678"
    sess['start_time'] = timestamp

    main_collection = CostModelTestCase.COLLECTION_NAME
    # One row per generated query:
    # (raw query id, collection, predicate fields in draw order,
    #  project field02?, record end_time and skip a tick afterwards?)
    # Queries 3-5 reuse (2 << 16) as the high part of their id.
    query_plan = (
        ((0 << 16) + 0, main_collection, ('field00',), True, False),
        ((1 << 16) + 1, main_collection, ('field01',), True, False),
        ((2 << 16) + 2, main_collection, ('field01', 'field00'), True, True),
        ((2 << 16) + 3, main_collection, ('field01', 'field00'), False, True),
        ((2 << 16) + 4, CostModelTestCase.COLLECTION_NAME_2,
         ('field00',), False, True),
        ((2 << 16) + 5, CostModelTestCase.COLLECTION_NAME_3,
         ('field00',), False, True),
    )

    for raw_id, collection, fields, with_projection, ends_session in query_plan:
        doc_id = str(random.random())
        query_id = long(raw_id)
        response = {"_id": doc_id}
        response_id = (query_id << 8)

        # Each predicate field is matched by equality against the value
        # placed in the response document.
        matched = {}
        predicate_types = {}
        for field in fields:
            response[field] = random.randint(0, 100)
            matched[field] = response[field]
            predicate_types[field] = constants.PRED_TYPE_EQUALITY

        projection = {}
        if with_projection:
            projection['field02'] = random.randint(0, 100)

        wrapped_query = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": matched
        }

        op = Session.operationFactory()
        op['collection'] = collection
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = query_id
        op['query_content'] = [wrapped_query]
        op['resp_content'] = [response]
        op['resp_id'] = response_id
        op['predicates'] = predicate_types
        op['query_time'] = timestamp
        op['query_fields'] = projection
        # The response arrives one tick after the query.
        timestamp += 1
        op['resp_time'] = timestamp
        sess['operations'].append(op)

        if ends_session:
            sess['end_time'] = timestamp
            timestamp += 1

    sess.save()

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    # Index the collection documents by name.
    self.collections = dict(
        (col['name'], col) for col in self.metadata_db.Collection.fetch())
    populated_workload = list(self.metadata_db.Session.fetch())
    self.workload = populated_workload

    # Increase the database size beyond what the converter derived from
    # the workload
    for _col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        total_bytes = col_info['doc_count'] * col_info['avg_doc_size']
        col_info['max_pages'] = total_bytes / (4 * 1024)
        col_info.save()

    self.costModelConfig = {
        'max_memory': 1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size': 64,
        'nodes': CostModelTestCase.NUM_NODES,
        'window_size': 10
    }

    self.state = State(self.collections, populated_workload,
                       self.costModelConfig)