def _run_reads_(self, single, values, entries):
    """Runs the enabled read loadtests, recording timings in values.

    Propagates NotEveryThingWrittenError (from do_read) when the
    preceding writes are not fully visible yet.
    """
    if self.request.get("run_uncompressed"):
        logging.info("Starting uncompressed read loadtest")
        values["read_uncompressed"] = self.do_read(single, False, entries)
    if self.request.get("run_compressed"):
        logging.info("Starting compressed read loadtest")
        values["read_compressed"] = self.do_read(single, True, entries)

def post(self):
    """Deletes or runs the write/read loadtests selected in the form.

    Request parameters: "delete" drops all loadtest RecordIOs;
    "run_uncompressed"/"run_compressed" choose the variants; "entries",
    "entry_size_min", "entry_size_max", "entry_size_key" and
    "compressable" control the generated test data.
    """
    values = {}
    if self.request.get("run_uncompressed"):
        values["run_uncompressed"] = "checked"
    if self.request.get("run_compressed"):
        values["run_compressed"] = "checked"
    if self.request.get("delete"):
        # Drop every loadtest RecordIO and just re-render the form.
        RecordIOWriter("loadtest_single_compressed").delete()
        RecordIOWriter("loadtest_single_uncompressed").delete()
        RecordIOWriter("loadtest_combined_compressed").delete()
        RecordIOWriter("loadtest_combined_uncompressed").delete()
        self.handle(values)
        return
    amount = int(self.request.get("entries"))
    entry_size_min = int(self.request.get("entry_size_min"))
    entry_size_max = int(self.request.get("entry_size_max"))
    entry_size_key = int(self.request.get("entry_size_key"))
    compressable = self.request.get("compressable")
    entries = []
    gen = StringGenerator(compressable)
    for _ in xrange(amount):
        entries.append(
            (str(random.randint(0, entry_size_key)),
             gen.next(random.randint(entry_size_min, entry_size_max))))
    values["ran"] = True
    # When both variants run, they share one "combined" RecordIO name.
    single = "single"
    if (self.request.get("run_uncompressed") and
            self.request.get("run_compressed")):
        single = "combined"
    if self.request.get("run_uncompressed"):
        logging.info("Starting uncompressed write loadtest")
        values["write_uncompressed"] = self.do_write(single, False, entries)
    if self.request.get("run_compressed"):
        logging.info("Starting compressed write loadtest")
        values["write_compressed"] = self.do_write(single, True, entries)
    try:
        self._run_reads_(single, values, entries)
    except NotEveryThingWrittenError:
        # The async writes may not be applied yet; wait and retry once.
        # (Previously this retry duplicated the read code verbatim.)
        logging.info("Maybe not ready to read!")
        time.sleep(5)
        self._run_reads_(single, values, entries)
    logging.info("Loadtests done")
    self.handle(values)
def testTaskQueue(self):
    """Running the queued write tasks must materialize multiple shards."""
    writer = RecordIOWriter("test")
    writer.create(compressed=False)
    payload = test_helper.uncompressableString(MAX_ENTRY_SIZE - 1)
    entries_to_write = MAX_BLOB_SIZE / MAX_ENTRY_SIZE + 1
    for index in range(entries_to_write):
        writer.insert(str(index), payload)
    writer.commit_async()
    stub = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
    # Replay each queued write task against the handler directly.
    for task in stub.GetTasks("recordio-writer"):
        decoded = urlparse.parse_qs(base64.b64decode(task["body"]))
        flat_args = dict((name, values[0]) for name, values in decoded.items())
        test_helper.requestGet(WriteHandler(), task["url"], flat_args)
    assert sum(1 for _ in RecordIOShard.all()) > 1
    reader = RecordIOReader("test")
    result = dict((key, value) for key, value in reader)
    self.assertEqual(entries_to_write, len(result))
    for index in range(entries_to_write):
        self.assertEqual(payload, result[str(index)], "Not equal")
def do_write(self, single, compressed, entries):
    """Writes all entries into the matching loadtest RecordIO.

    Returns a (elapsed seconds, writer db stats) tuple.
    """
    start = time.time()
    suffix = "compressed" if compressed else "uncompressed"
    writer = RecordIOWriter("loadtest_%s_%s" % (single, suffix))
    writer.create(compressed=compressed)
    for kv in entries:
        writer.insert(kv[0], kv[1])
    writer.commit_sync(retries=10)
    return time.time() - start, writer.db_stats()
def do_write(self, single, compressed, entries):
    """Inserts every entry into the loadtest RecordIO and times the commit.

    Returns a (elapsed seconds, writer db stats) tuple.
    """
    start = time.time()
    name_suffix = {True: "compressed", False: "uncompressed"}[compressed]
    writer = RecordIOWriter("_".join(["loadtest", single, name_suffix]))
    writer.create(compressed=compressed)
    for pair in entries:
        writer.insert(pair[0], pair[1])
    writer.commit_sync(retries=10)
    elapsed = time.time() - start
    return elapsed, writer.db_stats()
def get(self):
    """Leases pending RecordIO write tasks from the pull queue and applies them.

    With a "taskqueue" request parameter: drains that tag's tasks in
    size-bounded batches via commit_batch. Without one: discovers tags
    that still have pending tasks and schedules a writer task for each.
    """
    self.pull = taskqueue.Queue('recordio-queue')
    tag = self.request.get("taskqueue")
    # How many tasks fit into a single lease RPC.
    max_tasks_to_lease = MAX_RPC_SIZE / MAX_TASKQUEUE_BATCH_SIZE
    if tag:
        batch = []
        batch_size = 0
        success = True
        while True:
            tasks = self.pull.lease_tasks_by_tag(LEASE_TIME_PER_BATCH,
                                                 max_tasks_to_lease,
                                                 tag=tag)
            for task in tasks:
                if task.was_deleted:
                    # Should never happen
                    continue
                next_key_values = marshal.loads(task.payload)
                next_size = sum(
                    [recordio_chunks.size(x) for x in next_key_values])
                if next_size + batch_size >= MAX_WRITE_BATCH_SIZE:
                    # Flush the accumulated batch before it exceeds the
                    # write limit; start a new batch with this task.
                    # NOTE: "and" short-circuits, so once a commit fails
                    # later batches are not attempted.
                    success = success and self.commit_batch(tag, batch)
                    batch = [(task, next_key_values)]
                    batch_size = next_size
                else:
                    batch_size += next_size
                    batch.append((task, next_key_values))
            if len(tasks) != max_tasks_to_lease:
                # A short lease result means the queue is drained for this tag.
                break
        # Commit whatever is left in the final partial batch.
        success = success and self.commit_batch(tag, batch)
        if not success:
            raise Exception("RecordIO not completed")
    else:
        # No tag given: peek (lease for 0s) at pending tasks and schedule
        # one writer task per distinct tag.
        pending_tasks = self.pull.lease_tasks(0, max_tasks_to_lease)
        seen = set([])
        for task in pending_tasks:
            tag = task.tag
            if tag in seen:
                continue
            seen.add(tag)
            try:
                taskqueue.Queue('recordio-writer').add(
                    RecordIOWriter.create_task_(tag, in_past=True))
                self.response.out.write("Scheduled write for: %s<br>" % tag)
            except (taskqueue.DuplicateTaskNameError,
                    taskqueue.TombstonedTaskError,
                    taskqueue.TaskAlreadyExistsError):
                # A writer task for this tag is already queued.
                self.response.out.write(
                    "Already pending write for: %s<br>" % tag)
        if len(pending_tasks) == max_tasks_to_lease:
            # There may be more tags than one lease can return; reload
            # the page shortly to pick up the rest.
            self.response.out.write(
                "<script type=text/javascript>window.setTimeout(function() {"
                "document.location.reload();"
                "}, 5000);</script>")
def testCommitToQueueAndScheduleWrite(self):
    """An async commit must enqueue exactly one write task for the shard."""
    writer = RecordIOWriter("test")
    writer.create()
    writer.insert("a", "")
    writer.commit_async()
    stub = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
    queued = stub.GetTasks("recordio-writer")
    self.assertEqual(1, len(queued))
    task = queued[0]
    self.assertEqual("/recordio/write", task["url"])
    expected_body = ("taskqueue=" +
                     urllib.quote(RecordIOShard.key_name("test")))
    self.assertEqual(expected_body, base64.b64decode(task["body"]))
def get(self):
    """Leases pending RecordIO write tasks from the pull queue and applies them.

    With a "taskqueue" request parameter: drains that tag's tasks in
    size-bounded batches via commit_batch. Without one: discovers tags
    that still have pending tasks and schedules a writer task for each.
    """
    self.pull = taskqueue.Queue('recordio-queue')
    tag = self.request.get("taskqueue")
    # How many tasks fit into a single lease RPC.
    max_tasks_to_lease = MAX_RPC_SIZE / MAX_TASKQUEUE_BATCH_SIZE
    if tag:
        batch = []
        batch_size = 0
        success = True
        while True:
            tasks = self.pull.lease_tasks_by_tag(LEASE_TIME_PER_BATCH,
                                                 max_tasks_to_lease,
                                                 tag=tag)
            for task in tasks:
                if task.was_deleted:
                    # Should never happen
                    continue
                next_key_values = marshal.loads(task.payload)
                next_size = sum([recordio_chunks.size(x)
                                 for x in next_key_values])
                if next_size + batch_size >= MAX_WRITE_BATCH_SIZE:
                    # Flush the accumulated batch before it exceeds the
                    # write limit; start a new batch with this task.
                    # NOTE: "and" short-circuits, so once a commit fails
                    # later batches are not attempted.
                    success = success and self.commit_batch(tag, batch)
                    batch = [(task, next_key_values)]
                    batch_size = next_size
                else:
                    batch_size += next_size
                    batch.append((task, next_key_values))
            if len(tasks) != max_tasks_to_lease:
                # A short lease result means the queue is drained for this tag.
                break
        # Commit whatever is left in the final partial batch.
        success = success and self.commit_batch(tag, batch)
        if not success:
            raise Exception("RecordIO not completed")
    else:
        # No tag given: peek (lease for 0s) at pending tasks and schedule
        # one writer task per distinct tag.
        pending_tasks = self.pull.lease_tasks(0, max_tasks_to_lease)
        seen = set([])
        for task in pending_tasks:
            tag = task.tag
            if tag in seen:
                continue
            seen.add(tag)
            try:
                taskqueue.Queue('recordio-writer').add(
                    RecordIOWriter.create_task_(tag, in_past=True))
                self.response.out.write("Scheduled write for: %s<br>" % tag)
            except (taskqueue.DuplicateTaskNameError,
                    taskqueue.TombstonedTaskError,
                    taskqueue.TaskAlreadyExistsError):
                # A writer task for this tag is already queued.
                self.response.out.write("Already pending write for: %s<br>" % tag)
        if len(pending_tasks) == max_tasks_to_lease:
            # There may be more tags than one lease can return; reload
            # the page shortly to pick up the rest.
            self.response.out.write(
                "<script type=text/javascript>window.setTimeout(function() {"
                "document.location.reload();"
                "}, 5000);</script>")
def commit_batch(self, tag, batch):
    """Applies a batch of values to a RecordIO and deletes the taskqueue tasks.

    :param tag: The current tag we are working on
    :param batch: A list of (taskqueue_task, key_value_list)
    :return: True on success
    """
    if not batch:
        # Nothing to do; an empty batch is trivially successful.
        return True
    done_tasks = []
    count = 0
    writer = RecordIOWriter(RecordIOShard.get_name(tag))
    for done_task, key_values in batch:
        done_tasks.append(done_task)
        for entry in key_values:
            writer.insert_entry_(entry)
            count += 1
    try:
        writer.commit_sync(retries=1)
        try:
            # Fast path: free every leased task in one RPC.
            self.pull.delete_tasks(done_tasks)
        except taskqueue.BadTaskStateError:
            # Fall back to per-task deletion so a single bad task does
            # not keep the others leased.
            for task in done_tasks:
                if task.was_deleted:
                    continue
                try:
                    self.pull.delete_tasks(task)
                except taskqueue.BadTaskStateError:
                    # BUG FIX: the format arguments must be a tuple; the
                    # original applied "%" with only task.name, raising
                    # TypeError inside this handler.
                    logging.debug("RecordIO Failed to free task %s on %s" %
                                  (task.name, tag))
        logging.debug("RecordIO wrote %d entries to %s" % (count, writer.name))
    except RecordIOWriterNotCompletedError:
        logging.debug("RecordIO not completed on: %s" % tag)
        # Return the tasks immediately so another worker can retry them.
        for task in done_tasks:
            self.pull.modify_task_lease(task, 0)
        return False
    return True
def testCommitToQueueSplitEntries(self):
    """A value larger than MAX_ENTRY_SIZE must be split across two chunks."""
    chunk_size = MAX_ENTRY_SIZE + 1
    payload = test_helper.uncompressableString(chunk_size)
    writer = RecordIOWriter("test")
    writer.create()
    writer.insert("test", payload)
    list(writer.commit_to_queue_())
    queue = taskqueue.Queue('recordio-queue')
    leased = list(queue.lease_tasks(60, 100))
    self.assertEqual(1, len(leased))
    self.assertEqual(RecordIOShard.key_name("test"), leased[0].tag)
    updates = marshal.loads(leased[0].payload)
    # Two chunks (index 0 and 1 of 2), whose data joins back to the value.
    self.assertEqual([('test', 0, 2), ('test', 1, 2)],
                     [entry[:-2] for entry in updates])
    joined = "".join([entry[-1] for entry in updates])
    self.assertEqual(STRING + payload, joined)
def testWriteDuringSplit(self):
    """Writes racing a shard split must fail until both halves are committed."""
    shard = RecordIOShard.create("test", compressed=False)
    shard.insert(("1", STRING + "1"))
    shard.insert(("2", STRING + "2"))
    lo_shard, hi_shard = shard.split()
    lo_shard.commit()
    writer = RecordIOWriter("test")
    writer.insert("3", "3")
    # The high half is not committed yet, so targeting it must fail...
    self.assertRaises(RecordIOShardDoesNotExistError,
                      writer.commit_shard_,
                      hi_shard.key().name(), writer.updates)
    # ...and a sync commit cannot complete either.
    self.assertRaises(RecordIOWriterNotCompletedError,
                      writer.commit_sync, 32, 0)
    hi_shard.commit()
    writer.insert("0", STRING + "0")
    writer.commit_sync()
    lo_shard, hi_shard = [shard for shard in RecordIOShard.all()]
    self.assertEqual(["0", "1"], [entry[0] for entry in lo_shard])
    self.assertEqual(["2", "3"], [entry[0] for entry in hi_shard])
def testCommitToQueue(self):
    """Enough data to exceed one taskqueue batch must produce two tasks."""
    writer = RecordIOWriter("test")
    writer.create()
    chunk_size = MAX_ENTRY_SIZE - 1
    entries_to_write = MAX_TASKQUEUE_BATCH_SIZE / MAX_ENTRY_SIZE + 1
    for index in xrange(entries_to_write):
        writer.insert(str("%09d" % index),
                      test_helper.uncompressableString(chunk_size))
    list(writer.commit_to_queue_())
    queue = taskqueue.Queue('recordio-queue')
    leased = list(queue.lease_tasks(60, 100))
    self.assertEqual(2, len(leased))
    expected_tag = RecordIOShard.key_name("test")
    self.assertEqual(expected_tag, leased[0].tag)
    self.assertEqual(expected_tag, leased[1].tag)
    updates_0 = marshal.loads(leased[0].payload)
    updates_1 = marshal.loads(leased[1].payload)
    # All keys must survive, in order, across the two task payloads.
    written_keys = ([entry[0] for entry in updates_0] +
                    [entry[0] for entry in updates_1])
    self.assertEqual([str("%09d" % x) for x in xrange(entries_to_write)],
                     written_keys)
    self.assertTrue(updates_0[0][1] ==
                    STRING + test_helper.uncompressableString(chunk_size))
def commit_batch(self, tag, batch):
    """Applies a batch of values to a RecordIO and deletes the taskqueue tasks.

    :param tag: The current tag we are working on
    :param batch: A list of (taskqueue_task, key_value_list)
    :return: True on success
    """
    if batch:
        done_tasks = []
        count = 0
        writer = RecordIOWriter(RecordIOShard.get_name(tag))
        for done_task, key_values in batch:
            done_tasks.append(done_task)
            for entry in key_values:
                writer.insert_entry_(entry)
                count += 1
        try:
            writer.commit_sync(retries=1)
            try:
                # Fast path: free every leased task in one RPC.
                self.pull.delete_tasks(done_tasks)
            except taskqueue.BadTaskStateError:
                # Fall back to per-task deletion so one bad task does not
                # keep the others leased.
                for task in done_tasks:
                    if task.was_deleted:
                        continue
                    try:
                        self.pull.delete_tasks(task)
                    except taskqueue.BadTaskStateError:
                        # BUG FIX: format arguments must be a tuple; the
                        # original applied "%" with only task.name and
                        # raised TypeError here.
                        logging.debug(
                            "RecordIO Failed to free task %s on %s" %
                            (task.name, tag))
            logging.debug("RecordIO wrote %d entries to %s" %
                          (count, writer.name))
        except RecordIOWriterNotCompletedError:
            logging.debug("RecordIO not completed on: %s" % tag)
            # Return the tasks immediately so another worker can retry.
            for task in done_tasks:
                self.pull.modify_task_lease(task, 0)
            return False
    return True
def writeOneShard(self, compressed):
    """Writes, overwrites and removes entries, then verifies the one shard."""
    writer = RecordIOWriter("test")
    writer.create(compressed=compressed)
    writer.insert("1", "foo")
    writer.insert("2", "bar")
    writer.commit_sync()
    # A second writer session updates the existing RecordIO.
    writer = RecordIOWriter("test")
    writer.insert("3", "win")
    writer.remove("2")
    writer.commit_sync()
    shard = RecordIOShard.all().get()
    self.assertEqual(compressed, shard.compressed)
    self.assertEqual([("1", STRING + "foo"), ("3", STRING + "win")],
                     [entry for entry in shard])
def post(self):
    """Admin form handler: create/insert/remove/delete RecordIO entries.

    Request parameters:
      name: RecordIO to operate on. With neither key nor value the
        RecordIO is created; with only a key the entry is removed;
        with both, the entry is written.
      compressed: truthy string enables compression on create.
      key, value: the entry to write or remove.
      delete: name of a RecordIO to delete entirely.
    """
    name = self.request.get("name")
    compressed = bool(self.request.get("compressed"))
    key = self.request.get("key", None)
    value = self.request.get("value", None)
    if name:
        writer = RecordIOWriter(name)
        if key is None and value is None:
            writer.create(compressed)
        elif value is None:
            writer.remove(key)
            writer.commit_sync()
        else:
            # SECURITY: eval() on a request parameter executes arbitrary
            # code; acceptable only because this is an admin-only handler.
            writer.insert(str(key), eval(value))
            writer.commit_sync()
        # BUG FIX: the original passed None to urllib.quote() when no key
        # was given, which raised; fall back to the empty string instead.
        start = str(key) if key else ""
        self.redirect("?name=" + str(urllib.quote(name)) +
                      "&start=" + urllib.quote(start))
    delete = self.request.get("delete")
    if delete:
        writer = RecordIOWriter(delete)
        writer.delete()
        self.redirect("/recordio/")
def testWriteStringMarshalPickle(self):
    """Values are stored with a prefix matching their serialization type."""
    writer = RecordIOWriter("test")
    writer.create()
    writer.insert("string", "string")
    marshalable = {"a": [1, 2, 3]}
    writer.insert("marshal", marshalable)
    class AnyClass():
        pass
    pickleable = AnyClass()
    writer.insert("cpickle", pickleable)
    writer.commit_sync()
    shard = RecordIOShard.all().get()
    expected = [("cpickle", CPICKLE + cPickle.dumps(pickleable)),
                ("marshal", MARSHAL + marshal.dumps(marshalable)),
                ("string", STRING + "string")]
    self.assertEqual(expected, [entry for entry in shard])
def write2MBAndReplace(self, compressed):
    """Writes a 2MB value spanning several shards, then replaces it."""
    big_value = test_helper.uncompressableString(2**21)
    writer = RecordIOWriter("test")
    writer.create(compressed=compressed)
    writer.insert("test", big_value)
    writer.commit_sync()
    pieces = []
    entries = 0
    shards_count = 0
    for shard in RecordIOShard.all():
        self.assertTrue(len(shard.data) >= 1000)
        shards_count += 1
        for entry in shard:
            pieces.append(entry[-1])
            entries += 1
    # The 2MB value must have been chunked across multiple shards.
    self.assertTrue(shards_count > 1)
    self.assertTrue(entries > 3)
    self.assertEqual(STRING + big_value, "".join(pieces), "read != write")
    # Replace the large value with a tiny one and re-check the shards.
    writer.insert("test", "short")
    writer.commit_sync(retries=0)
    replaced_shards_count = 0
    for shard in RecordIOShard.all():
        if replaced_shards_count == 0:
            # The first shard holds the single replacement entry.
            self.assertEqual(1, len(shard))
            for entry in shard:
                self.assertEqual(STRING + "short", entry[-1])
        else:
            # Any remaining shards must be empty.
            self.assertEqual(0, len(shard))
            for entry in shard:
                self.fail("shouldnt be iterable")
        replaced_shards_count += 1
        self.assertTrue(len(shard.data) < 1000)
    self.assertTrue(replaced_shards_count > 0)
    self.assertTrue(replaced_shards_count <= shards_count)