def commit_to_queue_(self):
    """Adds all pending changes to the task queue for async commits.

    The entries in ``self.updates`` are grouped by target shard via
    ``RecordIOShard.get_shards_for_key_values``, chunked with
    ``get_chunks(key_values, MAX_TASKQUEUE_BATCH_SIZE)``, marshalled and
    added asynchronously to the ``'recordio-queue'`` queue as PULL tasks
    tagged with the shard name. Entries for which no shard was found, or
    whose task could not be added, are kept in ``self.updates``.

    :return: Yields the shard name of every chunk whose task was added
             successfully (a name is yielded once per chunk).
    :raise RecordIOWriterNotCompletedError: Once all results have been
        collected, if any entries are still left in ``self.updates``.
    """
    pull = taskqueue.Queue('recordio-queue')
    rpcs = []
    key_values_not_added = RecordIORecords()
    for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
            self.name, self.updates):
        self.db_search += 1
        if shard_name is None:
            # No shard was found for these entries; keep them pending.
            for entry in key_values:
                key_values_not_added.insert(entry)
        else:
            for key_values_chunk in get_chunks(key_values,
                                               MAX_TASKQUEUE_BATCH_SIZE):
                payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                rpc = pull.add_async(
                    taskqueue.Task(payload=payload, method='PULL',
                                   tag=shard_name))
                rpcs.append((rpc, key_values_chunk, shard_name))

    for rpc, key_values, shard_name in rpcs:
        try:
            rpc.get_result()
        except Exception:
            # Adding the task failed; keep its entries pending so the final
            # check below reports them. Only ``Exception`` is caught so that
            # GeneratorExit, KeyboardInterrupt and SystemExit propagate.
            for entry in key_values:
                key_values_not_added.insert(entry)
        else:
            # The yield is deliberately outside the ``try`` body: closing the
            # generator here, or an exception thrown in by the consumer, must
            # not mark an already enqueued chunk as "not added".
            yield shard_name
    self.updates = key_values_not_added
    if len(self.updates):
        raise RecordIOWriterNotCompletedError(len(self.updates))
def testShardNamesForKeysMissingHi(self):
    """A key outside the only existing shard (hi="1") is grouped under None."""
    lower_shard = RecordIOShard.create("test", hi="1")
    lower_shard.insert(("0", "a"))
    lower_shard.commit()

    expected = {None: [("1", )]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", [("1", )]))
    self.assertEqual(expected, found)
def testShardNamesForShorterKeys(self):
    """A one-element key ("aa",) is matched to the shard with lo=("a", "")."""
    boundary = ("a", "")
    RecordIOShard.create("test", hi=boundary).commit()
    RecordIOShard.create("test", lo=boundary).commit()

    upper_shard_name = RecordIOShard.key_name("test", lo=boundary)
    expected = {upper_shard_name: [("aa", )]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", [("aa", )]))
    self.assertEqual(expected, found)
def commit_to_queue_(self):
    """Adds all pending changes to the task queue for async commits.

    The entries in ``self.updates`` are grouped by target shard via
    ``RecordIOShard.get_shards_for_key_values``, chunked with
    ``get_chunks(key_values, MAX_TASKQUEUE_BATCH_SIZE)``, marshalled and
    added asynchronously to the ``'recordio-queue'`` queue as PULL tasks
    tagged with the shard name. Entries for which no shard was found, or
    whose task could not be added, are kept in ``self.updates``.

    :return: Yields the shard name of every chunk whose task was added
             successfully (a name is yielded once per chunk).
    :raise RecordIOWriterNotCompletedError: Once all results have been
        collected, if any entries are still left in ``self.updates``.
    """
    pull = taskqueue.Queue('recordio-queue')
    rpcs = []
    key_values_not_added = RecordIORecords()
    for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
            self.name, self.updates):
        self.db_search += 1
        if shard_name is None:
            # No shard was found for these entries; keep them pending.
            for entry in key_values:
                key_values_not_added.insert(entry)
        else:
            for key_values_chunk in get_chunks(key_values,
                                               MAX_TASKQUEUE_BATCH_SIZE):
                payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                rpc = pull.add_async(
                    taskqueue.Task(payload=payload, method='PULL',
                                   tag=shard_name))
                rpcs.append((rpc, key_values_chunk, shard_name))

    for rpc, key_values, shard_name in rpcs:
        try:
            rpc.get_result()
        except Exception:
            # Adding the task failed; keep its entries pending so the final
            # check below reports them. Only ``Exception`` is caught so that
            # GeneratorExit, KeyboardInterrupt and SystemExit propagate.
            for entry in key_values:
                key_values_not_added.insert(entry)
        else:
            # The yield is deliberately outside the ``try`` body: closing the
            # generator here, or an exception thrown in by the consumer, must
            # not mark an already enqueued chunk as "not added".
            yield shard_name
    self.updates = key_values_not_added
    if len(self.updates):
        raise RecordIOWriterNotCompletedError(len(self.updates))
def testShardNamesForKeysEmpty(self):
    """The empty key ("",) is matched to the single unbounded shard."""
    only_shard = RecordIOShard.create("test")
    for entry in (("0", "a"), ("1", "b"), ("2", "c")):
        only_shard.insert(entry)
    only_shard.commit()

    expected = {RecordIOShard.key_name("test"): [("", )]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", [("", )]))
    self.assertEqual(expected, found)
def testShardNamesForKeysSplit(self):
    """Keys are grouped per shard, before and after splitting into three."""
    test_strings = [str(x) for x in range(10)]
    recordio = RecordIOShard.create("test")
    for key in test_strings:
        recordio.insert((key, test_helper.uncompressableString(2**16)))
    recordio.commit()

    # Before the split every key belongs to the single unbounded shard.
    unsplit_expected = {
        RecordIOShard.key_name("test"): [("0", ""), ("1", "")],
    }
    unsplit_found = self.getResult(
        RecordIOShard.get_shards_for_key_values(
            "test", [("0", ""), ("1", "")]))
    self.assertEqual(unsplit_expected, unsplit_found)

    # Replace the original shard with three shards obtained by splitting
    # twice; the second split is applied to the upper half of the first.
    recordio.delete()
    shard_0, upper_half = recordio.split()
    shard_1, shard_2 = upper_half.split()
    for shard in (shard_0, shard_1, shard_2):
        shard.commit()

    split_expected = {
        shard_0.key().name(): [(key, key) for key in test_strings[:5]],
        shard_1.key().name(): [(key, key) for key in test_strings[5:8]],
        shard_2.key().name(): [(key, key) for key in test_strings[8:]],
    }
    split_found = self.getResult(
        RecordIOShard.get_shards_for_key_values(
            "test", zip(test_strings, test_strings)))
    self.assertEqual(split_expected, split_found)
def read_entries_(self, start_key=None, end_key=None):
    """An internal helper function to read split entries.

    Walks the shards one at a time, starting with the shard that contains
    ``start_key`` (or ``("", )`` when no start key is given) and continuing
    at each shard's ``hi`` bound until the last shard or ``end_key`` is
    reached.

    :param start_key: An entry tuple (no value needed)
    :param end_key: An entry tuple (no value needed) Exclusive.
    :return: Yields key, split_values
    :raise RecordIOShardDoesNotExistError: If no shard is found for the key
        currently being looked up.
    """
    # TODO (andrin): fetch a couple of shards instead of just one based on
    # method argument
    current_key = start_key
    if current_key is None:
        current_key = ("", )
    # Name parts of a hypothetical shard spanning exactly the requested range;
    # compared below against the name parts of each real shard.
    limit_shard_name = RecordIOShard.key_name(
        self.name, lo=start_key, hi=end_key).split(SPLIT_CHAR)
    while True:
        shard = RecordIOShard.get_shards_for_key_values(
            self.name, [current_key], keys_only=False).next()[0]
        self.db_search_and_get += 1
        if shard is None:
            raise RecordIOShardDoesNotExistError(self.name)
        hi = shard.lo_hi()[1]
        shard_name = shard.key().name().split(SPLIT_CHAR)
        # NOTE(review): slices [6:10] and [2:5] presumably address the two
        # key bounds encoded in the shard name -- confirm against
        # RecordIOShard.key_name.
        if (shard_name[6:10] >= limit_shard_name[6:10] and
                (shard_name[2:5] < limit_shard_name[2:5] or
                 limit_shard_name[2] == SPLIT_CHAR_AFTER)):
            # Read the whole shard
            for entry in shard:
                yield entry
        else:
            # Read parts of the shard
            for entry in shard.read(current_key, end_key):
                yield entry
        if hi is None:
            # Was the last shard
            return
        current_key = hi
        if (end_key is not None and
                RecordIORecords.entry_comperator(current_key, end_key) >= 0):
            # Next shard is after end_key
            return
def read_entries_(self, start_key=None, end_key=None):
    """An internal helper function to read split entries.

    Walks the shards one at a time, starting with the shard that contains
    ``start_key`` (or ``("", )`` when no start key is given) and continuing
    at each shard's ``hi`` bound until the last shard or ``end_key`` is
    reached.

    :param start_key: An entry tuple (no value needed)
    :param end_key: An entry tuple (no value needed) Exclusive.
    :return: Yields key, split_values
    :raise RecordIOShardDoesNotExistError: If no shard is found for the key
        currently being looked up.
    """
    # TODO (andrin): fetch a couple of shards instead of just one based on
    # method argument
    current_key = start_key
    if current_key is None:
        current_key = ("", )
    # Name parts of a hypothetical shard spanning exactly the requested range;
    # compared below against the name parts of each real shard.
    limit_shard_name = RecordIOShard.key_name(
        self.name, lo=start_key, hi=end_key).split(SPLIT_CHAR)
    while True:
        shard = RecordIOShard.get_shards_for_key_values(
            self.name, [current_key], keys_only=False).next()[0]
        self.db_search_and_get += 1
        if shard is None:
            raise RecordIOShardDoesNotExistError(self.name)
        hi = shard.lo_hi()[1]
        shard_name = shard.key().name().split(SPLIT_CHAR)
        # NOTE(review): slices [6:10] and [2:5] presumably address the two
        # key bounds encoded in the shard name -- confirm against
        # RecordIOShard.key_name.
        if (shard_name[6:10] >= limit_shard_name[6:10] and
                (shard_name[2:5] < limit_shard_name[2:5] or
                 limit_shard_name[2] == SPLIT_CHAR_AFTER)):
            # Read the whole shard
            for entry in shard:
                yield entry
        else:
            # Read parts of the shard
            for entry in shard.read(current_key, end_key):
                yield entry
        if hi is None:
            # Was the last shard
            return
        current_key = hi
        if (end_key is not None and
                RecordIORecords.entry_comperator(current_key, end_key) >= 0):
            # Next shard is after end_key
            return
def testShardNamesForKeysNone(self):
    """With no shard created, all requested keys are grouped under None."""
    requested = [("0", ""), ("1", "")]
    expected = {None: [("0", ""), ("1", "")]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", requested))
    self.assertEqual(expected, found)
def commit_sync(self, retries=32, retry_timeout=1):
    """Applies all changes synchronously to the RecordIO.

    Each attempt groups ``self.updates`` by shard, commits them chunk by
    chunk through ``self.commit_shard_`` and collects every entry whose
    shard could not be found. Those entries become the new ``self.updates``
    and are retried on the next attempt.

    :param retries: How many times a commit_sync should be retried in case
                    of datastore collisions.
    :param retry_timeout: The number of seconds to wait before the next
                          retry.
    :raise RecordIOWriterNotCompletedError: If entries are still pending
        after ``retries + 1`` attempts.
    """
    if not len(self.updates):
        return
    for attempt in range(retries + 1):
        shard_does_not_exist = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                self.name, self.updates):
            self.db_search += 1
            if shard_name is None and key_values:
                logging.debug(
                    "RecordIO %s: No shard found for:\n%s -> %s" %
                    (self.name,
                     SPLIT_CHAR.join(RecordIOShard.entry_key(key_values[0])),
                     key_values[0][:-1]))
                for entry in key_values:
                    shard_does_not_exist.insert(entry)
            else:
                lo_just_split = None
                hi_just_split = None
                for key_values_chunk in get_chunks(key_values,
                                                   MAX_WRITE_BATCH_SIZE):
                    if lo_just_split and hi_just_split and key_values_chunk:
                        # The previous commit reported a split (presumably
                        # the (lo, hi) bounds of both halves): retarget this
                        # chunk at the half containing its first entry.
                        if RecordIORecords.in_range(key_values_chunk[0],
                                                    lo=lo_just_split[0],
                                                    hi=lo_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=lo_just_split[0], hi=lo_just_split[1])
                        elif RecordIORecords.in_range(key_values_chunk[0],
                                                      lo=hi_just_split[0],
                                                      hi=hi_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=hi_just_split[0], hi=hi_just_split[1])
                    not_deleted = None
                    try:
                        not_deleted, lo_just_split, hi_just_split = (
                            self.commit_shard_(shard_name, key_values_chunk))
                    except RecordIOShardDoesNotExistError:
                        logging.debug("Shard does not exist:\n" + shard_name)
                        lo_just_split = None
                        hi_just_split = None
                        for entry in key_values_chunk:
                            shard_does_not_exist.insert(entry)
                    if not_deleted:
                        # Entries handed back by commit_shard_ are looked up
                        # again and committed to whichever shard they map to.
                        for to_delete_shard_name, to_delete_key_values in (
                                RecordIOShard.get_shards_for_key_values(
                                    self.name, not_deleted)):
                            self.db_search += 1
                            try:
                                self.commit_shard_(to_delete_shard_name,
                                                   to_delete_key_values)
                            except RecordIOShardDoesNotExistError:
                                # Log the shard that was actually attempted;
                                # %s keeps this safe if the name is None.
                                logging.debug("Shard does not exist:\n%s" %
                                              to_delete_shard_name)
                                for entry in to_delete_key_values:
                                    shard_does_not_exist.insert(entry)
        self.updates = shard_does_not_exist
        if len(self.updates):
            if attempt == retries:
                raise RecordIOWriterNotCompletedError(len(self.updates))
            else:
                logging.debug("Commit attempt %d failed" % attempt)
                time.sleep(retry_timeout)
        else:
            return
def commit_sync(self, retries=32, retry_timeout=1):
    """Applies all changes synchronously to the RecordIO.

    Each attempt groups ``self.updates`` by shard, commits them chunk by
    chunk through ``self.commit_shard_`` and collects every entry whose
    shard could not be found. Those entries become the new ``self.updates``
    and are retried on the next attempt.

    :param retries: How many times a commit_sync should be retried in case
                    of datastore collisions.
    :param retry_timeout: The number of seconds to wait before the next
                          retry.
    :raise RecordIOWriterNotCompletedError: If entries are still pending
        after ``retries + 1`` attempts.
    """
    if not len(self.updates):
        return
    for attempt in range(retries + 1):
        shard_does_not_exist = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                self.name, self.updates):
            self.db_search += 1
            if shard_name is None and key_values:
                logging.debug(
                    "RecordIO %s: No shard found for:\n%s -> %s" %
                    (self.name,
                     SPLIT_CHAR.join(RecordIOShard.entry_key(key_values[0])),
                     key_values[0][:-1]))
                for entry in key_values:
                    shard_does_not_exist.insert(entry)
            else:
                lo_just_split = None
                hi_just_split = None
                for key_values_chunk in get_chunks(key_values,
                                                   MAX_WRITE_BATCH_SIZE):
                    if lo_just_split and hi_just_split and key_values_chunk:
                        # The previous commit reported a split (presumably
                        # the (lo, hi) bounds of both halves): retarget this
                        # chunk at the half containing its first entry.
                        if RecordIORecords.in_range(key_values_chunk[0],
                                                    lo=lo_just_split[0],
                                                    hi=lo_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=lo_just_split[0], hi=lo_just_split[1])
                        elif RecordIORecords.in_range(key_values_chunk[0],
                                                      lo=hi_just_split[0],
                                                      hi=hi_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=hi_just_split[0], hi=hi_just_split[1])
                    not_deleted = None
                    try:
                        not_deleted, lo_just_split, hi_just_split = (
                            self.commit_shard_(shard_name, key_values_chunk))
                    except RecordIOShardDoesNotExistError:
                        logging.debug("Shard does not exist:\n" + shard_name)
                        lo_just_split = None
                        hi_just_split = None
                        for entry in key_values_chunk:
                            shard_does_not_exist.insert(entry)
                    if not_deleted:
                        # Entries handed back by commit_shard_ are looked up
                        # again and committed to whichever shard they map to.
                        for to_delete_shard_name, to_delete_key_values in (
                                RecordIOShard.get_shards_for_key_values(
                                    self.name, not_deleted)):
                            self.db_search += 1
                            try:
                                self.commit_shard_(to_delete_shard_name,
                                                   to_delete_key_values)
                            except RecordIOShardDoesNotExistError:
                                # Log the shard that was actually attempted;
                                # %s keeps this safe if the name is None.
                                logging.debug("Shard does not exist:\n%s" %
                                              to_delete_shard_name)
                                for entry in to_delete_key_values:
                                    shard_does_not_exist.insert(entry)
        self.updates = shard_does_not_exist
        if len(self.updates):
            if attempt == retries:
                raise RecordIOWriterNotCompletedError(len(self.updates))
            else:
                logging.debug("Commit attempt %d failed" % attempt)
                time.sleep(retry_timeout)
        else:
            return