示例#1
0
    def commit_to_queue_(self):
        """Adds all pending changes to the task queues for async commits.

        The pending updates are grouped by shard, split into chunks with
        get_chunks(..., MAX_TASKQUEUE_BATCH_SIZE), marshalled and added
        asynchronously as pull tasks (tagged with the shard name) to the
        'recordio-queue' queue. Entries for which no shard was found and chunks
        whose add RPC failed are stored back into self.updates.

        :return: Yields the shard name of every chunk that was added
                 successfully. A shard name is yielded once per chunk.
        :raises RecordIOWriterNotCompletedError: Once all RPCs were checked, if
                 some updates could not be added to the queue.
        """
        pull = taskqueue.Queue('recordio-queue')
        rpcs = []
        key_values_not_added = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                self.name, self.updates):
            self.db_search += 1
            if shard_name is None:
                # No shard was found for these entries; keep them pending.
                for entry in key_values:
                    key_values_not_added.insert(entry)
                continue
            for key_values_chunk in get_chunks(key_values,
                                               MAX_TASKQUEUE_BATCH_SIZE):
                payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                rpc = pull.add_async(
                    taskqueue.Task(payload=payload,
                                   method='PULL',
                                   tag=shard_name))
                rpcs.append((rpc, key_values_chunk, shard_name))

        for rpc, key_values, shard_name in rpcs:
            try:
                rpc.get_result()
            except Exception:
                # The add RPC failed; keep the chunk in the pending updates.
                for entry in key_values:
                    key_values_not_added.insert(entry)
            else:
                # The yield lives outside of the try block on purpose: a
                # GeneratorExit (generator closed) or an exception thrown in
                # by the consumer must not be mistaken for a failed RPC.
                yield shard_name
        self.updates = key_values_not_added
        if len(self.updates):
            raise RecordIOWriterNotCompletedError(len(self.updates))
示例#2
0
 def testShardNamesForKeysMissingHi(self):
   # Only one shard exists and it was created with hi="1"; the lookup of
   # ("1", ) is expected to be reported under None (no shard).
   lower_shard = RecordIOShard.create("test", hi="1")
   lower_shard.insert(("0", "a"))
   lower_shard.commit()
   wanted = { None: [("1", )] }
   found = self.getResult(
       RecordIOShard.get_shards_for_key_values("test", [("1", )]))
   self.assertEqual(wanted, found)
示例#3
0
 def testShardNamesForShorterKeys(self):
   # Two shards meet at ("a", ""); the one-element key ("aa", ) is expected to
   # be assigned to the shard that was created with lo=("a", "").
   boundary = ("a", "")
   RecordIOShard.create("test", hi=boundary).commit()
   RecordIOShard.create("test", lo=boundary).commit()
   upper_shard_name = RecordIOShard.key_name("test", lo=boundary)
   wanted = { upper_shard_name: [("aa", ),] }
   found = self.getResult(
       RecordIOShard.get_shards_for_key_values("test", [("aa",)]))
   self.assertEqual(wanted, found)
示例#4
0
  def commit_to_queue_(self):
    """Adds all pending changes to the task queues for async commits.

    The pending updates are grouped by shard, split into chunks with
    get_chunks(..., MAX_TASKQUEUE_BATCH_SIZE), marshalled and added
    asynchronously as pull tasks (tagged with the shard name) to the
    'recordio-queue' queue. Entries for which no shard was found and chunks
    whose add RPC failed are stored back into self.updates.

    :return: Yields the shard name of every chunk that was added successfully.
             A shard name is yielded once per chunk.
    :raises RecordIOWriterNotCompletedError: Once all RPCs were checked, if
             some updates could not be added to the queue.
    """
    pull = taskqueue.Queue('recordio-queue')
    rpcs = []
    key_values_not_added = RecordIORecords()
    for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
          self.name, self.updates):
      self.db_search += 1
      if shard_name is None:
        # No shard was found for these entries; keep them pending.
        for entry in key_values:
          key_values_not_added.insert(entry)
        continue
      for key_values_chunk in get_chunks(key_values, MAX_TASKQUEUE_BATCH_SIZE):
        payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
        rpc = pull.add_async(taskqueue.Task(payload=payload, method='PULL',
                                            tag=shard_name))
        rpcs.append((rpc, key_values_chunk, shard_name))

    for rpc, key_values, shard_name in rpcs:
      try:
        rpc.get_result()
      except Exception:
        # The add RPC failed; keep the chunk in the pending updates.
        for entry in key_values:
          key_values_not_added.insert(entry)
      else:
        # The yield lives outside of the try block on purpose: a GeneratorExit
        # (generator closed) or an exception thrown in by the consumer must
        # not be mistaken for a failed RPC.
        yield shard_name
    self.updates = key_values_not_added
    if len(self.updates):
      raise RecordIOWriterNotCompletedError(len(self.updates))
示例#5
0
 def testShardNamesForKeysEmpty(self):
   # A single shard created without lo/hi is expected to be returned for the
   # empty key ("", ).
   single_shard = RecordIOShard.create("test")
   for entry in (("0", "a"), ("1", "b"), ("2", "c")):
     single_shard.insert(entry)
   single_shard.commit()
   wanted = { RecordIOShard.key_name("test"): [("", ),] }
   found = self.getResult(
       RecordIOShard.get_shards_for_key_values("test", [("",)]))
   self.assertEqual(wanted, found)
示例#6
0
 def testShardNamesForKeysSplit(self):
   # Phase 1: one unsplit shard holds the keys "0".."9"; every looked-up key
   # is expected to map to that shard.
   recordio = RecordIOShard.create("test")
   test_strings = list(map(str, range(10)))
   for key in test_strings:
     recordio.insert((key, test_helper.uncompressableString(2**16)))
   recordio.commit()
   wanted_unsplit = { RecordIOShard.key_name("test"):
                          [("0", ""), ("1", "")] }
   found_unsplit = self.getResult(
       RecordIOShard.get_shards_for_key_values(
           "test", [("0", ""), ("1", "")]))
   self.assertEqual(wanted_unsplit, found_unsplit)

   # Phase 2: replace the shard by the three shards produced by splitting it
   # twice; the keys are expected to be distributed 5 / 3 / 2.
   recordio.delete()
   first, remainder = recordio.split()
   second, third = remainder.split()
   for shard in (first, second, third):
     shard.commit()
   wanted_split = {
       first.key().name(): [('0', '0'), ('1', '1'), ('2', '2'), ('3', '3'),
                            ('4', '4')],
       second.key().name(): [('5', '5'), ('6', '6'), ('7', '7')],
       third.key().name(): [('8', '8'), ('9', '9')],
   }
   found_split = self.getResult(
       RecordIOShard.get_shards_for_key_values(
           "test", zip(test_strings, test_strings)))
   self.assertEqual(wanted_split, found_split)
示例#7
0
    def read_entries_(self, start_key=None, end_key=None):
        """An internal helper function to read split entries.

        Walks the shards one at a time: the first shard is the one looked up
        for start_key (or ("", ) if no start_key is given), every following
        shard is the one looked up for the previous shard's hi bound.

        :param start_key: An entry tuple (no value needed)
        :param end_key: An entry tuple (no value needed) Exclusive.
        :return: Yields key, split_values
        :raises RecordIOShardDoesNotExistError: If the lookup for the current
                 key returns no shard.
        """
        # TODO (andrin): fetch a couple of shards instead of just one based on
        #                method argument
        current_key = start_key
        if current_key is None:
            current_key = ("", )
        limit_shard_name = RecordIOShard.key_name(self.name,
                                                  lo=start_key,
                                                  hi=end_key).split(SPLIT_CHAR)
        while True:
            # Only the first result of the lookup is needed; with
            # keys_only=False its first element is the shard itself.
            shard = next(RecordIOShard.get_shards_for_key_values(
                self.name, [current_key], keys_only=False))[0]
            self.db_search_and_get += 1
            if shard is None:
                raise RecordIOShardDoesNotExistError(self.name)
            hi = shard.lo_hi()[1]
            shard_name = shard.key().name().split(SPLIT_CHAR)
            # Compares components of the shard's key name with the key name
            # built from the requested range to decide whether the shard can
            # be read completely.
            # NOTE(review): the meaning of the [2:5] / [6:10] slices depends on
            # the layout produced by RecordIOShard.key_name - confirm there.
            if (shard_name[6:10] >= limit_shard_name[6:10]
                    and (shard_name[2:5] < limit_shard_name[2:5]
                         or limit_shard_name[2] == SPLIT_CHAR_AFTER)):
                # Read the whole shard
                for entry in shard:
                    yield entry
            else:
                # Read parts of the shard
                for entry in shard.read(current_key, end_key):
                    yield entry
            if hi is None:
                # Was the last shard
                return
            current_key = hi
            if (end_key is not None and RecordIORecords.entry_comperator(
                    current_key, end_key) >= 0):
                # Next shard is after end_key
                return
示例#8
0
  def read_entries_(self, start_key=None, end_key=None):
    """An internal helper function to read split entries.

    Walks the shards one at a time: the first shard is the one looked up for
    start_key (or ("", ) if no start_key is given), every following shard is
    the one looked up for the previous shard's hi bound.

    :param start_key: An entry tuple (no value needed)
    :param end_key: An entry tuple (no value needed) Exclusive.
    :return: Yields key, split_values
    :raises RecordIOShardDoesNotExistError: If the lookup for the current key
             returns no shard.
    """
    # TODO (andrin): fetch a couple of shards instead of just one based on
    #                method argument
    current_key = start_key
    if current_key is None:
      current_key = ("", )
    limit_shard_name = RecordIOShard.key_name(
        self.name, lo=start_key, hi=end_key).split(SPLIT_CHAR)
    while True:
      # Only the first result of the lookup is needed; with keys_only=False
      # its first element is the shard itself.
      shard = next(RecordIOShard.get_shards_for_key_values(
          self.name, [current_key], keys_only=False))[0]
      self.db_search_and_get += 1
      if shard is None:
        raise RecordIOShardDoesNotExistError(self.name)
      hi = shard.lo_hi()[1]
      shard_name = shard.key().name().split(SPLIT_CHAR)
      # Compares components of the shard's key name with the key name built
      # from the requested range to decide whether the shard can be read
      # completely.
      # NOTE(review): the meaning of the [2:5] / [6:10] slices depends on the
      # layout produced by RecordIOShard.key_name - confirm there.
      if (shard_name[6:10] >= limit_shard_name[6:10] and
          (shard_name[2:5] < limit_shard_name[2:5] or
           limit_shard_name[2] == SPLIT_CHAR_AFTER)):
        # Read the whole shard
        for entry in shard:
          yield entry
      else:
        # Read parts of the shard
        for entry in shard.read(current_key, end_key):
          yield entry
      if hi is None:
        # Was the last shard
        return
      current_key = hi
      if (end_key is not None and
          RecordIORecords.entry_comperator(current_key, end_key) >= 0):
        # Next shard is after end_key
        return
示例#9
0
 def testShardNamesForKeysNone(self):
   # No shard is created in this test, so all looked-up keys are expected to
   # be reported under None.
   wanted = { None: [("0", ""), ("1", "")] }
   found = self.getResult(
       RecordIOShard.get_shards_for_key_values(
           "test", [("0", ""), ("1", "")]))
   self.assertEqual(wanted, found)
示例#10
0
    def commit_sync(self, retries=32, retry_timeout=1):
        """Applies all changes synchronously to the RecordIO.

        Each attempt looks up the shard of every pending update and commits
        the updates chunk by chunk with commit_shard_. Entries whose shard
        could not be found are collected and become the pending updates of the
        next attempt.

        :param retries: How many times a commit_sync should be retried in case
                        of datastore collisions.
        :param retry_timeout: The number of seconds to wait before the next
                              retry.
        :raises RecordIOWriterNotCompletedError: If there are still pending
                 updates after the last attempt.
        """
        if not len(self.updates):
            return
        for attempt in range(retries + 1):
            shard_does_not_exist = RecordIORecords()
            for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                    self.name, self.updates):
                self.db_search += 1
                if shard_name is None and key_values:
                    logging.debug(
                        "RecordIO %s: No shard found for:\n%s -> %s" %
                        (self.name,
                         SPLIT_CHAR.join(RecordIOShard.entry_key(
                             key_values[0])), key_values[0][:-1]))
                    for entry in key_values:
                        shard_does_not_exist.insert(entry)
                    continue
                # NOTE(review): lo_just_split / hi_just_split come from
                # commit_shard_; presumably the (lo, hi) ranges of the two
                # shards created when the previous chunk caused a split -
                # confirm against commit_shard_.
                lo_just_split = None
                hi_just_split = None
                for key_values_chunk in get_chunks(key_values,
                                                   MAX_WRITE_BATCH_SIZE):
                    if lo_just_split and hi_just_split and key_values_chunk:
                        # Re-target this chunk at whichever of the two ranges
                        # contains its first entry.
                        if RecordIORecords.in_range(key_values_chunk[0],
                                                    lo=lo_just_split[0],
                                                    hi=lo_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=lo_just_split[0],
                                hi=lo_just_split[1])
                        elif RecordIORecords.in_range(key_values_chunk[0],
                                                      lo=hi_just_split[0],
                                                      hi=hi_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=hi_just_split[0],
                                hi=hi_just_split[1])
                    not_deleted = None
                    try:
                        not_deleted, lo_just_split, hi_just_split = (
                            self.commit_shard_(shard_name, key_values_chunk))
                    except RecordIOShardDoesNotExistError:
                        logging.debug("Shard does not exist:\n" + shard_name)
                        lo_just_split = None
                        hi_just_split = None
                        for entry in key_values_chunk:
                            shard_does_not_exist.insert(entry)
                    if not not_deleted:
                        continue
                    # Entries handed back by commit_shard_ are looked up again
                    # and committed to the shard they are mapped to.
                    for to_delete_shard_name, to_delete_key_values in (
                            RecordIOShard.get_shards_for_key_values(
                                self.name, not_deleted)):
                        self.db_search += 1
                        try:
                            self.commit_shard_(to_delete_shard_name,
                                               to_delete_key_values)
                        except RecordIOShardDoesNotExistError:
                            # Report the shard that was actually tried here,
                            # not the shard of the enclosing chunk. Lazy
                            # formatting also copes with a None shard name.
                            logging.debug("Shard does not exist:\n%s",
                                          to_delete_shard_name)
                            for entry in to_delete_key_values:
                                shard_does_not_exist.insert(entry)
            self.updates = shard_does_not_exist
            if not len(self.updates):
                return
            if attempt == retries:
                raise RecordIOWriterNotCompletedError(len(self.updates))
            logging.debug("Commit attempt %d failed" % attempt)
            time.sleep(retry_timeout)
示例#11
0
  def commit_sync(self, retries=32, retry_timeout=1):
    """Applies all changes synchronously to the RecordIO.

    Each attempt looks up the shard of every pending update and commits the
    updates chunk by chunk with commit_shard_. Entries whose shard could not
    be found are collected and become the pending updates of the next attempt.

    :param retries: How many times a commit_sync should be retried in case of
                    datastore collisions.
    :param retry_timeout: The number of seconds to wait before the next retry.
    :raises RecordIOWriterNotCompletedError: If there are still pending updates
             after the last attempt.
    """
    if not len(self.updates):
      return
    for attempt in range(retries + 1):
      shard_does_not_exist = RecordIORecords()
      for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
          self.name, self.updates):
        self.db_search += 1
        if shard_name is None and key_values:
          logging.debug("RecordIO %s: No shard found for:\n%s -> %s" %
              (self.name,
               SPLIT_CHAR.join(RecordIOShard.entry_key(key_values[0])),
               key_values[0][:-1]))
          for entry in key_values:
            shard_does_not_exist.insert(entry)
          continue
        # NOTE(review): lo_just_split / hi_just_split come from commit_shard_;
        # presumably the (lo, hi) ranges of the two shards created when the
        # previous chunk caused a split - confirm against commit_shard_.
        lo_just_split = None
        hi_just_split = None
        for key_values_chunk in get_chunks(key_values, MAX_WRITE_BATCH_SIZE):
          if lo_just_split and hi_just_split and key_values_chunk:
            # Re-target this chunk at whichever of the two ranges contains its
            # first entry.
            if RecordIORecords.in_range(key_values_chunk[0],
                                        lo=lo_just_split[0],
                                        hi=lo_just_split[1]):
              shard_name = RecordIOShard.key_name(self.name,
                                                  lo=lo_just_split[0],
                                                  hi=lo_just_split[1])
            elif RecordIORecords.in_range(key_values_chunk[0],
                                          lo=hi_just_split[0],
                                          hi=hi_just_split[1]):
              shard_name = RecordIOShard.key_name(self.name,
                                                  lo=hi_just_split[0],
                                                  hi=hi_just_split[1])
          not_deleted = None
          try:
            not_deleted, lo_just_split, hi_just_split = self.commit_shard_(
                shard_name, key_values_chunk)
          except RecordIOShardDoesNotExistError:
            logging.debug("Shard does not exist:\n" + shard_name)
            lo_just_split = None
            hi_just_split = None
            for entry in key_values_chunk:
              shard_does_not_exist.insert(entry)
          if not not_deleted:
            continue
          # Entries handed back by commit_shard_ are looked up again and
          # committed to the shard they are mapped to.
          for to_delete_shard_name, to_delete_key_values in (
               RecordIOShard.get_shards_for_key_values(
                   self.name, not_deleted)):
            self.db_search += 1
            try:
              self.commit_shard_(to_delete_shard_name, to_delete_key_values)
            except RecordIOShardDoesNotExistError:
              # Report the shard that was actually tried here, not the shard
              # of the enclosing chunk. Lazy formatting also copes with a
              # None shard name.
              logging.debug("Shard does not exist:\n%s", to_delete_shard_name)
              for entry in to_delete_key_values:
                shard_does_not_exist.insert(entry)
      self.updates = shard_does_not_exist
      if not len(self.updates):
        return
      if attempt == retries:
        raise RecordIOWriterNotCompletedError(len(self.updates))
      logging.debug("Commit attempt %d failed" % attempt)
      time.sleep(retry_timeout)