Пример #1
0
 def testReadFromThreeShards(self):
     """Reads back entries that are spread over three adjacent shards."""
     # Three shards whose boundaries are the keys "1" and "3".
     first = RecordIOShard.create("test", hi=("1", ))
     first.insert(("0", STRING + "a"))
     first.commit()
     middle = RecordIOShard.create("test", lo=("1", ), hi=("3", ))
     middle.insert(("1", STRING + "b"))
     middle.insert(("2", STRING + "c"))
     middle.commit()
     last = RecordIOShard.create("test", lo=("3", ))
     last.insert(("3", STRING + "d"))
     last.commit()

     reader = RecordIOReader("test")
     a, b, c, d = ("0", "a"), ("1", "b"), ("2", "c"), ("3", "d")
     self.assertEqual([a, b, c, d], list(reader))
     # Each case is (expected entries, keyword arguments for reader.read()).
     # The expectations show that end_key is exclusive.
     range_cases = [
         ([a, b, c, d], {"start_key": "0"}),
         ([a, b, c, d], {"end_key": "4"}),
         ([b, c, d], {"start_key": "1"}),
         ([c, d], {"start_key": "2"}),
         ([a, b], {"end_key": "2"}),
         ([b, c], {"start_key": "1", "end_key": "3"}),
         ([b], {"start_key": "1", "end_key": "2"}),
     ]
     for expected, key_range in range_cases:
         self.assertEqual(expected, list(reader.read(**key_range)))
Пример #2
0
 def testReadFromThreeShards(self):
   """Checks full and ranged reads across three consecutive shards."""
   # (bounds passed to RecordIOShard.create, entries stored in that shard)
   shard_specs = (
       ({"hi": ("1",)}, [("0", "a")]),
       ({"lo": ("1",), "hi": ("3",)}, [("1", "b"), ("2", "c")]),
       ({"lo": ("3",)}, [("3", "d")]),
   )
   for bounds, entries in shard_specs:
     shard = RecordIOShard.create("test", **bounds)
     for key, value in entries:
       shard.insert((key, STRING + value))
     shard.commit()

   reader = RecordIOReader("test")
   expected = [("0", "a"), ("1", "b"), ("2", "c"), ("3", "d")]
   self.assertEqual(expected, list(reader))
   self.assertEqual(expected, list(reader.read(start_key="0")))
   self.assertEqual(expected, list(reader.read(end_key="4")))
   # Ranged reads: the start key is included, the end key is not.
   self.assertEqual(expected[1:], list(reader.read(start_key="1")))
   self.assertEqual(expected[2:], list(reader.read(start_key="2")))
   self.assertEqual(expected[:2], list(reader.read(end_key="2")))
   self.assertEqual(expected[1:3],
                    list(reader.read(start_key="1", end_key="3")))
   self.assertEqual(expected[1:2],
                    list(reader.read(start_key="1", end_key="2")))
Пример #3
0
  def all_names():
    """Iterates over the names of all existing RecordIOs.

    Only shards whose ``index`` property is True are queried, and only their
    keys are fetched.

    :return: Yields one RecordIO name per matching shard key.
    """
    index_shard_keys = RecordIOShard.all(keys_only=True).filter("index =", True)
    for shard_key in index_shard_keys:
      yield RecordIOShard.get_name(shard_key.name())
Пример #4
0
 def write2MBAndReplace(self, compressed):
   """Writes one large value under a key, then replaces it with a short one.

   Checks first that the large value ends up spread over several shards and
   entries, and then that after the overwrite only the first shard returned
   still holds an entry.

   :param compressed: Forwarded to RecordIOWriter.create() as ``compressed``.
   """
   test_string = test_helper.uncompressableString(2**21)
   updater = RecordIOWriter("test")
   updater.create(compressed=compressed)
   updater.insert("test", test_string)
   updater.commit_sync()
   output = []
   entries = 0
   shards_count = 0
   # Collect the last element of every stored entry, shard by shard.
   for recordio in RecordIOShard.all():
     self.assertTrue(len(recordio.data) >= 1000)
     shards_count += 1
     for entry in recordio:
       output += [entry[-1]]
       entries += 1
   self.assertTrue(shards_count > 1)
   self.assertTrue(entries > 3)
   # Joined in iteration order, the pieces must give back the written value
   # behind the STRING prefix.
   self.assertEqual("".join(output), STRING + test_string, "read != write")
   # Overwrite the same key with a short value.
   updater.insert("test", "short")
   updater.commit_sync(retries=0)
   replaced_shards_count = 0
   for recordio in RecordIOShard.all():
     if replaced_shards_count == 0:
       # The first shard returned holds the single replacement entry.
       self.assertEqual(1, len(recordio))
       for entry in recordio:
         self.assertEqual(STRING + "short", entry[-1])
     else:
       # Every other remaining shard must be empty.
       self.assertEqual(0, len(recordio))
       for entry in recordio:
         self.fail("shouldnt be iterable")
     replaced_shards_count += 1
     self.assertTrue(len(recordio.data) < 1000)
   self.assertTrue(replaced_shards_count > 0)
   self.assertTrue(replaced_shards_count <= shards_count)
Пример #5
0
  def create(self, compressed=True, pre_split=None):
    """Creates a RecordIO in datastore. If the RecordIO exists, nothing happens

    :param compressed: Boolean if the data in the RecordIO should be gzipped.
    :param pre_split: An optional iterable of keys that should be used to
                      pre-split the internal data shards. This only makes
                      sense if you are going to write a lot of data and you
                      already know the key range of the data and roughly how
                      many entries fit into one shard. The argument itself is
                      not modified; a sorted copy is used.
    :return: True, if the RecordIO didn't exist before.
    """
    self.db_search += 1
    existing = RecordIOShard.get_all_query(self.name, keys_only=True).get()
    if existing is not None:
      return False
    self.db_put += 1
    # Sort a copy so that neither the caller's list nor a shared default
    # value is ever mutated.
    boundaries = [None] + [(x,) for x in sorted(pre_split or ())] + [None]
    # Consecutive boundaries form the (lo, hi) range of each shard; None
    # stands for an open end.
    for lo, hi in zip(boundaries, boundaries[1:]):
      # Only the shard without a lower bound is flagged as the index shard.
      index = True if lo is None else None
      RecordIOShard.get_or_insert(
          RecordIOShard.key_name(self.name, lo=lo, hi=hi),
          compressed=compressed, index=index)
    return True
Пример #6
0
 def testShardNamesForKeysMissingLo(self):
   """A key below the only existing shard is reported under ``None``."""
   upper_shard = RecordIOShard.create("test", lo="1")
   for entry in (("1", "b"), ("2", "c")):
     upper_shard.insert(entry)
   upper_shard.commit()
   lookup = RecordIOShard.get_shards_for_key_values("test", [("0", )])
   self.assertEqual({None: [("0", )]}, self.getResult(lookup))
Пример #7
0
 def testKeyName(self):
   """Checks key-name encoding and that name/bounds survive a round trip."""
   expected_key_name = ("te%7Cst!0!647c64!0000000000!0000000001!0000000000"
                        "!627c62!0000000000!0000000001!0000000000")
   key_name = RecordIOShard.key_name("te|st", ("b|b",), ("d|d",))
   self.assertEqual(expected_key_name, key_name)

   shard = RecordIOShard.create("te|st", ("b|b", 0, 1, 0), ("d|d",))
   self.assertEqual("te|st", shard.name())
   # The short hi bound comes back expanded to a full four-element tuple.
   lo_bound = ("b|b", 0, 1, 0)
   hi_bound = ("d|d", 0, 1, 0)
   self.assertEqual((lo_bound, hi_bound), shard.lo_hi())
Пример #8
0
 def testShardNamesForKeysEmpty(self):
   """The empty key is assigned to the single unbounded shard."""
   shard = RecordIOShard.create("test")
   for entry in (("0", "a"), ("1", "b"), ("2", "c")):
     shard.insert(entry)
   shard.commit()
   empty_key = ("", )
   expected = {RecordIOShard.key_name("test"): [empty_key]}
   found = self.getResult(
       RecordIOShard.get_shards_for_key_values("test", [empty_key]))
   self.assertEqual(expected, found)
Пример #9
0
  def commit_shard_(self, shard_name, key_values):
    """Adds key, values to a shard and splits it if necessary.

    :param shard_name: The key name of the RecordIOShard.
    :param key_values: A list of key values to be added
    :return: A tuple (not_deleted, lo_range, hi_range). not_deleted is the
             result of shard.not_deleted() (per the original documentation,
             the keys that need to be deleted in other shards). lo_range and
             hi_range are both None when the shard was committed as is;
             after a split they are the lo_hi() bounds of the two new shards.
    :raises RecordIOShardDoesNotExistError: If no shard with this key name
             could be loaded.
    """
    shard = RecordIOShard.get_by_key_name(shard_name)
    self.db_get += 1
    if shard is None:
      raise RecordIOShardDoesNotExistError(shard_name)
    for entry in key_values:
      shard.insert(entry)
    try:
      shard.commit()
      self.db_put += 1
      return (shard.not_deleted(), None, None)
    except (RecordIOShardTooBigError,
            RequestTooLargeError, ValueError, ArgumentError, BadRequestError):
      # The shard could not be stored as a whole: replace it by two halves.
      shard.delete()
      lo_shard, hi_shard = shard.split()
      lo_shard.commit()
      hi_shard.commit()
      self.db_put += 2
      # Arguments are handed to logging so the message is only formatted
      # when debug logging is enabled.
      logging.debug("Split\n%s\n%s\n%s", shard.key().name(),
                    lo_shard.key().name(), hi_shard.key().name())
      return shard.not_deleted(), (lo_shard.lo_hi()), (hi_shard.lo_hi())
Пример #10
0
 def testReadFromInexistingLoShards(self):
     """Reading fails when the shard below key "1" does not exist."""
     upper_shard = RecordIOShard.create("test", lo="1")
     for key, letter in (("1", "b"), ("2", "c")):
         upper_shard.insert((key, STRING + letter))
     upper_shard.commit()
     self.assertRaises(RecordIOShardDoesNotExistError,
                       self.readAll, RecordIOReader("test"))
Пример #11
0
 def testReadSplitEntries(self):
   """Split entries are joined on read; incomplete ones are reported."""
   raw_entries = [
       ("a", STRING + "a"),
       ("b", 0, 1, 1, STRING + "b"),
       ("c", STRING + "c"),
       ("d", 0, 2, 1, STRING + "1"),
       ("d", 1, 2, 1, "2"),
       ("e", 0, 3, 1, STRING + "1"),
       ("e", 1, 3, 1, "2"),
       ("e", 2, 3, 1, "3"),
       ("f", STRING + "f"),
       ("g", 0, 2, 2, STRING + "1"),
       ("g", 1, 2, 1, "bad"),
       ("g", 1, 2, 2, "2"),
       ("g_missing_1", 0, 3, 1, STRING + "bad"),
       ("g_missing_1", 1, 3, 1, "bad"),
       ("g_missing_2", 1, 2, 1, "bad"),
       ("h", STRING + "h"),
   ]
   shard = RecordIOShard.create("test", compressed=False)
   for raw_entry in raw_entries:
     shard.insert(raw_entry)
   shard.commit()

   reader = RecordIOReader("test")
   joined = [("a", "a"), ("b", "b"), ("c", "c"), ("d", "12"),
             ("e", "123"), ("f", "f"), ("g", "12"), ("h", "h")]
   self.assertEqual(joined, list(reader.read()))
   self.assertEqual(["g_missing_1"], reader.get_not_read())
Пример #12
0
  def insertGetAndOrder(self, compressed):
    """Inserts shuffled keys twice and checks lookup and iteration order.

    :param compressed: Forwarded to RecordIOShard.create() as ``compressed``.
    """
    shard = RecordIOShard.create("test", compressed=compressed)
    shard.insert(("a", "a"))

    shuffled = self.getStrings()
    assert(len(shuffled) > 1)
    random.shuffle(shuffled)
    for text in shuffled:
      shard.insert((text, text))
    self.assertEqual(len(shuffled), len(shard))
    # Store the reversed key as the new value for every key; the number of
    # entries must stay the same.
    for text in shuffled:
      shard.insert((text, "".join(reversed(text))))
    self.assertEqual(len(shuffled), len(shard))

    # Spot-check membership and lookup on every 500th key.
    for text in shuffled[::500]:
      self.assertTrue(text in shard)
      self.assertEqual(shard[(text,)], "".join(reversed(text)))

    # Iteration must follow the order of a fresh, unshuffled getStrings().
    ordered = self.getStrings()
    for position, (key, value) in enumerate(shard):
      self.assertEqual(ordered[position], key)
      self.assertEqual("".join(reversed(ordered[position])), value)
    assert("not_in" not in ordered)
    self.assertFalse("not_in" in shard)
Пример #13
0
 def testTaskQueue(self):
   """Commits asynchronously, runs the queued write tasks, reads back."""
   writer = RecordIOWriter("test")
   writer.create(compressed=False)
   test_value = test_helper.uncompressableString(MAX_ENTRY_SIZE-1)
   entries_to_write = MAX_BLOB_SIZE / MAX_ENTRY_SIZE + 1
   keys = [str(i) for i in range(entries_to_write)]
   for key in keys:
     writer.insert(key, test_value)
   writer.commit_async()

   # Feed every task of the "recordio-writer" queue to a WriteHandler.
   taskq = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
   for task in taskq.GetTasks("recordio-writer"):
     url = task["url"]
     query = urlparse.parse_qs(base64.b64decode(task["body"]))
     # parse_qs returns a list per parameter; keep the first value only.
     params = dict((name, values[0]) for name, values in query.items())
     test_helper.requestGet(WriteHandler(), url, params)
   assert(len([x for x in RecordIOShard.all()]) > 1)

   result = {}
   for key, value in RecordIOReader("test"):
     result[key] = value
   self.assertEqual(len(result), entries_to_write)
   for key in keys:
     self.assertEqual(result[key], test_value, "Not equal")
Пример #14
0
  def commit_to_queue_(self):
    """Adds all pending changes to the task queues for async commits

    Updates are grouped by shard, cut into chunks and added as PULL tasks
    tagged with the shard name. Updates without a shard, and chunks whose
    task could not be added, are kept in self.updates.

    :return: Yields the shard name of every chunk that was queued
             successfully (a name is yielded once per chunk).
    :raises RecordIOWriterNotCompletedError: After all tasks were checked,
             if some updates could not be queued.
    """
    pull = taskqueue.Queue('recordio-queue')
    rpcs = []
    key_values_not_added = RecordIORecords()
    for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
          self.name, self.updates):
      self.db_search += 1
      if shard_name is None:
        # No shard was found for these keys; keep them pending.
        for entry in key_values:
          key_values_not_added.insert(entry)
        continue
      for key_values_chunk in get_chunks(key_values, MAX_TASKQUEUE_BATCH_SIZE):
        payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
        rpc = pull.add_async(taskqueue.Task(payload=payload, method='PULL',
                                            tag=shard_name))
        rpcs.append((rpc, key_values_chunk, shard_name))

    for rpc, key_values, shard_name in rpcs:
      try:
        rpc.get_result()
      except Exception:
        # Adding the task failed; keep its entries for a later attempt.
        for entry in key_values:
          key_values_not_added.insert(entry)
      else:
        # The yield is deliberately outside the try block: a GeneratorExit
        # raised here when the consumer closes the generator must not be
        # swallowed, nor must a queued chunk be recorded as failed.
        yield shard_name
    self.updates = key_values_not_added
    if len(self.updates):
      raise RecordIOWriterNotCompletedError(len(self.updates))
Пример #15
0
  def delete(self):
    """Deletes this RecordIO from the datastore.

    Deletion may run into errors while the RecordIO is being modified or
    while queued writes are being applied.
    """
    shard_keys_query = RecordIOShard.get_all_query(self.name, keys_only=True)
    db.delete(shard_keys_query)
Пример #16
0
    def commit_to_queue_(self):
        """Adds all pending changes to the task queues for async commits

        Updates are grouped by shard, cut into chunks and added as PULL
        tasks tagged with the shard name. Updates without a shard, and
        chunks whose task could not be added, are kept in self.updates.

        :return: Yields the shard name of every chunk that was queued
                 successfully (a name is yielded once per chunk).
        :raises RecordIOWriterNotCompletedError: After all tasks were
                 checked, if some updates could not be queued.
        """
        pull = taskqueue.Queue('recordio-queue')
        rpcs = []
        key_values_not_added = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                self.name, self.updates):
            self.db_search += 1
            if shard_name is None:
                # No shard was found for these keys; keep them pending.
                for entry in key_values:
                    key_values_not_added.insert(entry)
                continue
            for key_values_chunk in get_chunks(key_values,
                                               MAX_TASKQUEUE_BATCH_SIZE):
                payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                rpc = pull.add_async(
                    taskqueue.Task(payload=payload,
                                   method='PULL',
                                   tag=shard_name))
                rpcs.append((rpc, key_values_chunk, shard_name))

        for rpc, key_values, shard_name in rpcs:
            try:
                rpc.get_result()
            except Exception:
                # Adding the task failed; keep its entries for a later try.
                for entry in key_values:
                    key_values_not_added.insert(entry)
            else:
                # The yield is deliberately outside the try block: a
                # GeneratorExit raised here when the consumer closes the
                # generator must not be swallowed, nor must a queued chunk
                # be recorded as failed.
                yield shard_name
        self.updates = key_values_not_added
        if len(self.updates):
            raise RecordIOWriterNotCompletedError(len(self.updates))
Пример #17
0
    def commit_shard_(self, shard_name, key_values):
        """Adds key, values to a shard and splits it if necessary.

        :param shard_name: The key name of the RecordIOShard.
        :param key_values: A list of key values to be added
        :return: A tuple (not_deleted, lo_range, hi_range). not_deleted is
                 the result of shard.not_deleted() (per the original
                 documentation, the keys that need to be deleted in other
                 shards). lo_range and hi_range are both None when the
                 shard was committed as is; after a split they are the
                 lo_hi() bounds of the two new shards.
        :raises RecordIOShardDoesNotExistError: If no shard with this key
                 name could be loaded.
        """
        shard = RecordIOShard.get_by_key_name(shard_name)
        self.db_get += 1
        if shard is None:
            raise RecordIOShardDoesNotExistError(shard_name)
        for entry in key_values:
            shard.insert(entry)
        try:
            shard.commit()
            self.db_put += 1
            return (shard.not_deleted(), None, None)
        except (RecordIOShardTooBigError, RequestTooLargeError, ValueError,
                ArgumentError, BadRequestError):
            # The shard could not be stored as a whole: replace it by two
            # halves.
            shard.delete()
            lo_shard, hi_shard = shard.split()
            lo_shard.commit()
            hi_shard.commit()
            self.db_put += 2
            # Arguments are handed to logging so the message is only
            # formatted when debug logging is enabled.
            logging.debug("Split\n%s\n%s\n%s", shard.key().name(),
                          lo_shard.key().name(), hi_shard.key().name())
            return shard.not_deleted(), (lo_shard.lo_hi()), (hi_shard.lo_hi())
Пример #18
0
    def delete(self):
        """Deletes this RecordIO from the datastore.

        Deletion may run into errors while the RecordIO is being modified
        or while queued writes are being applied.
        """
        shard_keys_query = RecordIOShard.get_all_query(self.name,
                                                       keys_only=True)
        db.delete(shard_keys_query)
Пример #19
0
 def testReadFromInexistingHiShards(self):
   """Reading fails when the shard from key "1" upwards does not exist."""
   lower_shard = RecordIOShard.create("test", hi="1")
   lower_shard.insert(("0", STRING + "a"))
   lower_shard.commit()
   self.assertRaises(RecordIOShardDoesNotExistError,
                     self.readAll, RecordIOReader("test"))
   
Пример #20
0
 def testReadFromInexistingLoShards(self):
   """Reading fails when only the shard starting at key "1" exists."""
   only_shard = RecordIOShard.create("test", lo="1")
   entries = [("1", STRING + "b"), ("2", STRING + "c")]
   for entry in entries:
     only_shard.insert(entry)
   only_shard.commit()
   test_reader = RecordIOReader("test")
   self.assertRaises(RecordIOShardDoesNotExistError, self.readAll, test_reader)
Пример #21
0
 def testGetAllQuery(self):
   """get_all_query returns the shard keys ordered by their key range."""
   all_bounds = (
       {"hi": ("a", "")},
       {"lo": ("a", ""), "hi": ("b", "")},
       {"lo": ("b", "")},
   )
   for bounds in all_bounds:
     RecordIOShard.create("test", **bounds).commit()

   a_bound = ("a", 0, 1, 0)
   b_bound = ("b", 0, 1, 0)
   expected = [(None, a_bound), (a_bound, b_bound), (b_bound, None)]
   shard_keys = RecordIOShard.get_all_query("test", keys_only=True)
   actual = [RecordIOShard.lo_hi_from_key(key.name()) for key in shard_keys]
   self.assertEqual(expected, actual)
Пример #22
0
 def testSplitEntriesSplit(self):
   """Splitting a shard that holds the three parts of one split entry."""
   shard = RecordIOShard.create("test", compressed=False)
   for part in range(3):
     shard.insert(("b", part, 3, 3, "bb"))
   lo_half, hi_half = shard.split()
   # Both halves must meet at the third part of the entry.
   boundary = ('b', 2, 3, 3)
   self.assertEqual((None, boundary), lo_half.lo_hi())
   self.assertEqual((boundary, None), hi_half.lo_hi())
Пример #23
0
 def testWriteDuringSplit(self):
   """Writes while only the lower half of a split shard has been stored.

   As long as the upper half is missing, committing must fail; once it has
   been committed, the pending and new updates must end up in the right
   halves.
   """
   recordio = RecordIOShard.create("test", compressed=False)
   recordio.insert(("1", STRING + "1"))
   recordio.insert(("2", STRING + "2"))
   lo_shard, hi_shard = recordio.split()
   # Only the lower half is committed at this point.
   lo_shard.commit()
   updater = RecordIOWriter("test")
   updater.insert("3", "3")
   # Committing straight to the uncommitted upper half must fail.
   self.assertRaises(RecordIOShardDoesNotExistError,
                     updater.commit_shard_,
                     hi_shard.key().name(), updater.updates)
   # NOTE(review): the meaning of the positional arguments 32 and 0 is not
   # visible here; the second is presumably ``retries`` - confirm against
   # RecordIOWriter.commit_sync.
   self.assertRaises(RecordIOWriterNotCompletedError,
                     updater.commit_sync,
                     32, 0)
   # Finish the split; now the writes can go through.
   hi_shard.commit()
   updater.insert("0", STRING + "0")
   updater.commit_sync()
   # Exactly two shards must exist, otherwise the unpacking fails.
   lo_shard, hi_shard = [x for x in RecordIOShard.all()]
   self.assertEqual([x[0] for x in lo_shard], ["0", "1"])
   self.assertEqual([x[0] for x in hi_shard], ["2", "3"])
Пример #24
0
 def testCommitToQueue(self):
   """Updates exceeding one task batch are queued as two tagged tasks."""
   updater = RecordIOWriter("test")
   updater.create()
   chunk_size = MAX_ENTRY_SIZE - 1
   entries_to_write = MAX_TASKQUEUE_BATCH_SIZE / MAX_ENTRY_SIZE + 1
   keys = [str("%09d" % i) for i in xrange(entries_to_write)]
   for key in keys:
     updater.insert(key, test_helper.uncompressableString(chunk_size))
   # Exhaust the generator so that all tasks are added.
   list(updater.commit_to_queue_())

   queue = taskqueue.Queue('recordio-queue')
   tasks = list(queue.lease_tasks(60, 100))
   self.assertEqual(len(tasks), 2)
   first_task, second_task = tasks
   self.assertEqual(first_task.tag, RecordIOShard.key_name("test"))
   self.assertEqual(second_task.tag, RecordIOShard.key_name("test"))
   first_updates = marshal.loads(first_task.payload)
   second_updates = marshal.loads(second_task.payload)
   queued_keys = ([x[0] for x in first_updates] +
                  [x[0] for x in second_updates])
   self.assertEqual(keys, queued_keys)
   expected_value = STRING + test_helper.uncompressableString(chunk_size)
   self.assertTrue(first_updates[0][1] == expected_value)
Пример #25
0
 def testReadStringMarshalPickle(self):
     """Values stored with each type prefix are decoded by the reader."""
     marshalable = {"a": [1, 2, 3, u"asd"]}
     pickleable = AnyClass()
     shard = RecordIOShard.create("test")
     shard.insert(("string", STRING + "string"))
     shard.insert(("marshal", MARSHAL + marshal.dumps(marshalable)))
     shard.insert(("cpickle", CPICKLE + cPickle.dumps(pickleable)))
     shard.commit()

     # Entries come back ordered by key.
     expected = [
         ("cpickle", pickleable),
         ("marshal", marshalable),
         ("string", "string"),
     ]
     self.assertEqual(expected, list(RecordIOReader("test")))
Пример #26
0
    def read_entries_(self, start_key=None, end_key=None):
        """An internal helper function to read split entries.

        Walks the shards one after another, starting with the shard found
        for start_key and continuing at each shard's upper bound, until the
        last shard was read or the next shard would start at or after
        end_key.

        :param start_key: An entry tuple (no value needed)
        :param end_key: An entry tuple (no value needed) Exclusive.
        :return: Yields key, split_values
        :raises RecordIOShardDoesNotExistError: If the shard lookup for the
                 current key yields None instead of a shard.
        """
        # TODO (andrin): fetch a couple of shards instead of just one based on
        #                method argument
        current_key = start_key
        if current_key == None:
            # No start key given: begin the walk with the key ("", ).
            current_key = ("", )
        # Key name built from start_key/end_key as bounds, split into its
        # fields so it can be compared field-wise with real shard key names.
        limit_shard_name = RecordIOShard.key_name(self.name,
                                                  lo=start_key,
                                                  hi=end_key).split(SPLIT_CHAR)
        while True:
            # Only the first element of the first lookup result is used.
            shard = RecordIOShard.get_shards_for_key_values(
                self.name, [current_key], keys_only=False).next()[0]
            self.db_search_and_get += 1
            if shard == None:
                raise RecordIOShardDoesNotExistError(self.name)
            hi = shard.lo_hi()[1]
            shard_name = shard.key().name().split(SPLIT_CHAR)
            # NOTE(review): fields [6:10] presumably encode a key name's lo
            # bound and [2:5] its hi bound - confirm against
            # RecordIOShard.key_name. If so, this condition holds when the
            # shard lies completely inside the requested range.
            if (shard_name[6:10] >= limit_shard_name[6:10]
                    and (shard_name[2:5] < limit_shard_name[2:5]
                         or limit_shard_name[2] == SPLIT_CHAR_AFTER)):
                # Read the whole shard
                for entry in shard:
                    yield entry
            else:
                # Read parts of the shard
                for entry in shard.read(current_key, end_key):
                    yield entry
            if hi == None:
                # Was the last shard
                return
            # Continue with the shard that starts at this shard's upper bound.
            current_key = hi
            if (end_key != None and RecordIORecords.entry_comperator(
                    current_key, end_key) >= 0):
                # Next shard is after end_key
                return
Пример #27
0
 def testReadStringMarshalPickle(self):
   """Reads back a plain string, a marshalled and a pickled value."""
   marshalable = {"a": [1,2,3, u"asd"]}
   pickleable = AnyClass()
   # (key, stored value including its type prefix), in insertion order.
   stored = [
       ("string", STRING + "string"),
       ("marshal", MARSHAL + marshal.dumps(marshalable)),
       ("cpickle", CPICKLE + cPickle.dumps(pickleable)),
   ]
   shard = RecordIOShard.create("test")
   for stored_entry in stored:
     shard.insert(stored_entry)
   shard.commit()
   decoded = list(RecordIOReader("test"))
   self.assertEqual([("cpickle", pickleable),
                     ("marshal", marshalable),
                     ("string", "string")], decoded)
Пример #28
0
 def testShardNamesForShorterKeys(self):
   """A one-element key is matched against two-element shard bounds."""
   boundary = ("a", "")
   RecordIOShard.create("test", hi=boundary).commit()
   RecordIOShard.create("test", lo=boundary).commit()
   # ("aa", ) must be assigned to the shard that starts at the boundary.
   expected = {RecordIOShard.key_name("test", lo=boundary): [("aa", )]}
   found = self.getResult(
       RecordIOShard.get_shards_for_key_values("test", [("aa",)]))
   self.assertEqual(expected, found)
Пример #29
0
  def read_entries_(self, start_key=None, end_key=None):
    """An internal helper function to read split entries.

    Walks the shards one after another, starting with the shard found for
    start_key and continuing at each shard's upper bound, until the last
    shard was read or the next shard would start at or after end_key.

    :param start_key: An entry tuple (no value needed)
    :param end_key: An entry tuple (no value needed) Exclusive.
    :return: Yields key, split_values
    :raises RecordIOShardDoesNotExistError: If the shard lookup for the
             current key yields None instead of a shard.
    """
    # TODO (andrin): fetch a couple of shards instead of just one based on
    #                method argument
    current_key = start_key
    if current_key == None:
      # No start key given: begin the walk with the key ("", ).
      current_key = ("", )
    # Key name built from start_key/end_key as bounds, split into its fields
    # so it can be compared field-wise with real shard key names.
    limit_shard_name = RecordIOShard.key_name(
        self.name, lo=start_key, hi=end_key).split(SPLIT_CHAR)
    while True:
      # Only the first element of the first lookup result is used.
      shard = RecordIOShard.get_shards_for_key_values(
          self.name, [current_key], keys_only=False).next()[0]
      self.db_search_and_get += 1
      if shard == None:
        raise RecordIOShardDoesNotExistError(self.name)
      hi = shard.lo_hi()[1]
      shard_name = shard.key().name().split(SPLIT_CHAR)
      # NOTE(review): fields [6:10] presumably encode a key name's lo bound
      # and [2:5] its hi bound - confirm against RecordIOShard.key_name. If
      # so, this condition holds when the shard lies completely inside the
      # requested range.
      if (shard_name[6:10] >= limit_shard_name[6:10] and
          (shard_name[2:5] < limit_shard_name[2:5] or
           limit_shard_name[2] == SPLIT_CHAR_AFTER)):
        # Read the whole shard
        for entry in shard:
          yield entry
      else:
        # Read parts of the shard
        for entry in shard.read(current_key, end_key):
          yield entry
      if hi == None:
        # Was the last shard
        return
      # Continue with the shard that starts at this shard's upper bound.
      current_key = hi
      if (end_key != None and
          RecordIORecords.entry_comperator(current_key, end_key) >= 0):
        # Next shard is after end_key
        return
Пример #30
0
 def testCommitToQueueAndScheduleWrite(self):
   """commit_async schedules exactly one write task for the shard."""
   updater = RecordIOWriter("test")
   updater.create()
   updater.insert("a", "")
   updater.commit_async()

   taskq = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
   scheduled = taskq.GetTasks("recordio-writer")
   self.assertEqual(len(scheduled), 1)
   write_task = scheduled[0]
   self.assertEqual(write_task["url"], "/recordio/write")
   # The task body names the shard whose queued updates should be applied.
   decoded_body = base64.b64decode(write_task["body"])
   expected_body = "taskqueue=" + urllib.quote(RecordIOShard.key_name("test"))
   self.assertEqual(decoded_body, expected_body)
Пример #31
0
 def writeOneShard(self, compressed):
   """Writes with two writers in a row and checks the resulting shard.

   :param compressed: Forwarded to RecordIOWriter.create() and expected on
                      the stored shard afterwards.
   """
   first_writer = RecordIOWriter("test")
   first_writer.create(compressed=compressed)
   for key, value in (("1", "foo"), ("2", "bar")):
     first_writer.insert(key, value)
   first_writer.commit_sync()

   # A second writer adds one key and removes another.
   second_writer = RecordIOWriter("test")
   second_writer.insert("3", "win")
   second_writer.remove("2")
   second_writer.commit_sync()

   shard = RecordIOShard.all().get()
   self.assertEqual(shard.compressed, compressed)
   remaining = [("1", STRING + "foo"), ("3", STRING + "win")]
   self.assertEqual([entry for entry in shard], remaining)
Пример #32
0
 def testShardNamesForKeysSplit(self):
   """Checks the key-to-shard assignment before and after splitting."""
   recordio = RecordIOShard.create("test")
   test_strings = [str(x) for x in range(10)]
   for x in test_strings:
     recordio.insert((x, test_helper.uncompressableString(2**16)))
   recordio.commit()
   # With a single shard every key is assigned to that shard.
   self.assertEqual({ RecordIOShard.key_name("test"):
                          [("0", ""), ("1", "")] },
                    self.getResult(RecordIOShard.get_shards_for_key_values(
                                   "test", [("0", ""), ("1", "")])))
   # Replace the stored shard by three shards: split it once, then split
   # the upper half again.
   recordio.delete()
   shard_0, shard_1 = recordio.split()
   shard_1, shard_2 = shard_1.split()
   shard_0.commit()
   shard_1.commit()
   shard_2.commit()
   # The ten keys are expected to be distributed 5 / 3 / 2 over the shards.
   self.assertEqual({ shard_0.key().name(): [('0', '0'), ('1', '1'),
                                             ('2', '2'), ('3', '3'),
                                             ('4', '4')],
                     shard_1.key().name(): [('5', '5'), ('6', '6'),
                                            ('7', '7')],
                     shard_2.key().name(): [('8', '8'), ('9', '9')]},
                     self.getResult(RecordIOShard.get_shards_for_key_values(
                                    "test", zip(test_strings, test_strings))))
Пример #33
0
 def testCommitToQueueSplitEntries(self):
   """A value larger than MAX_ENTRY_SIZE is queued as two split entries."""
   oversized = test_helper.uncompressableString(MAX_ENTRY_SIZE + 1)
   updater = RecordIOWriter("test")
   updater.create()
   updater.insert("test", oversized)
   # Exhaust the generator so that all tasks are added.
   list(updater.commit_to_queue_())

   queue = taskqueue.Queue('recordio-queue')
   tasks = list(queue.lease_tasks(60, 100))
   self.assertEqual(len(tasks), 1)
   only_task = tasks[0]
   self.assertEqual(only_task.tag, RecordIOShard.key_name("test"))
   updates = marshal.loads(only_task.payload)
   # Compare everything but the last two elements of each queued entry.
   entry_heads = [update[:-2] for update in updates]
   self.assertEqual([('test', 0, 2), ('test', 1, 2)], entry_heads)
   joined_value = "".join([update[-1] for update in updates])
   self.assertEqual(STRING + oversized, joined_value)
Пример #34
0
 def testWriteStringMarshalPickle(self):
   """The writer picks the STRING, MARSHAL or CPICKLE encoding per value."""
   class AnyClass():
     pass

   marshalable = {"a": [1,2,3]}
   pickleable = AnyClass()
   updater = RecordIOWriter("test")
   updater.create()
   for key, value in (("string", "string"),
                      ("marshal", marshalable),
                      ("cpickle", pickleable)):
     updater.insert(key, value)
   updater.commit_sync()

   shard = RecordIOShard.all().get()
   stored = [entry for entry in shard]
   self.assertEqual(stored,
                    [("cpickle", CPICKLE + cPickle.dumps(pickleable)),
                     ("marshal", MARSHAL + marshal.dumps(marshalable)),
                     ("string", STRING + "string")])
Пример #35
0
 def testSplit(self):
   """Splitting five entries gives a lower half of 3 and an upper of 2."""
   shard = RecordIOShard.create("test")
   keys = ["c", "a", "b", "d", "e"]
   for key in keys:
     shard.insert((key, test_helper.uncompressableString(ZIP_CHUNKS)))
   lo_half, hi_half = shard.split()

   self.assertEqual(3, len(lo_half))
   self.assertEqual(2, len(hi_half))
   # No key may get lost, and the halves must not overlap.
   for key in keys:
     self.assertTrue(key in lo_half or key in hi_half)
   self.assertTrue(max(lo_half) < min(hi_half))

   split_key = ('d', 0, 1, 0)
   self.assertEqual("test", lo_half.name())
   self.assertEqual((None, split_key), lo_half.lo_hi())
   self.assertEqual(["a", "b", "c"], [entry[0] for entry in lo_half])
   self.assertEqual("test", hi_half.name())
   self.assertEqual((split_key, None), hi_half.lo_hi())
   self.assertEqual(["d", "e"], [entry[0] for entry in hi_half])
Пример #36
0
 def readFromnOneShard(self, compressed):
     """Reads full and partial key ranges from a single shard.

     :param compressed: Forwarded to RecordIOShard.create().
     """
     shard = RecordIOShard.create("test", compressed=compressed)
     for key, letter in (("0", "a"), ("1", "b"), ("2", "c")):
         shard.insert((key, STRING + letter))
     shard.commit()

     reader = RecordIOReader("test")
     a, b, c = ("0", "a"), ("1", "b"), ("2", "c")
     self.assertEqual([a, b, c], list(reader))
     # Each case is (expected entries, keyword arguments for reader.read()).
     range_cases = [
         ([a, b, c], {"start_key": "0"}),
         ([a, b, c], {"end_key": "3"}),
         ([b, c], {"start_key": "1"}),
         ([a, b], {"end_key": "2"}),
         ([b], {"start_key": "1", "end_key": "2"}),
     ]
     for expected, key_range in range_cases:
         self.assertEqual(expected, list(reader.read(**key_range)))
     # Membership test and item access on the reader.
     self.assertTrue("0" in reader)
     self.assertFalse("3" in reader)
     self.assertEqual(reader["0"], "a")
Пример #37
0
 def readFromnOneShard(self, compressed):
   """Checks range reads, membership and lookup on a one-shard RecordIO.

   :param compressed: Forwarded to RecordIOShard.create().
   """
   expected = [("0", "a"), ("1", "b"), ("2", "c")]
   shard = RecordIOShard.create("test", compressed=compressed)
   for key, value in expected:
     shard.insert((key, STRING + value))
   shard.commit()

   reader = RecordIOReader("test")
   self.assertEqual(expected, list(reader))
   self.assertEqual(expected, list(reader.read(start_key="0")))
   self.assertEqual(expected, list(reader.read(end_key="3")))
   # Ranged reads: the start key is included, the end key is not.
   self.assertEqual(expected[1:], list(reader.read(start_key="1")))
   self.assertEqual(expected[:2], list(reader.read(end_key="2")))
   self.assertEqual(expected[1:2],
                    list(reader.read(start_key="1", end_key="2")))
   self.assertTrue("0" in reader)
   self.assertFalse("3" in reader)
   self.assertEqual(reader["0"], "a")
Пример #38
0
    def commit_batch(self, tag, batch):
        """Applies a batch of values to a RecordIO and deletes the taskqueue task,

        :param tag: The current tag we are working on
        :param batch: A list of (tasqueue_task, key_value_list)
        :return: True on success (also for an empty batch). False if the
                 write did not complete; the leases of the batch's tasks
                 are then set to 0.
        """
        if not batch:
            return True
        done_tasks = []
        count = 0
        writer = RecordIOWriter(RecordIOShard.get_name(tag))
        for done_task, key_values in batch:
            done_tasks.append(done_task)
            for entry in key_values:
                writer.insert_entry_(entry)
                count += 1
        try:
            writer.commit_sync(retries=1)
            try:
                self.pull.delete_tasks(done_tasks)
            except taskqueue.BadTaskStateError:
                # Deleting all tasks at once failed; retry one by one for
                # the tasks that are not deleted yet.
                for task in done_tasks:
                    if task.was_deleted:
                        continue
                    try:
                        self.pull.delete_tasks(task)
                    except taskqueue.BadTaskStateError:
                        # Both values are passed as logging arguments. The
                        # former '"..." % task.name, tag' filled two
                        # placeholders from one value and raised TypeError.
                        logging.debug(
                            "RecordIO Failed to free task %s on %s",
                            task.name, tag)
            logging.debug("RecordIO wrote %d entries to %s",
                          count, writer.name)
        except RecordIOWriterNotCompletedError:
            logging.debug("RecordIO not completed on: %s", tag)
            for task in done_tasks:
                self.pull.modify_task_lease(task, 0)
            return False
        return True
Пример #39
0
  def commit_batch(self, tag, batch):
    """Applies a batch of values to a RecordIO and deletes the taskqueue tasks.

    All entries of the batch are written through a single RecordIOWriter.
    Once the write went through, the tasks the entries came from are deleted
    from the pull queue.

    :param tag: The current tag we are working on
    :param batch: A list of (taskqueue_task, key_value_list)
    :return: True on success (also for an empty batch). False if the writer
             raised RecordIOWriterNotCompletedError; in that case the lease
             of every task of the batch is set to 0.
    """
    if not batch:
      return True
    done_tasks = []
    count = 0
    writer = RecordIOWriter(RecordIOShard.get_name(tag))
    for done_task, key_values in batch:
      done_tasks.append(done_task)
      for entry in key_values:
        writer.insert_entry_(entry)
        count += 1
    try:
      writer.commit_sync(retries=1)
      try:
        self.pull.delete_tasks(done_tasks)
      except taskqueue.BadTaskStateError:
        # The bulk delete failed: retry one task at a time, skipping the
        # tasks that are already flagged as deleted.
        for task in done_tasks:
          if task.was_deleted:
            continue
          try:
            self.pull.delete_tasks(task)
          except taskqueue.BadTaskStateError:
            # The arguments must be a tuple: "%" binds tighter than ",", so
            # without the parentheses the format string only received
            # task.name and raised TypeError.
            logging.debug("RecordIO Failed to free task %s on %s" %
                          (task.name, tag))
      logging.debug("RecordIO wrote %d entries to %s" %
                    (count, writer.name))
    except RecordIOWriterNotCompletedError:
      logging.debug("RecordIO not completed on: %s" % tag)
      # Setting the lease to 0 on every task of the batch.
      for task in done_tasks:
        self.pull.modify_task_lease(task, 0)
      return False
    return True
Пример #40
0
 def testReadSplitEntries(self):
     """Checks that the reader joins entries that were stored in several parts.

     Two-element tuples are plain (key, value) entries. The five-element
     tuples presumably are (key, part index, part count, version, value) --
     TODO confirm against RecordIOShard. The reader is expected to return the
     joined values for "d", "e" and "g", to drop the "bad" parts, and to
     report "g_missing_1" through get_not_read().
     """
     shard = RecordIOShard.create("test", compressed=False)
     entries = [
         ("a", STRING + "a"),
         ("b", 0, 1, 1, STRING + "b"),
         ("c", STRING + "c"),
         ("d", 0, 2, 1, STRING + "1"),
         ("d", 1, 2, 1, "2"),
         ("e", 0, 3, 1, STRING + "1"),
         ("e", 1, 3, 1, "2"),
         ("e", 2, 3, 1, "3"),
         ("f", STRING + "f"),
         ("g", 0, 2, 2, STRING + "1"),
         ("g", 1, 2, 1, "bad"),
         ("g", 1, 2, 2, "2"),
         ("g_missing_1", 0, 3, 1, STRING + "bad"),
         ("g_missing_1", 1, 3, 1, "bad"),
         ("g_missing_2", 1, 2, 1, "bad"),
         ("h", STRING + "h"),
     ]
     # The insertion order is part of the scenario, so keep the list order.
     for entry in entries:
         shard.insert(entry)
     shard.commit()

     reader = RecordIOReader("test")
     expected = [
         ("a", "a"),
         ("b", "b"),
         ("c", "c"),
         ("d", "12"),
         ("e", "123"),
         ("f", "f"),
         ("g", "12"),
         ("h", "h"),
     ]
     self.assertEqual(expected, list(reader.read()))
     self.assertEqual(["g_missing_1"], reader.get_not_read())
Пример #41
0
  def commit_sync(self, retries=32, retry_timeout=1):
    """Applies all changes synchronously to the RecordIO.

    Every attempt asks RecordIOShard.get_shards_for_key_values() which shard
    each pending entry belongs to and commits the entries shard by shard, in
    the chunks produced by get_chunks(key_values, MAX_WRITE_BATCH_SIZE).
    Entries whose shard was not found are collected and become the pending
    updates of the next attempt.

    :param retries: How many times a commit_sync should be retried in case of
                    datastore collisions.
    :param retry_timeout: The amount of seconds to wait before the next retry.
    :raises RecordIOWriterNotCompletedError: If there are still pending
                                             entries after the last attempt.
    """
    if not len(self.updates):
      return
    for attempt in range(retries + 1):
      # Entries that could not be written during this attempt.
      shard_does_not_exist = RecordIORecords()
      for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
          self.name, self.updates):
        self.db_search += 1
        if shard_name is None and key_values:
          logging.debug("RecordIO %s: No shard found for:\n%s -> %s" %
              (self.name,
               SPLIT_CHAR.join(RecordIOShard.entry_key(key_values[0])),
               key_values[0][:-1]))
          for entry in key_values:
            shard_does_not_exist.insert(entry)
        else:
          # (lo, hi) pairs returned by the previous commit_shard_() call;
          # presumably the key ranges of the two shards created by a split.
          lo_just_split = None
          hi_just_split = None
          for key_values_chunk in get_chunks(key_values, MAX_WRITE_BATCH_SIZE):
            if lo_just_split and hi_just_split and key_values_chunk:
              # Redirect this chunk to whichever of the two ranges contains
              # its first entry.
              if RecordIORecords.in_range(key_values_chunk[0],
                                          lo=lo_just_split[0],
                                          hi=lo_just_split[1]):
                shard_name = RecordIOShard.key_name(self.name,
                                                   lo=lo_just_split[0],
                                                   hi=lo_just_split[1])
              elif RecordIORecords.in_range(key_values_chunk[0],
                                            lo=hi_just_split[0],
                                            hi=hi_just_split[1]):
                shard_name = RecordIOShard.key_name(self.name,
                                                    lo=hi_just_split[0],
                                                    hi=hi_just_split[1])
            not_deleted = None
            try:
              not_deleted, lo_just_split, hi_just_split = self.commit_shard_(
                  shard_name, key_values_chunk)
            except RecordIOShardDoesNotExistError:
              logging.debug("Shard does not exist:\n" + shard_name)
              lo_just_split = None
              hi_just_split = None
              for entry in key_values_chunk:
                shard_does_not_exist.insert(entry)
            if not_deleted:
              # Entries handed back by commit_shard_() are dispatched to
              # their shards again and committed there.
              for to_delete_shard_name, to_delete_key_values in (
                   RecordIOShard.get_shards_for_key_values(
                       self.name, not_deleted)):
                self.db_search += 1
                try:
                  self.commit_shard_(to_delete_shard_name, to_delete_key_values)
                except RecordIOShardDoesNotExistError:
                  # Log the shard that actually failed, not the outer one.
                  logging.debug("Shard does not exist:\n%s" %
                                to_delete_shard_name)
                  for entry in to_delete_key_values:
                    shard_does_not_exist.insert(entry)
      self.updates = shard_does_not_exist
      if len(self.updates):
        if attempt == retries:
          raise RecordIOWriterNotCompletedError(len(self.updates))
        else:
          logging.debug("Commit attempt %d failed" % attempt)
          time.sleep(retry_timeout)
      else:
        return
Пример #42
0
 def testShardNamesForKeysNone(self):
   """Checks the shard lookup when this test has not created any shard.

   Both key/value pairs are expected to be grouped under the shard name None.
   """
   expected = {None: [("0", ""), ("1", "")]}
   shards = RecordIOShard.get_shards_for_key_values(
       "test", [("0", ""), ("1", "")])
   self.assertEqual(expected, self.getResult(shards))
Пример #43
0
    def commit_sync(self, retries=32, retry_timeout=1):
        """Applies all changes synchronously to the RecordIO.

        Every attempt asks RecordIOShard.get_shards_for_key_values() which
        shard each pending entry belongs to and commits the entries shard by
        shard, in the chunks produced by
        get_chunks(key_values, MAX_WRITE_BATCH_SIZE). Entries whose shard was
        not found are collected and become the pending updates of the next
        attempt.

        :param retries: How many times a commit_sync should be retried in
                        case of datastore collisions.
        :param retry_timeout: The amount of seconds to wait before the next
                              retry.
        :raises RecordIOWriterNotCompletedError: If there are still pending
                                                 entries after the last
                                                 attempt.
        """
        if not len(self.updates):
            return
        for attempt in range(retries + 1):
            # Entries that could not be written during this attempt.
            shard_does_not_exist = RecordIORecords()
            for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                    self.name, self.updates):
                self.db_search += 1
                if shard_name is None and key_values:
                    logging.debug(
                        "RecordIO %s: No shard found for:\n%s -> %s" %
                        (self.name,
                         SPLIT_CHAR.join(RecordIOShard.entry_key(
                             key_values[0])), key_values[0][:-1]))
                    for entry in key_values:
                        shard_does_not_exist.insert(entry)
                else:
                    # (lo, hi) pairs returned by the previous commit_shard_()
                    # call; presumably the key ranges of the two shards
                    # created by a split.
                    lo_just_split = None
                    hi_just_split = None
                    for key_values_chunk in get_chunks(key_values,
                                                       MAX_WRITE_BATCH_SIZE):
                        if lo_just_split and hi_just_split and key_values_chunk:
                            # Redirect this chunk to whichever of the two
                            # ranges contains its first entry.
                            if RecordIORecords.in_range(key_values_chunk[0],
                                                        lo=lo_just_split[0],
                                                        hi=lo_just_split[1]):
                                shard_name = RecordIOShard.key_name(
                                    self.name,
                                    lo=lo_just_split[0],
                                    hi=lo_just_split[1])
                            elif RecordIORecords.in_range(key_values_chunk[0],
                                                          lo=hi_just_split[0],
                                                          hi=hi_just_split[1]):
                                shard_name = RecordIOShard.key_name(
                                    self.name,
                                    lo=hi_just_split[0],
                                    hi=hi_just_split[1])
                        not_deleted = None
                        try:
                            not_deleted, lo_just_split, hi_just_split = self.commit_shard_(
                                shard_name, key_values_chunk)
                        except RecordIOShardDoesNotExistError:
                            logging.debug("Shard does not exist:\n" +
                                          shard_name)
                            lo_just_split = None
                            hi_just_split = None
                            for entry in key_values_chunk:
                                shard_does_not_exist.insert(entry)
                        if not_deleted:
                            # Entries handed back by commit_shard_() are
                            # dispatched to their shards again and committed
                            # there.
                            for to_delete_shard_name, to_delete_key_values in (
                                    RecordIOShard.get_shards_for_key_values(
                                        self.name, not_deleted)):
                                self.db_search += 1
                                try:
                                    self.commit_shard_(to_delete_shard_name,
                                                       to_delete_key_values)
                                except RecordIOShardDoesNotExistError:
                                    # Log the shard that actually failed, not
                                    # the outer one.
                                    logging.debug("Shard does not exist:\n%s" %
                                                  to_delete_shard_name)
                                    for entry in to_delete_key_values:
                                        shard_does_not_exist.insert(entry)
            self.updates = shard_does_not_exist
            if len(self.updates):
                if attempt == retries:
                    raise RecordIOWriterNotCompletedError(len(self.updates))
                else:
                    logging.debug("Commit attempt %d failed" % attempt)
                    time.sleep(retry_timeout)
            else:
                return
Пример #44
0
 def testReadFromInexistingHiShards(self):
     """Checks that reading fails when a following shard is missing.

     The only shard created here is bounded by hi="1". Reading everything
     through self.readAll is expected to raise
     RecordIOShardDoesNotExistError.
     """
     # NOTE(review): hi is passed as a plain string here while other tests of
     # this file pass a tuple such as ("1",) -- confirm this is intended.
     recordio_lo = RecordIOShard.create("test", hi="1")
     recordio_lo.insert(("0", STRING + "a"))
     recordio_lo.commit()
     reader = RecordIOReader("test")
     self.assertRaises(RecordIOShardDoesNotExistError, self.readAll, reader)