def testReadFromThreeShards(self):
    """Read entries of "test" that are spread over three committed shards.

    The shards are created with hi=("1",), lo=("1",)/hi=("3",) and
    lo=("3",). The assertions expect the reader to return the entries in
    key order, to include start_key and to exclude end_key.
    """
    first_shard = RecordIOShard.create("test", hi=("1",))
    first_shard.insert(("0", STRING + "a"))
    first_shard.commit()

    middle_shard = RecordIOShard.create("test", lo=("1",), hi=("3",))
    middle_shard.insert(("1", STRING + "b"))
    middle_shard.insert(("2", STRING + "c"))
    middle_shard.commit()

    last_shard = RecordIOShard.create("test", lo=("3",))
    last_shard.insert(("3", STRING + "d"))
    last_shard.commit()

    reader = RecordIOReader("test")
    everything = [("0", "a"), ("1", "b"), ("2", "c"), ("3", "d")]
    self.assertEqual(everything, list(reader))

    # Each case pairs the keyword arguments for reader.read() with the
    # entries that call is expected to produce.
    range_cases = [
        ({"start_key": "0"}, everything),
        ({"end_key": "4"}, everything),
        ({"start_key": "1"}, [("1", "b"), ("2", "c"), ("3", "d")]),
        ({"start_key": "2"}, [("2", "c"), ("3", "d")]),
        ({"end_key": "2"}, [("0", "a"), ("1", "b")]),
        ({"start_key": "1", "end_key": "3"}, [("1", "b"), ("2", "c")]),
        ({"start_key": "1", "end_key": "2"}, [("1", "b")]),
    ]
    for read_kwargs, expected in range_cases:
        self.assertEqual(expected, list(reader.read(**read_kwargs)))
def testReadFromThreeShards(self):
    """Check full and ranged reads when "test" consists of three shards.

    One entry is stored below ("1",), two between ("1",) and ("3",), and
    one from ("3",) upwards. Ranged reads are expected to include
    start_key and to stop before end_key.
    """
    low = RecordIOShard.create("test", hi=("1",))
    low.insert(("0", STRING + "a"))
    low.commit()
    mid = RecordIOShard.create("test", lo=("1",), hi=("3",))
    mid.insert(("1", STRING + "b"))
    mid.insert(("2", STRING + "c"))
    mid.commit()
    high = RecordIOShard.create("test", lo=("3",))
    high.insert(("3", STRING + "d"))
    high.commit()

    entry_0 = ("0", "a")
    entry_1 = ("1", "b")
    entry_2 = ("2", "c")
    entry_3 = ("3", "d")
    all_entries = [entry_0, entry_1, entry_2, entry_3]

    reader = RecordIOReader("test")
    # Unbounded reads, or bounds that enclose every key, return everything.
    self.assertEqual(all_entries, list(reader))
    self.assertEqual(all_entries, list(reader.read(start_key="0")))
    self.assertEqual(all_entries, list(reader.read(end_key="4")))
    # Lower bound only.
    self.assertEqual([entry_1, entry_2, entry_3],
                     list(reader.read(start_key="1")))
    self.assertEqual([entry_2, entry_3],
                     list(reader.read(start_key="2")))
    # Upper bound only.
    self.assertEqual([entry_0, entry_1],
                     list(reader.read(end_key="2")))
    # Both bounds.
    self.assertEqual([entry_1, entry_2],
                     list(reader.read(start_key="1", end_key="3")))
    self.assertEqual([entry_1],
                     list(reader.read(start_key="1", end_key="2")))
def testShardNamesForShorterKeys(self):
    """A one-element key is matched against shards with two-element bounds.

    Two shards meeting at ("a", "") are committed; the key ("aa",) is
    expected to be assigned to the shard whose lo bound is ("a", "").
    """
    boundary = ("a", "")
    RecordIOShard.create("test", hi=boundary).commit()
    RecordIOShard.create("test", lo=boundary).commit()

    expected = {RecordIOShard.key_name("test", lo=boundary): [("aa",)]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", [("aa",)]))
    self.assertEqual(expected, found)
def testShardNamesForKeysMissingHi(self):
    """A key with no committed shard is reported under the None shard name.

    Only a shard created with hi="1" is committed; looking up the key
    ("1",) is expected to yield {None: [("1",)]}.
    """
    low_shard = RecordIOShard.create("test", hi="1")
    low_shard.insert(("0", "a"))
    low_shard.commit()

    expected = {None: [("1",)]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", [("1",)]))
    self.assertEqual(expected, found)
def insertGetAndOrder(self, compressed):
    """Exercise insert, overwrite, lookup and iteration order of one shard.

    Every string from self.getStrings() is inserted in shuffled order and
    then overwritten with its reversed value. The shard must not grow on
    overwrite, must return the reversed values on lookup, and must iterate
    in the order in which self.getStrings() returns the keys.

    Args:
      compressed: Forwarded to RecordIOShard.create() as `compressed`.
    """
    recordio = RecordIOShard.create("test", compressed=compressed)
    recordio.insert(("a", "a"))
    test_strings = self.getStrings()
    # Fixture sanity check. A unittest assertion is used instead of a bare
    # `assert` so the check is not stripped when running under `python -O`.
    self.assertTrue(len(test_strings) > 1)
    random.shuffle(test_strings)
    for x in test_strings:
        recordio.insert((x, x))
    # NOTE(review): equality here presumably relies on "a" being one of the
    # strings returned by getStrings() -- confirm against the fixture.
    self.assertEqual(len(test_strings), len(recordio))
    # Re-inserting existing keys must replace the value, not add an entry.
    for x in test_strings:
        recordio.insert((x, "".join(reversed(x))))
    self.assertEqual(len(test_strings), len(recordio))
    # Spot-check every 500th key rather than all of them.
    for x in test_strings[::500]:
        self.assertTrue(x in recordio)
        self.assertEqual(recordio[(x,)], "".join(reversed(x)))
    # Fetch the unshuffled strings again to compare with iteration order.
    test_strings = self.getStrings()
    for i, (key, value) in enumerate(recordio):
        self.assertEqual(test_strings[i], key)
        self.assertEqual("".join(reversed(test_strings[i])), value)
    # The negative lookup below is only meaningful if the probe key was
    # never inserted.
    self.assertFalse("not_in" in test_strings)
    self.assertFalse("not_in" in recordio)
def testReadFromInexistingLoShards(self):
    """Reading raises when only a shard created with lo="1" is committed.

    No shard covering keys below "1" exists, and self.readAll(reader) is
    expected to raise RecordIOShardDoesNotExistError.
    """
    upper_shard = RecordIOShard.create("test", lo="1")
    for entry in (("1", STRING + "b"), ("2", STRING + "c")):
        upper_shard.insert(entry)
    upper_shard.commit()

    reader = RecordIOReader("test")
    self.assertRaises(
        RecordIOShardDoesNotExistError, self.readAll, reader)
def testReadSplitEntries(self):
    """Read a shard that mixes plain entries with multi-part entries.

    Five-element tuples look like (key, part index, part count, version,
    value) -- NOTE(review): field meaning inferred from the data, confirm
    against RecordIOShard. The assertions expect that:
      * complete multi-part entries are joined ("d" -> "12", "e" -> "123"),
      * for "g" the part tagged 1 ("bad") is ignored in favour of the
        parts tagged 2,
      * "g_missing_1" and "g_missing_2" are not returned, and only
        "g_missing_1" is listed by get_not_read().
    """
    entries = [
        ("a", STRING + "a"),
        ("b", 0, 1, 1, STRING + "b"),
        ("c", STRING + "c"),
        ("d", 0, 2, 1, STRING + "1"),
        ("d", 1, 2, 1, "2"),
        ("e", 0, 3, 1, STRING + "1"),
        ("e", 1, 3, 1, "2"),
        ("e", 2, 3, 1, "3"),
        ("f", STRING + "f"),
        ("g", 0, 2, 2, STRING + "1"),
        ("g", 1, 2, 1, "bad"),
        ("g", 1, 2, 2, "2"),
        ("g_missing_1", 0, 3, 1, STRING + "bad"),
        ("g_missing_1", 1, 3, 1, "bad"),
        ("g_missing_2", 1, 2, 1, "bad"),
        ("h", STRING + "h"),
    ]
    shard = RecordIOShard.create("test", compressed=False)
    for entry in entries:
        shard.insert(entry)
    shard.commit()

    reader = RecordIOReader("test")
    expected = [("a", "a"), ("b", "b"), ("c", "c"), ("d", "12"),
                ("e", "123"), ("f", "f"), ("g", "12"), ("h", "h")]
    self.assertEqual(expected, list(reader.read()))
    self.assertEqual(["g_missing_1"], reader.get_not_read())
def testReadFromInexistingHiShards(self):
    """Reading raises when only a shard created with hi="1" is committed.

    No shard starting at "1" exists, and self.readAll(reader) is expected
    to raise RecordIOShardDoesNotExistError.
    """
    lower_shard = RecordIOShard.create("test", hi="1")
    lower_shard.insert(("0", STRING + "a"))
    lower_shard.commit()

    reader = RecordIOReader("test")
    self.assertRaises(
        RecordIOShardDoesNotExistError, self.readAll, reader)
def testReadFromInexistingLoShards(self):
    """A reader must fail if the shard below lo="1" was never committed."""
    only_shard = RecordIOShard.create("test", lo="1")
    only_shard.insert(("1", STRING + "b"))
    only_shard.insert(("2", STRING + "c"))
    only_shard.commit()

    # readAll is handed to assertRaises uncalled so that the error raised
    # while it runs is caught by the assertion.
    reader = RecordIOReader("test")
    expected_error = RecordIOShardDoesNotExistError
    self.assertRaises(expected_error, self.readAll, reader)
def testShardNamesForKeysEmpty(self):
    """The empty key ("",) is assigned to the single unbounded shard."""
    shard = RecordIOShard.create("test")
    for entry in (("0", "a"), ("1", "b"), ("2", "c")):
        shard.insert(entry)
    shard.commit()

    expected = {RecordIOShard.key_name("test"): [("",)]}
    found = self.getResult(
        RecordIOShard.get_shards_for_key_values("test", [("",)]))
    self.assertEqual(expected, found)
def testKeyName(self):
    """Check key-name encoding and the name()/lo_hi() round trip.

    The expected key name shows the "|" in the shard name escaped as
    "%7C" and each bound rendered as hex text plus three zero-padded
    numbers. A one-element bound such as ("d|d",) is expected to come
    back from lo_hi() expanded to ("d|d", 0, 1, 0).
    """
    lo_bound = ("b|b",)
    hi_bound = ("d|d",)
    name = RecordIOShard.key_name("te|st", lo_bound, hi_bound)
    expected_name = ("te%7Cst!0!647c64!0000000000!0000000001!0000000000" +
                     "!627c62!0000000000!0000000001!0000000000")
    self.assertEqual(expected_name, name)

    shard = RecordIOShard.create("te|st", ("b|b", 0, 1, 0), hi_bound)
    self.assertEqual("te|st", shard.name())
    expected_bounds = (("b|b", 0, 1, 0), ("d|d", 0, 1, 0))
    self.assertEqual(expected_bounds, shard.lo_hi())
def testSplitEntriesSplit(self):
    """Split a shard holding only the three parts of one multi-part entry.

    The split is expected to fall at ('b', 2, 3, 3), which becomes the hi
    bound of the lower shard and the lo bound of the upper shard.
    """
    shard = RecordIOShard.create("test", compressed=False)
    for part_index in range(3):
        shard.insert(("b", part_index, 3, 3, "bb"))

    lower, upper = shard.split()
    split_point = ('b', 2, 3, 3)
    self.assertEqual((None, split_point), lower.lo_hi())
    self.assertEqual((split_point, None), upper.lo_hi())
def testReadStringMarshalPickle(self):
    """Values tagged STRING, MARSHAL and CPICKLE are decoded when read.

    The reader is expected to return the original Python objects, ordered
    by key ("cpickle", "marshal", "string").
    """
    shard = RecordIOShard.create("test")
    shard.insert(("string", STRING + "string"))

    marshalable = {"a": [1, 2, 3, u"asd"]}
    marshalled = MARSHAL + marshal.dumps(marshalable)
    shard.insert(("marshal", marshalled))

    pickleable = AnyClass()
    pickled = CPICKLE + cPickle.dumps(pickleable)
    shard.insert(("cpickle", pickled))
    shard.commit()

    expected = [
        ("cpickle", pickleable),
        ("marshal", marshalable),
        ("string", "string"),
    ]
    self.assertEqual(expected, list(RecordIOReader("test")))
def testReadStringMarshalPickle(self):
    """Round-trip a plain string, a marshalled dict and a pickled object."""
    store = RecordIOShard.create("test")

    # Plain string payload.
    store.insert(("string", STRING + "string"))

    # Payload serialized with marshal.
    marshalable = {"a": [1, 2, 3, u"asd"]}
    store.insert(("marshal", MARSHAL + marshal.dumps(marshalable)))

    # Payload serialized with cPickle.
    pickleable = AnyClass()
    store.insert(("cpickle", CPICKLE + cPickle.dumps(pickleable)))

    store.commit()

    reader = RecordIOReader("test")
    read_back = list(reader)
    # The expected list is ordered by key, not by insertion.
    self.assertEqual(
        [("cpickle", pickleable),
         ("marshal", marshalable),
         ("string", "string")],
        read_back)
def testGetAllQuery(self):
    """get_all_query() lists the three committed shards in bound order.

    Shards are created with the boundaries ("a", "") and ("b", ""). The
    key names returned by the keys-only query are expected to decode to
    those boundaries in the form ("a", 0, 1, 0) and ("b", 0, 1, 0).
    """
    bound_a = ("a", "")
    bound_b = ("b", "")
    RecordIOShard.create("test", hi=bound_a).commit()
    RecordIOShard.create("test", lo=bound_a, hi=bound_b).commit()
    RecordIOShard.create("test", lo=bound_b).commit()

    decoded_a = ("a", 0, 1, 0)
    decoded_b = ('b', 0, 1, 0)
    expected = [(None, decoded_a), (decoded_a, decoded_b), (decoded_b, None)]

    shard_keys = RecordIOShard.get_all_query("test", keys_only=True)
    found = [RecordIOShard.lo_hi_from_key(key.name()) for key in shard_keys]
    self.assertEqual(expected, found)
def testSplit(self):
    """Split a five-entry shard and check both halves.

    The split is expected to give a lower shard with keys a, b, c and an
    upper shard with keys d, e. Both keep the name "test" and meet at the
    boundary ('d', 0, 1, 0).
    """
    shard = RecordIOShard.create("test")
    keys = ["c", "a", "b", "d", "e"]
    for key in keys:
        payload = test_helper.uncompressableString(ZIP_CHUNKS)
        shard.insert((key, payload))

    lower, upper = shard.split()
    self.assertEqual(3, len(lower))
    self.assertEqual(2, len(upper))
    # No key may get lost in the split.
    for key in keys:
        self.assertTrue(key in lower or key in upper)
    # The halves must not overlap.
    self.assertTrue(max(lower) < min(upper))

    split_point = ('d', 0, 1, 0)
    self.assertEqual("test", lower.name())
    self.assertEqual((None, split_point), lower.lo_hi())
    self.assertEqual(["a", "b", "c"], [entry[0] for entry in lower])
    self.assertEqual("test", upper.name())
    self.assertEqual((split_point, None), upper.lo_hi())
    self.assertEqual(["d", "e"], [entry[0] for entry in upper])
def testWriteDuringSplit(self):
    """Write while only the lower half of a split has been committed.

    With the upper half uncommitted, committing to that shard is expected
    to raise RecordIOShardDoesNotExistError and commit_sync(32, 0) to
    raise RecordIOWriterNotCompletedError. Once the upper half is
    committed, commit_sync() is expected to succeed and the pending
    entries to end up in the matching shards.
    """
    original = RecordIOShard.create("test", compressed=False)
    original.insert(("1", STRING + "1"))
    original.insert(("2", STRING + "2"))
    lo_shard, hi_shard = original.split()
    lo_shard.commit()

    updater = RecordIOWriter("test")
    updater.insert("3", "3")
    missing_shard_name = hi_shard.key().name()
    self.assertRaises(RecordIOShardDoesNotExistError,
                      updater.commit_shard_,
                      missing_shard_name, updater.updates)
    # NOTE(review): the meaning of the arguments 32 and 0 is not visible
    # here -- see RecordIOWriter.commit_sync.
    self.assertRaises(RecordIOWriterNotCompletedError,
                      updater.commit_sync, 32, 0)

    hi_shard.commit()
    updater.insert("0", STRING + "0")
    updater.commit_sync()

    lo_shard, hi_shard = [shard for shard in RecordIOShard.all()]
    lo_keys = [entry[0] for entry in lo_shard]
    self.assertEqual(lo_keys, ["0", "1"])
    hi_keys = [entry[0] for entry in hi_shard]
    self.assertEqual(hi_keys, ["2", "3"])
def readFromnOneShard(self, compressed):
    """Check full reads, ranged reads and lookups on a single shard.

    Args:
      compressed: Forwarded to RecordIOShard.create() as `compressed`.
    """
    shard = RecordIOShard.create("test", compressed=compressed)
    for entry in (("0", STRING + "a"),
                  ("1", STRING + "b"),
                  ("2", STRING + "c")):
        shard.insert(entry)
    shard.commit()

    reader = RecordIOReader("test")
    everything = [("0", "a"), ("1", "b"), ("2", "c")]
    self.assertEqual(everything, list(reader))

    # Each case pairs the keyword arguments for reader.read() with the
    # expected entries; start_key is included and end_key is excluded.
    range_cases = [
        ({"start_key": "0"}, everything),
        ({"end_key": "3"}, everything),
        ({"start_key": "1"}, [("1", "b"), ("2", "c")]),
        ({"end_key": "2"}, [("0", "a"), ("1", "b")]),
        ({"start_key": "1", "end_key": "2"}, [("1", "b")]),
    ]
    for read_kwargs, expected in range_cases:
        self.assertEqual(expected, list(reader.read(**read_kwargs)))

    # Membership test and item access by key.
    self.assertTrue("0" in reader)
    self.assertFalse("3" in reader)
    self.assertEqual(reader["0"], "a")
def readFromnOneShard(self, compressed):
    """Read back three entries stored in one shard.

    Covers iteration, read() with start_key and/or end_key, `in` and
    item access.

    Args:
      compressed: Forwarded to RecordIOShard.create() as `compressed`.
    """
    store = RecordIOShard.create("test", compressed=compressed)
    store.insert(("0", STRING + "a"))
    store.insert(("1", STRING + "b"))
    store.insert(("2", STRING + "c"))
    store.commit()

    first = ("0", "a")
    second = ("1", "b")
    third = ("2", "c")
    full = [first, second, third]

    reader = RecordIOReader("test")
    self.assertEqual(full, list(reader))
    self.assertEqual(full, list(reader.read(start_key="0")))
    self.assertEqual(full, list(reader.read(end_key="3")))
    self.assertEqual([second, third], list(reader.read(start_key="1")))
    self.assertEqual([first, second], list(reader.read(end_key="2")))
    self.assertEqual([second],
                     list(reader.read(start_key="1", end_key="2")))

    self.assertTrue("0" in reader)
    self.assertFalse("3" in reader)
    self.assertEqual(reader["0"], "a")
def testReadSplitEntries(self):
    """Multi-part entries are joined on read; incomplete ones are not.

    NOTE(review): the five-element tuples appear to be (key, part index,
    part count, version, value) -- confirm against RecordIOShard.
    """
    plain_and_complete = [
        ("a", STRING + "a"),
        ("b", 0, 1, 1, STRING + "b"),
        ("c", STRING + "c"),
        ("d", 0, 2, 1, STRING + "1"),
        ("d", 1, 2, 1, "2"),
        ("e", 0, 3, 1, STRING + "1"),
        ("e", 1, 3, 1, "2"),
        ("e", 2, 3, 1, "3"),
        ("f", STRING + "f"),
    ]
    # "g" carries one part tagged 1 next to a complete set tagged 2.
    mixed_parts = [
        ("g", 0, 2, 2, STRING + "1"),
        ("g", 1, 2, 1, "bad"),
        ("g", 1, 2, 2, "2"),
    ]
    # Entries for which not every part is present.
    incomplete = [
        ("g_missing_1", 0, 3, 1, STRING + "bad"),
        ("g_missing_1", 1, 3, 1, "bad"),
        ("g_missing_2", 1, 2, 1, "bad"),
    ]
    trailing = [("h", STRING + "h")]

    store = RecordIOShard.create("test", compressed=False)
    for group in (plain_and_complete, mixed_parts, incomplete, trailing):
        for entry in group:
            store.insert(entry)
    store.commit()

    reader = RecordIOReader("test")
    self.assertEqual(
        [("a", "a"), ("b", "b"), ("c", "c"), ("d", "12"), ("e", "123"),
         ("f", "f"), ("g", "12"), ("h", "h")],
        list(reader.read()))
    # Only "g_missing_1" is expected to be reported as not read.
    self.assertEqual(["g_missing_1"], reader.get_not_read())
def testShardNamesForKeysSplit(self):
    """Keys map to one shard before a split and to three shards after it.

    Ten entries are committed to one shard, which is then deleted and
    replaced by the three shards from two successive splits. The expected
    distribution is keys 0-4, 5-7 and 8-9.
    """
    original = RecordIOShard.create("test")
    keys = [str(number) for number in range(10)]
    for key in keys:
        original.insert((key, test_helper.uncompressableString(2**16)))
    original.commit()

    # Before the split every key belongs to the single unbounded shard.
    probe = [("0", ""), ("1", "")]
    expected_single = {
        RecordIOShard.key_name("test"): [("0", ""), ("1", "")]}
    self.assertEqual(
        expected_single,
        self.getResult(
            RecordIOShard.get_shards_for_key_values("test", probe)))

    # Replace the original shard by the result of two splits.
    original.delete()
    shard_0, remainder = original.split()
    shard_1, shard_2 = remainder.split()
    for shard in (shard_0, shard_1, shard_2):
        shard.commit()

    expected_split = {
        shard_0.key().name(): [('0', '0'), ('1', '1'), ('2', '2'),
                               ('3', '3'), ('4', '4')],
        shard_1.key().name(): [('5', '5'), ('6', '6'), ('7', '7')],
        shard_2.key().name(): [('8', '8'), ('9', '9')],
    }
    self.assertEqual(
        expected_split,
        self.getResult(
            RecordIOShard.get_shards_for_key_values(
                "test", zip(keys, keys))))
def testReadFromInexistingHiShards(self):
    """A reader must fail if the shard from hi="1" up was never committed."""
    only_shard = RecordIOShard.create("test", hi="1")
    only_shard.insert(("0", STRING + "a"))
    only_shard.commit()

    # readAll is handed to assertRaises uncalled so that the error raised
    # while it runs is caught by the assertion.
    reader = RecordIOReader("test")
    expected_error = RecordIOShardDoesNotExistError
    self.assertRaises(expected_error, self.readAll, reader)