class test_tiered05(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    uri = "table:test_tiered05"
    wait = 2

    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
            'tiered_manager=(wait=%d),' % self.wait + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s,' % self.ss_name + \
            'object_target_size=20M)'

    # Test calling the flush_tier API with a tiered manager. Should get an error.
    def test_tiered(self):
        self.session.create(self.uri, 'key_format=S')
        # Allow time for the thread to start up.
        time.sleep(self.wait)
        msg = "/storage manager thread is configured/"
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.assertEquals(self.session.flush_tier(None), 0), msg)
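# For reference, a sketch of the connection string conn_config composes for the
# dir_store scenario above. The auth token and bucket names come from the test
# helpers, so the values shown here are illustrative, not from a real run:
#
#   tiered_manager=(wait=2),tiered_storage=(auth_token=Secret,bucket=bucket1,
#   bucket_prefix=pfx_,name=dir_store,object_target_size=20M)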
class test_tiered09(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            prefix1='1_',
            prefix2='2_',
            prefix3='3_',
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            prefix1=generate_s3_prefix(),
            prefix2=generate_s3_prefix(),
            prefix3=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered09-000000000'
    base2 = 'test_second09-000000000'
    obj1file = base + '1.wtobj'
    obj1second = base2 + '1.wtobj'
    obj2file = base + '2.wtobj'
    uri = "table:test_tiered09"
    uri2 = "table:test_second09"

    retention = 1
    saved_conn = ''

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
            'debug_mode=(flush_checkpoint=true),' + \
            'statistics=(all),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.prefix1 + \
            'local_retention=%d,' % self.retention + \
            'name=%s)' % self.ss_name
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def check(self, tc, n):
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling the flush_tier API.
    def test_tiered(self):
        # Create a table. Add some data. Checkpoint and flush tier.
        # Close the connection. Then we want to reopen the connection
        # with a different bucket prefix and repeat. Then reopen the
        # connection with the original prefix. Then reopen and verify
        # we can read all the data.
        #
        # Verify the files are as we expect also. We expect:
        # 1_<tablename>-00000001.wtobj
        # 2_<tablename>-00000002.wtobj
        # 1_<tablename>-00000003.wtobj
        # but we can read and access all data in all objects.
        self.session.create(self.uri, 'key_format=S,value_format=S,')
        # Add first data. Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)
        self.close_conn()

        # For directory store, check that the expected files exist.
        if self.ss_name == 'dir_store':
            self.assertTrue(os.path.exists(self.obj2file))
            bucket_obj = os.path.join(self.bucket, self.prefix1 + self.obj1file)
            self.assertTrue(os.path.exists(bucket_obj))

        # Since we've closed and reopened the connection we lost the work units
        # to drop the local objects. Clean them up now to make sure we can open
        # the correct object in the bucket.
        localobj = './' + self.obj1file
        if os.path.exists(localobj):
            os.remove(localobj)

        # Reopen the connection with a different prefix this time.
        conn_params = self.saved_conn + ',tiered_storage=(bucket_prefix=%s)' % self.prefix2
        self.conn = self.wiredtiger_open('.', conn_params)
        self.session = self.conn.open_session()

        # Add a second table created while the second prefix is used for the connection.
        self.session.create(self.uri2, 'key_format=S,value_format=S,')
        # Add first data. Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri2)
        c["0"] = "0"
        self.check(c, 1)
        c.close()

        # Add more data to the original table.
        # Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri)
        c["1"] = "1"
        self.check(c, 2)
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)
        self.close_conn()

        # For directory store, check each table was created with the correct prefix.
        if self.ss_name == 'dir_store':
            bucket_obj = os.path.join(self.bucket, self.prefix2 + self.obj1second)
            self.assertTrue(os.path.exists(bucket_obj))
            bucket_obj = os.path.join(self.bucket, self.prefix1 + self.obj2file)
            self.assertTrue(os.path.exists(bucket_obj))

        # Since we've closed and reopened the connection we lost the work units
        # to drop the local objects. Clean them up now to make sure we can open
        # the correct object in the bucket.
        localobj = './' + self.obj2file
        if os.path.exists(localobj):
            os.remove(localobj)
        localobj = './' + self.obj1second
        if os.path.exists(localobj):
            os.remove(localobj)

        # Reopen with the other prefix and check all data. Even though we're using the
        # other prefix, we should find all the data in the object with the original
        # prefix.
        conn_params = self.saved_conn + ',tiered_storage=(bucket_prefix=%s)' % self.prefix3
        self.conn = self.wiredtiger_open('.', conn_params)
        self.session = self.conn.open_session()
        c = self.session.open_cursor(self.uri)
        self.check(c, 2)
        c.close()
        c = self.session.open_cursor(self.uri2)
        self.check(c, 1)
        c.close()
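# A sketch of the dir_store bucket contents this test asserts on, with names
# built from 'base', 'base2' and the prefixes above (the original table is
# flushed under prefix1 both times, the second table once under prefix2):
#
#   1_test_tiered09-0000000001.wtobj
#   1_test_tiered09-0000000002.wtobj
#   2_test_second09-0000000001.wtobj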
class test_tiered10(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            prefix1='1_',
            prefix2='2_',
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            prefix1=generate_s3_prefix(),
            prefix2=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered10-000000000'
    obj1file = base + '1.wtobj'
    uri = "table:test_tiered10"

    conn1_dir = "first_dir"
    conn2_dir = "second_dir"
    retention = 1
    saved_conn = ''

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        os.mkdir(self.conn1_dir)
        os.mkdir(self.conn2_dir)
        # Use this to create the directories and set up for the others.
        dummy_conn = 'create,statistics=(all),'
        # For directory store, the bucket is a directory one level up from database directories.
        bucket = ''
        if self.ss_name == 'dir_store':
            bucket = '../'
        bucket += self.bucket
        self.saved_conn = \
            'debug_mode=(flush_checkpoint=true),' + \
            'create,statistics=(all),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % bucket + \
            'local_retention=%d,' % self.retention + \
            'name=%s),' % self.ss_name
        return dummy_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            # config = '=(config=\"(verbose=[api:1,version,tiered:1])\")'
            extlist.skip_if_missing = True
        # if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def check(self, tc, base, n):
        for i in range(base, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling the flush_tier API.
    def test_tiered(self):
        # Have two connections running in different directories, but sharing
        # the same bucket with different prefixes. Each database creates an
        # identically named table with different data. Each then does a flush
        # tier, testing that both databases can coexist in the same bucket
        # without conflict.
        #
        # Then reopen the connections and make sure we can read data correctly.
        #
        # We open two connections manually so that they both have the same relative
        # pathnames. The standard connection is just a dummy for this test.
        ext = self.extensionsConfig()
        conn1_params = self.saved_conn + ext + ',tiered_storage=(bucket_prefix=%s)' % self.prefix1
        conn1 = self.wiredtiger_open(self.conn1_dir, conn1_params)
        session1 = conn1.open_session()
        conn2_params = self.saved_conn + ext + ',tiered_storage=(bucket_prefix=%s)' % self.prefix2
        conn2 = self.wiredtiger_open(self.conn2_dir, conn2_params)
        session2 = conn2.open_session()

        session1.create(self.uri, 'key_format=S,value_format=S,')
        session2.create(self.uri, 'key_format=S,value_format=S,')

        # Add first data. Checkpoint, flush and close the connection.
        c1 = session1.open_cursor(self.uri)
        c2 = session2.open_cursor(self.uri)
        c1["0"] = "0"
        c2["20"] = "20"
        self.check(c1, 0, 1)
        self.check(c2, 20, 1)
        c1.close()
        c2.close()
        session1.checkpoint()
        session1.flush_tier(None)
        session2.checkpoint()
        session2.flush_tier(None)
        conn1_obj1 = os.path.join(self.bucket, self.prefix1 + self.obj1file)
        conn2_obj1 = os.path.join(self.bucket, self.prefix2 + self.obj1file)
        if self.ss_name == 'dir_store':
            self.assertTrue(os.path.exists(conn1_obj1))
            self.assertTrue(os.path.exists(conn2_obj1))
        conn1.close()
        conn2.close()

        # Remove the local copies of the objects before we reopen so that we force
        # the system to read from the bucket or bucket cache.
        local = self.conn1_dir + '/' + self.obj1file
        if os.path.exists(local):
            os.remove(local)
        local = self.conn2_dir + '/' + self.obj1file
        if os.path.exists(local):
            os.remove(local)

        conn1 = self.wiredtiger_open(self.conn1_dir, conn1_params)
        session1 = conn1.open_session()
        conn2 = self.wiredtiger_open(self.conn2_dir, conn2_params)
        session2 = conn2.open_session()
        c1 = session1.open_cursor(self.uri)
        c2 = session2.open_cursor(self.uri)
        self.check(c1, 0, 1)
        self.check(c2, 20, 1)
        c1.close()
        c2.close()
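# A sketch of the on-disk layout this test builds for the dir_store scenario
# (directory names from the class above; the bucket sits one level above the
# two database homes, which is why conn_config prepends '../'):
#
#   ./<bucket>/1_test_tiered10-0000000001.wtobj   <- flushed by conn1
#   ./<bucket>/2_test_tiered10-0000000001.wtobj   <- flushed by conn2
#   ./first_dir/    <- conn1 home
#   ./second_dir/   <- conn2 home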
class test_tiered14(wttest.WiredTigerTestCase):
    uri = "table:test_tiered14-{}" # format for subtests

    # FIXME-WT-7833: enable the commented scenarios and run the
    # test with the --long option.

    # The multiplier makes the size of keys and values progressively larger.
    # A multiplier of 0 makes the keys and values a single length.
    multiplier = [
        ('0', dict(multiplier=0)),
        ('S', dict(multiplier=1)),
        ('M', dict(multiplier=10)),
        #('L', dict(multiplier=100, long_only=True)),
        #('XL', dict(multiplier=1000, long_only=True)),
    ]
    keyfmt = [
        ('integer', dict(keyfmt='i')),
        ('string', dict(keyfmt='S')),
    ]
    dataset = [
        ('simple', dict(dataset='simple')),
        #('complex', dict(dataset='complex', long_only=True)),
    ]
    storage_sources = [
        ('dir_store', dict(
            auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            num_ops=100,
            ss_name='dir_store',
        )),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            num_ops=20,
            ss_name='s3_store')),
    ]
    scenarios = wtscenario.make_scenarios(multiplier, keyfmt, dataset, storage_sources)

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
            'debug_mode=(flush_checkpoint=true),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s),' % self.ss_name

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def progress(self, s):
        outstr = "testnum {}, position {}: {}".format(self.testnum, self.position, s)
        self.verbose(3, outstr)
        self.pr(outstr)

    # Run a sequence of operations, indicated by a string.
    #  a = add some number of keys
    #  u = update some number of keys
    #  c = checkpoint
    #  r = reopen
    #  f = flush_tier
    #  . = check to make sure all expected values are present
    #
    # We require a unique test number so we can generate a different uri from
    # previous runs. A different approach is to drop the uri, but then we need to
    # remove the bucket and cache, which is specific to the storage source extension.
    def playback(self, testnum, ops):
        self.testnum = testnum
        self.position = -1

        uri = self.uri.format(testnum)
        self.progress('Running ops: {} using uri {}'.format(ops, uri))
        if self.dataset == 'simple':
            ds = TrackedSimpleDataSet(self, uri, self.multiplier,
                key_format=self.keyfmt)
        elif self.dataset == 'complex':
            ds = TrackedComplexDataSet(self, uri, self.multiplier,
                key_format=self.keyfmt)

        # Populating a tracked data set is needed to create the uri.
        ds.populate()
        inserted = 0

        # At the end of the sequence of operations, do a final check ('.').
        for op in ops + '.':
            self.position += 1
            try:
                if op == 'f':
                    self.progress('flush_tier')
                    self.session.flush_tier(None)
                elif op == 'c':
                    self.progress('checkpoint')
                    self.session.checkpoint()
                elif op == 'r':
                    self.progress('reopen')
                    self.reopen_conn()
                elif op == 'a':
                    self.progress('add')
                    n = random.randrange(1, 101)    # 1 <= n <= 100
                    ds.store_range(inserted, n)
                    inserted += n
                elif op == 'u':
                    self.progress('update')
                    # Only update elements if enough have already been added.
                    n = random.randrange(1, 101)    # 1 <= n <= 100
                    if n < inserted:
                        pos = random.randrange(0, inserted - n)
                        ds.store_range(pos, n)
                elif op == '.':
                    self.progress('check')
                    ds.check()
            except Exception as e:
                self.progress('Failed at position {} in {}: {}'.format(
                    self.position, ops, str(e)))
                raise e

    # Test tiered storage with checkpoints and flush_tier calls.
    def test_tiered(self):
        random.seed(0)

        # Get started with a fixed sequence of basic operations.
        # There's no particular reason to start with this sequence.
        testnum = 0
        self.playback(testnum, "aaaaacaaa.uucrauaf.aauaac.auu.aacrauafa.uruua.")

        for i in range(0, 10):
            testnum += 1
            # Generate a sequence of operations that is heavy on additions and updates.
            s = ''.join(random.choices('aaaaauuuuufcr.', k=self.num_ops))
            self.playback(testnum, s)

        for i in range(0, 10):
            testnum += 1
            # Generate a sequence of operations that has a greater mix of 'operational' functions.
            s = ''.join(random.choices('aufcr.', k=self.num_ops))
            self.playback(testnum, s)
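# A sketch of how the weighted alphabets above shape the generated sequences;
# the example output below is made up for illustration, not taken from a real
# seeded run:
#
#   s = ''.join(random.choices('aaaaauuuuufcr.', k=20))
#   # e.g. 'auuaau.acuaufaurauc.' - mostly adds and updates, with occasional
#   # flush ('f'), checkpoint ('c'), reopen ('r') and check ('.') operations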
class test_tiered07(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dir_store', dict(auth_token = get_auth_token('dir_store'),
            bucket = get_bucket1_name('dir_store'),
            bucket_prefix = "pfx_",
            ss_name = 'dir_store')),
        # FIXME-WT-8897 Disabled as S3 directory listing is interpreting a directory to end in a '/',
        # whereas the code in the tiered storage doesn't expect that. Enable when fixed.
        #('s3', dict(auth_token = get_auth_token('s3_store'),
        #    bucket = get_bucket1_name('s3_store'),
        #    bucket_prefix = generate_s3_prefix(),
        #    ss_name = 's3_store'))
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    uri = "table:abc"
    uri2 = "table:ab"
    uri3 = "table:abcd"
    uri4 = "table:abcde"
    localuri = "table:local"
    newuri = "table:tier_new"

    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        # 'verbose=(tiered),' + \
        return \
            'debug_mode=(flush_checkpoint=true),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s,' % self.ss_name + \
            'object_target_size=20M)'

    def check(self, tc, n):
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling schema APIs with a tiered table.
    def test_tiered(self):
        # Create a new tiered table.
        self.pr('create table')
        self.session.create(self.uri, 'key_format=S,value_format=S')
        self.pr('create table 2')
        self.session.create(self.uri2, 'key_format=S,value_format=S')
        self.pr('create table 3')
        self.session.create(self.uri3, 'key_format=S,value_format=S')
        self.pr('create table local')
        self.session.create(self.localuri, 'key_format=S,value_format=S,tiered_storage=(name=none)')

        # Rename is not supported for tiered tables.
        msg = "/is not supported/"
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.assertEquals(self.session.rename(self.uri, self.newuri, None), 0), msg)

        # Add some data and flush tier.
        self.pr('add one item to all tables')
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.uri2)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.uri3)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.localuri)
        c["0"] = "0"
        c.close()
        self.session.checkpoint()
        self.pr('After data, call flush_tier')
        self.session.flush_tier(None)

        # Drop table.
        self.pr('call drop')
        self.session.drop(self.localuri)
        self.session.drop(self.uri)

        # By default, the remove_files configuration for drop is true. This means that the
        # drop operation for tiered tables should both remove the files from the metadata
        # file and remove the corresponding local object files in the directory.
        self.assertFalse(os.path.isfile("abc-0000000001.wtobj"))
        self.assertFalse(os.path.isfile("abc-0000000002.wtobj"))

        # Dropping a table using the force setting should succeed even if the table does not exist.
        self.session.drop(self.localuri, 'force=true')
        self.session.drop(self.uri, 'force=true')

        # Dropping a table should not succeed if the table does not exist.
        # Test dropping a table that was previously dropped.
        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: self.session.drop(self.localuri, None))
        # Test dropping a table that does not exist.
        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: self.session.drop("table:random_non_existent", None))

        # Create new table with same name. This should error.
        msg = "/already exists/"
        self.pr('check cannot create with same name')
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.assertEquals(self.session.create(self.uri, 'key_format=S'), 0), msg)

        # Make sure there was no problem with overlapping table names.
        self.pr('check original similarly named tables')
        c = self.session.open_cursor(self.uri2)
        self.check(c, 1)
        c.close()
        c = self.session.open_cursor(self.uri3)
        self.check(c, 1)
        c.close()

        # Create new table with new name.
        self.pr('create new table')
        self.session.create(self.newuri, 'key_format=S')

        # Test the drop operation without removing associated files.
        self.session.create(self.uri4, 'key_format=S,value_format=S')
        self.session.drop(self.uri4, 'remove_files=false')
        self.assertTrue(os.path.isfile("abcde-0000000001.wtobj"))
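# For reference, the two drop behaviors this test exercises, summarized from
# its own assertions (not additional API guarantees):
#
#   session.drop('table:abc')                          # remove_files defaults to
#                                                      # true: abc-*.wtobj removed
#   session.drop('table:abcde', 'remove_files=false')  # metadata dropped, local
#                                                      # object file left behind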
class test_tiered02(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dirstore', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    complex_dataset = [
        ('simple_ds', dict(complex_dataset=False)),
        # Commented out the complex dataset that tests column groups and indexes because it crashes
        # in the middle of the test. FIXME: WT-9001
        #('complex_ds', dict(complex_dataset=True)),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources, complex_dataset)

    uri = "table:test_tiered02"

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
            'debug_mode=(flush_checkpoint=true),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s),' % self.ss_name

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def progress(self, s):
        self.verbose(3, s)
        self.pr(s)

    def confirm_flush(self, increase=True):
        # Without directly using the filesystem API, directory listing is only supported on
        # the directory store. Limit this check to the directory store.
        if self.ss_name != 'dir_store':
            return
        got = sorted(list(os.listdir(self.bucket)))
        self.pr('Flushed objects: ' + str(got))
        if increase:
            # WT-7639: we know that this assertion sometimes fails,
            # we are collecting more data - we still want it to fail
            # so it is noticed.
            if len(got) <= self.flushed_objects:
                from time import sleep
                self.prout('directory items: {} is not greater than {}!'.format(
                    got, self.flushed_objects))
                self.prout('waiting to see if it resolves')
                for i in range(0, 10):
                    self.prout('checking again')
                    newgot = sorted(list(os.listdir(self.bucket)))
                    if len(newgot) > self.flushed_objects:
                        self.prout('resolved, now see: {}'.format(newgot))
                        break
                    sleep(i)
            self.assertGreater(len(got), self.flushed_objects)
        else:
            self.assertEqual(len(got), self.flushed_objects)
        self.flushed_objects = len(got)

    def get_dataset(self, rows):
        args = 'key_format=S'
        if self.complex_dataset:
            return ComplexDataSet(self, self.uri, rows, config=args)
        else:
            return SimpleDataSet(self, self.uri, rows, config=args)

    # Test tiered storage with checkpoints and flush_tier calls.
    def test_tiered(self):
        self.flushed_objects = 0

        self.pr("create sys")
        self.progress('Create simple data set (10)')
        ds = self.get_dataset(10)
        self.progress('populate')
        ds.populate()
        ds.check()
        self.progress('checkpoint')
        self.session.checkpoint()
        self.progress('flush_tier')
        self.session.flush_tier(None)
        self.confirm_flush()
        ds.check()
        self.close_conn()
        self.progress('reopen_conn')
        self.reopen_conn()

        # Check what was there before.
        ds = self.get_dataset(10)
        ds.check()

        self.progress('Create simple data set (50)')
        ds = self.get_dataset(50)
        self.progress('populate')
        ds.populate()
        ds.check()
        self.progress('open extra cursor on ' + self.uri)
        cursor = self.session.open_cursor(self.uri, None, None)
        self.progress('checkpoint')
        self.session.checkpoint()
        self.progress('flush_tier')
        self.session.flush_tier(None)
        self.progress('flush_tier complete')
        self.confirm_flush()

        self.progress('Create simple data set (100)')
        ds = self.get_dataset(100)
        self.progress('populate')
        ds.populate()
        ds.check()
        self.progress('checkpoint')
        self.session.checkpoint()
        self.progress('flush_tier')
        self.session.flush_tier(None)
        self.confirm_flush()

        self.progress('Create simple data set (200)')
        ds = self.get_dataset(200)
        self.progress('populate')
        ds.populate()
        ds.check()
        cursor.close()
        self.progress('close_conn')
        self.close_conn()

        self.progress('reopen_conn')
        self.reopen_conn()
        # Check what was there before.
        ds = self.get_dataset(200)
        ds.check()

        # Now add some more.
        self.progress('Create simple data set (300)')
        ds = self.get_dataset(300)
        self.progress('populate')
        ds.populate()
        ds.check()

        # We haven't done a flush so there should be
        # nothing extra on the shared tier.
        self.confirm_flush(increase=False)
        self.progress('checkpoint')
        self.session.checkpoint()
        self.confirm_flush(increase=False)
        self.progress('END TEST')
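# The pattern this test repeats: flush_tier only copies data that has been
# checkpointed, so each round is populate, checkpoint, flush, then confirm the
# bucket grew. A minimal sketch of one round:
#
#   ds.populate()
#   self.session.checkpoint()
#   self.session.flush_tier(None)
#   self.confirm_flush()   # dir_store only: the bucket listing got longer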
class test_tiered13(test_import_base):
    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered13-000000000'
    fileuri_base = 'file:' + base
    file1uri = fileuri_base + '1.wtobj'
    file2 = base + '2.wtobj'
    file2uri = fileuri_base + '2.wtobj'
    otherfile = 'other.wt'
    otheruri = 'file:' + otherfile
    uri = "table:test_tiered13"

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
            'debug_mode=(flush_checkpoint=true),' + \
            'create,tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s,' % self.ss_name + \
            'object_target_size=20M),'
        return self.saved_conn

    def test_tiered13(self):
        # Create a new tiered table.
        self.session.create(self.uri, 'key_format=S,value_format=S,')
        # Add first data. Checkpoint, flush and close the connection.
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)
        c = self.session.open_cursor(self.uri)
        c["1"] = "1"
        c.close()
        self.session.checkpoint()
        # We now have the second object existing, with data in it.

        # Set up for the test.
        # - Create the tiered table (above).
        # - Find the metadata for the current file: object.
        # - Set up a new database for importing.
        #
        # Testing import and tiered tables. All should error:
        # - Try to import via the table:uri.
        # - Try to import via the table:uri with the file object's metadata.
        # - Try to import via the file:uri.
        # - Try to import via the file:uri with the file object's metadata.
        # - Try to import via a renamed file:name.wt.
        # - Try to import via a renamed file:name.wt with the file object's metadata.

        # Export the metadata for the current file object 2.
        cursor = self.session.open_cursor('metadata:', None, None)
        for k, v in cursor:
            if k.startswith(self.file2uri):
                fileobj_config = cursor[k]
            if k.startswith(self.uri):
                table_config = cursor[k]
        cursor.close()
        self.close_conn()

        # Construct the config strings.
        import_enabled = 'import=(enabled,repair=true)'
        import_meta = 'import=(enabled,repair=false,file_metadata=(' + \
            fileobj_config + '))'
        table_import_meta = table_config + ',import=(enabled,repair=false,file_metadata=(' + \
            fileobj_config + '))'

        # Set up the import database.
        newdir = 'IMPORT_DB'
        shutil.rmtree(newdir, ignore_errors=True)
        os.mkdir(newdir)
        newbucket = os.path.join(newdir, self.bucket)
        if self.ss_name == 'dir_store':
            os.mkdir(newbucket)

        # It is tricky to work around the extension and connection bucket setup for
        # creating the new import directory that is tiered-enabled.
        ext = self.extensionsConfig()
        conn_params = self.saved_conn + ext
        self.conn = self.wiredtiger_open(newdir, conn_params)
        self.session = self.setUpSessionOpen(self.conn)

        # Copy the file to the file names we're going to test later.
        self.copy_file(self.file2, '.', newdir)
        copy_from = self.file2
        copy_to = os.path.join(newdir, self.otherfile)
        shutil.copy(copy_from, copy_to)

        msg = '/Operation not supported/'
        enoent = '/No such file/'
        # Try to import via the table:uri. This fails with ENOENT because
        # it is looking for the normal on-disk file name. It cannot tell it
        # is a tiered table in this case.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.create(self.uri, import_enabled), enoent)

        # Try to import via the table:uri with file metadata.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.create(self.uri, table_import_meta), msg)

        # Try to import via the file:uri.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.create(self.file2uri, import_enabled), msg)

        # Try to import via the file:uri with file metadata.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.create(self.file2uri, import_meta), msg)

        # Try to import via a renamed object. If we don't send in metadata,
        # we cannot tell it was a tiered table until we read in the root page.
        # Only test this in diagnostic mode, which has an assertion.
        #
        # FIXME-8644 There is an error path bug in wt_bm_read preventing this from
        # working correctly, although the code to return an error is in the code.
        # Uncomment these lines when that bug is fixed.
        #if wiredtiger.diagnostic_build():
        #    self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
        #        lambda: self.session.create(self.otheruri, import_enabled), msg)

        # Try to import via a renamed object with metadata.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.create(self.otheruri, import_meta), msg)
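# For reference, the shape of the import configuration strings built above
# (the file_metadata payload is whatever the metadata cursor returned; the
# value below is a placeholder, not real output):
#
#   import=(enabled,repair=true)
#   import=(enabled,repair=false,file_metadata=(<config of file:...2.wtobj>))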
class test_tiered04(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket1=get_bucket2_name('dir_store'),
            prefix="pfx_",
            prefix1="pfx1_",
            ss_name='dir_store')),
        ('s3', dict(
            auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket1=get_bucket2_name('s3_store'),
            prefix=generate_s3_prefix(),
            # Test that object names with "/" are processed.
            prefix1=generate_s3_prefix() + "/s3/source/",
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered04-000000000'
    fileuri_base = 'file:' + base
    obj1file = base + '1.wtobj'
    obj2file = base + '2.wtobj'
    objuri = 'object:' + base + '1.wtobj'
    tiereduri = "tiered:test_tiered04"
    tieruri = "tier:test_tiered04"
    uri = "table:test_tiered04"

    uri1 = "table:test_other_tiered04"
    uri_none = "table:test_local04"
    file_none = "file:test_local04.wt"

    object_sys = "9M"
    object_sys_val = 9 * 1024 * 1024
    object_uri = "15M"
    object_uri_val = 15 * 1024 * 1024
    retention = 3
    retention1 = 600

    def conn_config(self):
        if self.ss_name == 'dir_store':
            os.mkdir(self.bucket)
            os.mkdir(self.bucket1)
        self.saved_conn = \
            'debug_mode=(flush_checkpoint=true),' + \
            'statistics=(all),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.prefix + \
            'local_retention=%d,' % self.retention + \
            'name=%s,' % self.ss_name + \
            'object_target_size=%s)' % self.object_sys
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    # Check for a specific string as part of the uri's metadata.
    def check_metadata(self, uri, val_str):
        c = self.session.open_cursor('metadata:')
        val = c[uri]
        c.close()
        self.assertTrue(val_str in val)

    def get_stat(self, stat, uri):
        if uri == None:
            stat_cursor = self.session.open_cursor('statistics:')
        else:
            stat_cursor = self.session.open_cursor('statistics:' + uri)
        val = stat_cursor[stat][2]
        stat_cursor.close()
        return val

    def check(self, tc, n):
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND)

    # Test calling the flush_tier API.
    def test_tiered(self):
        # Create three tables. One using the system tiered storage, one
        # specifying its own bucket and object size and one using no
        # tiered storage. Use stats to verify correct setup.
        intl_page = 'internal_page_max=16K'
        base_create = 'key_format=S,value_format=S,' + intl_page
        self.pr("create sys")
        self.session.create(self.uri, base_create)
        conf = \
            ',tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket1 + \
            'bucket_prefix=%s,' % self.prefix1 + \
            'local_retention=%d,' % self.retention1 + \
            'name=%s,' % self.ss_name + \
            'object_target_size=%s)' % self.object_uri
        self.pr("create non-sys tiered")
        self.session.create(self.uri1, base_create + conf)
        conf = ',tiered_storage=(name=none)'
        self.pr("create non tiered/local")
        self.session.create(self.uri_none, base_create + conf)

        c = self.session.open_cursor(self.uri)
        c1 = self.session.open_cursor(self.uri1)
        cn = self.session.open_cursor(self.uri_none)
        c["0"] = "0"
        c1["0"] = "0"
        cn["0"] = "0"
        self.check(c, 1)
        self.check(c1, 1)
        self.check(cn, 1)
        c.close()

        flush = 0
        # Check the local retention. After a flush_tier call the object file should exist in
        # the local database. Then after sleeping long enough it should be removed.
        self.pr("flush tier no checkpoint")
        self.session.flush_tier(None)
        flush += 1
        # We should not have flushed either tiered table.
        skip = self.get_stat(stat.conn.flush_tier_skipped, None)
        self.assertEqual(skip, 2)

        self.session.checkpoint()
        self.session.flush_tier(None)
        # Now we should have switched both tables. The skip value should stay the same.
        skip = self.get_stat(stat.conn.flush_tier_skipped, None)
        self.assertEqual(skip, 2)
        switch = self.get_stat(stat.conn.flush_tier_switched, None)
        self.assertEqual(switch, 2)
        flush += 1
        self.pr("Check for ")
        self.pr(self.obj1file)
        self.assertTrue(os.path.exists(self.obj1file))
        self.assertTrue(os.path.exists(self.obj2file))
        remove1 = self.get_stat(stat.conn.local_objects_removed, None)
        time.sleep(self.retention + 1)
        # We call flush_tier here because otherwise the internal thread that
        # processes the work units won't run for a while. This call will signal
        # the internal thread to process the work units.
        self.session.flush_tier('force=true')
        flush += 1
        # We still sleep to give the internal thread a chance to run. Some slower
        # systems can fail here if we don't give them time.
        time.sleep(1)
        self.pr("Check removal of ")
        self.pr(self.obj1file)
        self.assertFalse(os.path.exists(self.obj1file))
        remove2 = self.get_stat(stat.conn.local_objects_removed, None)
        self.assertTrue(remove2 > remove1)

        c = self.session.open_cursor(self.uri)
        c["1"] = "1"
        c1["1"] = "1"
        cn["1"] = "1"
        self.check(c, 2)
        c.close()

        c = self.session.open_cursor(self.uri)
        c["2"] = "2"
        c1["2"] = "2"
        cn["2"] = "2"
        self.check(c, 3)
        c1.close()
        cn.close()
        self.session.checkpoint()

        self.pr("flush tier again, holding open cursor")
        self.session.flush_tier(None)
        flush += 1

        c["3"] = "3"
        self.check(c, 4)
        c.close()

        calls = self.get_stat(stat.conn.flush_tier, None)
        self.assertEqual(calls, flush)
        obj = self.get_stat(stat.conn.tiered_object_size, None)
        self.assertEqual(obj, self.object_sys_val)

        # As we flush each object, the next object exists, but our first flush was a no-op.
        # So the value for the last file: object should be 'flush'.
        last = 'last=' + str(flush)
        # For now all earlier objects exist. So it is always 1 until garbage collection
        # starts removing them.
        oldest = 'oldest=1'
        fileuri = self.fileuri_base + str(flush) + '.wtobj'
        self.check_metadata(self.tiereduri, intl_page)
        self.check_metadata(self.tiereduri, last)
        self.check_metadata(self.tiereduri, oldest)
        self.check_metadata(fileuri, intl_page)
        self.check_metadata(self.objuri, intl_page)

        # Check for the correct tiered_object setting for both tiered and not tiered tables.
        tiered_false = 'tiered_object=false'
        tiered_true = 'tiered_object=true'
        self.check_metadata(fileuri, tiered_true)
        self.check_metadata(self.objuri, tiered_true)
        self.check_metadata(self.tieruri, tiered_true)
        self.check_metadata(self.file_none, tiered_false)

        # Now test some connection statistics with operations.
        retain = self.get_stat(stat.conn.tiered_retention, None)
        self.assertEqual(retain, self.retention)
        self.session.flush_tier(None)
        skip1 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch1 = self.get_stat(stat.conn.flush_tier_switched, None)
        # Make sure the last checkpoint and this flush tier are timed differently
        # so that we can specifically check the statistics and code paths in the test.
        # Sleep some to control the execution.
        time.sleep(2)
        self.session.flush_tier('force=true')
        skip2 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch2 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.assertGreater(switch2, switch1)
        self.assertEqual(skip1, skip2)
        flush += 2
        calls = self.get_stat(stat.conn.flush_tier, None)
        self.assertEqual(calls, flush)

        # Test reconfiguration.
        config = 'tiered_storage=(local_retention=%d)' % self.retention1
        self.pr("reconfigure")
        self.conn.reconfigure(config)
        retain = self.get_stat(stat.conn.tiered_retention, None)
        self.assertEqual(retain, self.retention1)

        # Call flush_tier with its various configuration arguments. It is difficult
        # to force a timeout or lock contention with a unit test. So just test the
        # call for now.
        #
        # There have been no data changes nor checkpoints since the last flush_tier with
        # force, above. The skip statistics should increase and the switched
        # statistics should stay the same.
        skip1 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch1 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.session.flush_tier('timeout=100')
        skip2 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch2 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.assertEqual(switch1, switch2)
        self.assertGreater(skip2, skip1)

        self.session.flush_tier('lock_wait=false')
        self.session.flush_tier('sync=off')
        flush += 3
        self.pr("reconfigure get stat")
        calls = self.get_stat(stat.conn.flush_tier, None)
        self.assertEqual(calls, flush)

        # Test that the checkpoint and flush times work across a connection restart.
        # Make modifications and then close the connection (which will checkpoint).
        # Reopen the connection and call flush_tier. Verify this flushes the object.
        c = self.session.open_cursor(self.uri)
        c["4"] = "4"
        self.check(c, 5)
        c.close()
        # Manually reopen the connection because the default function above tries to
        # make the bucket directories.
        self.reopen_conn(config=self.saved_conn)
        remove1 = self.get_stat(stat.conn.local_objects_removed, None)
        skip1 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch1 = self.get_stat(stat.conn.flush_tier_switched, None)
        self.session.flush_tier(None)
        skip2 = self.get_stat(stat.conn.flush_tier_skipped, None)
        switch2 = self.get_stat(stat.conn.flush_tier_switched, None)

        # The first flush_tier after restart should have queued removal work units
        # for other objects. Sleep and then force a flush tier to signal the internal
        # thread and make sure that some objects were removed.
        time.sleep(self.retention + 1)
        self.session.flush_tier('force=true')

        # Sleep to give the internal thread time to run and process.
        time.sleep(1)
        self.assertFalse(os.path.exists(self.obj1file))
        remove2 = self.get_stat(stat.conn.local_objects_removed, None)
        self.assertTrue(remove2 > remove1)
        #
        # Due to the above modification, we should skip the 'other' table while
        # switching the main tiered table. Therefore, both the skip and switch
        # values should increase by one.
        self.assertEqual(skip2, skip1 + 1)
        self.assertEqual(switch2, switch1 + 1)
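# The flush_tier configuration arguments exercised above, summarized from this
# test's calls and comments (see the WiredTiger API documentation for the
# authoritative semantics):
#
#   session.flush_tier(None)               # flush data checkpointed since the last flush
#   session.flush_tier('force=true')       # flush even if nothing has changed
#   session.flush_tier('timeout=100')      # bound how long the call may wait
#   session.flush_tier('lock_wait=false')  # return rather than wait on contention
#   session.flush_tier('sync=off')         # do not wait for the flush to complete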
class test_tiered11(wttest.WiredTigerTestCase):
    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered11-000000000'
    nentries = 10
    objuri = 'object:' + base + '1.wtobj'
    tiereduri = "tiered:test_tiered11"
    uri = "table:test_tiered11"

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
            'debug_mode=(flush_checkpoint=true),' + \
            'statistics=(all),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s)' % self.ss_name
        return self.saved_conn

    # Load the storage store extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    # Check for a specific string as part of the uri's metadata.
    def check_metadata(self, uri, val_str, match=True):
        #self.pr("Check_meta: uri: " + uri)
        c = self.session.open_cursor('metadata:')
        val = c[uri]
        c.close()
        #self.pr("Check_meta: metadata val: " + val)
        if match:
            #self.pr("Check_meta: Should see val_str: " + val_str)
            self.assertTrue(val_str in val)
        else:
            #self.pr("Check_meta: Should not see val_str: " + val_str)
            self.assertFalse(val_str in val)

    def add_data(self, start):
        c = self.session.open_cursor(self.uri)
        # Begin by adding some data.
        end = start + self.nentries
        for i in range(start, end):
            self.session.begin_transaction()
            c[i] = i
            # Jump the commit TS to leave room for the stable TS separate from any commit TS.
            self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(i * 2))
        # Set the oldest and stable timestamp to the end.
        end_ts = self.timestamp_str(end - 1)
        self.conn.set_timestamp('oldest_timestamp=' + end_ts + ',stable_timestamp=' + end_ts)
        c.close()
        return end_ts

    # Test calling the flush_tier API.
    def test_tiered11(self):
        # Create a tiered table and checkpoint. Make sure the recorded
        # timestamp is what we expect.
        intl_page = 'internal_page_max=16K'
        base_create = 'key_format=i,value_format=i,' + intl_page
        self.session.create(self.uri, base_create)

        end_ts = self.add_data(1)
        self.session.checkpoint()

        new_end_ts = self.add_data(self.nentries)
        # We have a new stable timestamp, but after the checkpoint. Make
        # sure the flush tier records the correct timestamp.
        self.session.flush_tier(None)
        # Make sure a new checkpoint doesn't change any of our timestamp info.
        self.session.checkpoint()

        flush_str = 'flush_timestamp="' + end_ts + '"'
        self.check_metadata(self.tiereduri, flush_str)
        self.check_metadata(self.objuri, flush_str)
        # Make sure some flush time was saved. We don't know what it is other
        # than it should not be zero.
        time_str = "flush_time=0"
        self.check_metadata(self.tiereduri, time_str, False)
        self.check_metadata(self.objuri, time_str, False)
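# A sketch of the metadata fields these checks target, as they might appear in
# the 'tiered:' entry (field values are illustrative; WiredTiger timestamps are
# hexadecimal strings):
#
#   ...,flush_time=1673459102,flush_timestamp="13",...
#
# The test only asserts that flush_timestamp equals the stable timestamp at the
# checkpoint and that the literal substring 'flush_time=0' does not appear.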
class test_tiered06(wttest.WiredTigerTestCase): storage_sources = [ ('dir_store', dict(auth_token = get_auth_token('dir_store'), bucket1 = get_bucket1_name('dir_store'), bucket2 = get_bucket2_name('dir_store'), bucket_prefix_base = "pfx_", ss_name = 'dir_store')), ('s3', dict(auth_token = get_auth_token('s3_store'), bucket1 = get_bucket1_name('s3_store'), bucket2 = get_bucket2_name('s3_store'), bucket_prefix_base = generate_s3_prefix(), ss_name = 's3_store')), ] # Make scenarios for different cloud service providers scenarios = make_scenarios(storage_sources) # Load the storage store extension. def conn_extensions(self, extlist): config = '' # S3 store is built as an optional loadable extension, not all test environments build S3. if self.ss_name == 's3_store': #config = '=(config=\"(verbose=1)\")' extlist.skip_if_missing = True #if self.ss_name == 'dir_store': #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")' # Windows doesn't support dynamically loaded extension libraries. if os.name == 'nt': extlist.skip_if_missing = True extlist.extension('storage_sources', self.ss_name + config) def breakpoint(self): import pdb, sys sys.stdin = open('/dev/tty', 'r') sys.stdout = open('/dev/tty', 'w') sys.stderr = open('/dev/tty', 'w') pdb.set_trace() def get_storage_source(self): return self.conn.get_storage_source(self.ss_name) def get_fs_config(self, prefix = '', cache_dir = ''): conf = '' if prefix: conf += ',prefix=' + prefix if cache_dir: conf += ',cache_directory=' + cache_dir return conf def test_ss_basic(self): # Test some basic functionality of the storage source API, calling # each supported method in the API at least once. session = self.session ss = self.get_storage_source() # Since this class has multiple tests, append test name to the prefix to # avoid namespace collison. 0th element on the stack is the current function. prefix = self.bucket_prefix_base + inspect.stack()[0][3] + '/' # The directory store needs the bucket created as a directory on the filesystem. if self.ss_name == 'dir_store': os.mkdir(self.bucket1) fs = ss.ss_customize_file_system(session, self.bucket1, self.auth_token, self.get_fs_config(prefix)) # The object doesn't exist yet. if self.ss_name == 's3_store': with self.expectedStderrPattern('.*HTTP response code: 404.*'): self.assertFalse(fs.fs_exist(session, 'foobar')) else: self.assertFalse(fs.fs_exist(session, 'foobar')) # We cannot use the file system to create files, it is readonly. # So use python I/O to build up the file. f = open('foobar', 'wb') # The object still doesn't exist yet. if self.ss_name == 's3_store': with self.expectedStderrPattern('.*HTTP response code: 404.*'): self.assertFalse(fs.fs_exist(session, 'foobar')) else: self.assertFalse(fs.fs_exist(session, 'foobar')) outbytes = ('MORE THAN ENOUGH DATA\n'*100000).encode() f.write(outbytes) f.close() # Nothing is in the directory list until a flush. self.assertEquals(fs.fs_directory_list(session, '', ''), []) # Flushing copies the file into the file system. ss.ss_flush(session, fs, 'foobar', 'foobar', None) ss.ss_flush_finish(session, fs, 'foobar', 'foobar', None) # The object exists now. self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar']) self.assertTrue(fs.fs_exist(session, 'foobar')) fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly) inbytes = bytes(1000000) # An empty buffer with a million zero bytes. fh.fh_read(session, 0, inbytes) # Read into the buffer. 
self.assertEquals(outbytes[0:1000000], inbytes) self.assertEquals(fs.fs_size(session, 'foobar'), len(outbytes)) self.assertEquals(fh.fh_size(session), len(outbytes)) fh.close(session) # The fh_lock call doesn't do anything in the directory and S3 store implementation. fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly) fh.fh_lock(session, True) fh.fh_lock(session, False) fh.close(session) # Files that have been flushed cannot be manipulated. with self.expectedStderrPattern('foobar: rename of file not supported'): self.assertRaisesException(wiredtiger.WiredTigerError, lambda: fs.fs_rename(session, 'foobar', 'barfoo', 0)) self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar']) # Files that have been flushed cannot be manipulated through the custom file system. with self.expectedStderrPattern('foobar: remove of file not supported'): self.assertRaisesException(wiredtiger.WiredTigerError, lambda: fs.fs_remove(session, 'foobar', 0)) self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar']) fs.terminate(session) ss.terminate(session) def test_ss_write_read(self): # Write and read to a file non-sequentially. session = self.session ss = self.get_storage_source() # Since this class has multiple tests, append test name to the prefix to # avoid namespace collison. 0th element on the stack is the current function. prefix = self.bucket_prefix_base + inspect.stack()[0][3] + '/' cachedir = self.bucket1 + '_cache' os.mkdir(cachedir) # Directory store needs the bucket created as a directory on the filesystem. if self.ss_name == 'dir_store': os.mkdir(self.bucket1) fs = ss.ss_customize_file_system(session, self.bucket1, self.auth_token, self.get_fs_config(prefix, cachedir)) # We call these 4K chunks of data "blocks" for this test, but that doesn't # necessarily relate to WT block sizing. nblocks = 1000 block_size = 4096 f = open('abc', 'wb') # Create some blocks filled with 'a', etc. a_block = ('a' * block_size).encode() b_block = ('b' * block_size).encode() c_block = ('c' * block_size).encode() file_size = nblocks * block_size # Write all blocks as 'a', but in reverse order. for pos in range(file_size - block_size, 0, -block_size): f.seek(pos) f.write(a_block) # Write the even blocks as 'b', forwards. for pos in range(0, file_size, block_size * 2): f.seek(pos) f.write(b_block) # Write every third block as 'c', backwards. for pos in range(file_size - block_size, 0, -block_size * 3): f.seek(pos) f.write(c_block) f.close() # Flushing copies the file into the file system. ss.ss_flush(session, fs, 'abc', 'abc', None) ss.ss_flush_finish(session, fs, 'abc', 'abc', None) # Use the file system to open and read the file. # We do this twice, and between iterations, we remove the cached file to make sure # it is copied back from the bucket. # # XXX: this uses knowledge of the implementation, but at the current time, # we don't have a way via the API to "age out" a file from the cache. for i in range(0, 2): in_block = bytes(block_size) fh = fs.fs_open_file(session, 'abc', FileSystem.open_file_type_data, FileSystem.open_readonly) # Do some spot checks, reading non-sequentially. 
fh.fh_read(session, 500 * block_size, in_block) # divisible by 2, not 3 self.assertEquals(in_block, b_block) fh.fh_read(session, 333 * block_size, in_block) # divisible by 3, not 2 self.assertEquals(in_block, c_block) fh.fh_read(session, 401 * block_size, in_block) # not divisible by 2 or 3 self.assertEquals(in_block, a_block) # Read the whole file, backwards checking to make sure # each block was written correctly. for block_num in range(nblocks - 1, 0, -1): pos = block_num * block_size fh.fh_read(session, pos, in_block) if block_num % 3 == 0: self.assertEquals(in_block, c_block) elif block_num % 2 == 0: self.assertEquals(in_block, b_block) else: self.assertEquals(in_block, a_block) fh.close(session) os.remove(os.path.join(cachedir, 'abc')) ss.terminate(session) def create_with_fs(self, fs, fname): session = self.session f = open(fname, 'wb') f.write('some stuff'.encode()) f.close() cachedir1 = "./cache1" cachedir2 = "./cache2" # Add a suffix to each in a list. def suffix(self, lst, sfx): return [x + '.' + sfx for x in lst] def check_dirlist(self, fs, prefix, expect): # We don't require any sorted output for directory lists, # so we'll sort before comparing.' got = sorted(fs.fs_directory_list(self.session, '', prefix)) expect = sorted(self.suffix(expect, 'wtobj')) self.assertEquals(got, expect) # Check for data files in the WiredTiger home directory. def check_home(self, expect): # Get list of all .wt files in home, prune out the WiredTiger produced ones. got = sorted(list(os.listdir(self.home))) got = [x for x in got if not x.startswith('WiredTiger') and x.endswith('.wt')] expect = sorted(self.suffix(expect, 'wt')) self.assertEquals(got, expect) # Check that objects are "in the cloud" for the directory store after a flush. # Using the directory storage module, they are actually going to be in either # bucket1 or bucket2. def check_local_objects(self, expect1, expect2): if self.ss_name != 'dir_store': return got = sorted(list(os.listdir(self.bucket1))) expect = sorted(self.suffix(expect1, 'wtobj')) self.assertEquals(got, expect) got = sorted(list(os.listdir(self.bucket2))) expect = sorted(self.suffix(expect2, 'wtobj')) self.assertEquals(got, expect) # Check that objects are in the cache directory after flush_finish. def check_caches(self, expect1, expect2): got = sorted(list(os.listdir(self.cachedir1))) expect = sorted(self.suffix(expect1, 'wtobj')) self.assertEquals(got, expect) got = sorted(list(os.listdir(self.cachedir2))) expect = sorted(self.suffix(expect2, 'wtobj')) self.assertEquals(got, expect) def create_wt_file(self, name): with open(name + '.wt', 'w') as f: f.write('hello') def test_ss_file_systems(self): # Test using various buckets, hosts. session = self.session ss = self.get_storage_source() # Since this class has multiple tests, append test name to the prefix to # avoid namespace collison. 0th element on the stack is the current function. prefix = self.bucket_prefix_base + inspect.stack()[0][3] + '/' # Directory store needs the bucket created as a directory on the filesystem. if self.ss_name == 'dir_store': os.mkdir(self.bucket1) os.mkdir(self.bucket2) os.mkdir(self.cachedir1) os.mkdir(self.cachedir2) bad_bucket = "./objects_BAD" bad_cachedir = '/BAD' # Create file system objects. First try some error cases. 
errmsg = '/No such /' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: ss.ss_customize_file_system(session, self.bucket1, self.auth_token, self.get_fs_config(prefix, bad_cachedir)), errmsg) # S3 store expects a region with the bucket if self.ss_name == 's3_store': bad_bucket += ';us-east-2' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: ss.ss_customize_file_system(session, bad_bucket, self.auth_token, self.get_fs_config(prefix, self.cachedir1)), errmsg) # For directory store - Create an empty file, try to use it as a directory. if self.ss_name == 'dir_store': with open("some_file", "w"): pass errmsg = '/Invalid argument/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: ss.ss_customize_file_system( session, "some_file", self.auth_token, ',cache_directory=' + self.bucket1), errmsg) # Now create some file systems that should succeed. # Use either different bucket directories or different prefixes, # so activity that happens in the various file systems should be independent. fs1 = ss.ss_customize_file_system(session, self.bucket1, self.auth_token, self.get_fs_config(prefix, self.cachedir1)) fs2 = ss.ss_customize_file_system(session, self.bucket2, self.auth_token, self.get_fs_config(prefix, self.cachedir2)) # Create files in the wt home directory. for a in ['beagle', 'bird', 'bison', 'bat']: self.create_wt_file(a) for a in ['cat', 'cougar', 'coyote', 'cub']: self.create_wt_file(a) # Everything is in wt home, nothing in the file system yet. self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', []) self.check_dirlist(fs2, '', []) self.check_caches([], []) self.check_local_objects([], []) # A flush copies to the cloud, nothing is removed. ss.ss_flush(session, fs1, 'beagle.wt', 'beagle.wtobj') self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', ['beagle']) self.check_dirlist(fs2, '', []) self.check_caches([], []) self.check_local_objects(['beagle'], []) # Bad file to flush. errmsg = '/No such file/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: ss.ss_flush(session, fs1, 'bad.wt', 'bad.wtobj'), errmsg) # It's okay to flush again, nothing changes. ss.ss_flush(session, fs1, 'beagle.wt', 'beagle.wtobj') self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', ['beagle']) self.check_dirlist(fs2, '', []) self.check_caches([], []) self.check_local_objects(['beagle'], []) # When we flush_finish, the local file will be in both the local and cache directory. ss.ss_flush_finish(session, fs1, 'beagle.wt', 'beagle.wtobj') self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', ['beagle']) self.check_dirlist(fs2, '', []) self.check_caches(['beagle'], []) self.check_local_objects(['beagle'], []) # Do a some more in each file system. 
        ss.ss_flush(session, fs1, 'bison.wt', 'bison.wtobj')
        ss.ss_flush(session, fs2, 'cat.wt', 'cat.wtobj')
        ss.ss_flush(session, fs1, 'bat.wt', 'bat.wtobj')
        ss.ss_flush_finish(session, fs2, 'cat.wt', 'cat.wtobj')
        ss.ss_flush(session, fs2, 'cub.wt', 'cub.wtobj')
        ss.ss_flush_finish(session, fs1, 'bat.wt', 'bat.wtobj')

        self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub'])
        self.check_dirlist(fs1, '', ['beagle', 'bat', 'bison'])
        self.check_dirlist(fs2, '', ['cat', 'cub'])
        self.check_caches(['beagle', 'bat'], ['cat'])
        self.check_local_objects(['beagle', 'bat', 'bison'], ['cat', 'cub'])

        # Test directory listing prefixes.
        self.check_dirlist(fs1, '', ['beagle', 'bat', 'bison'])
        self.check_dirlist(fs1, 'ba', ['bat'])
        self.check_dirlist(fs1, 'be', ['beagle'])
        self.check_dirlist(fs1, 'x', [])

        # Terminate just one of the custom file systems.
        # We should be able to terminate file systems, but we should
        # also be able to terminate the storage source without terminating
        # all the file systems we created.
        fs1.terminate(session)
        ss.terminate(session)
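
# A minimal sketch (not part of the test above) of how the two-step flush
# protocol composes: ss_flush copies a local file into the bucket, and
# ss_flush_finish then also makes the object available through the file
# system's cache directory, as the checks above demonstrate. The ss, fs and
# session handles are assumed to come from a harness like this test's, and
# 'publish_object' is a hypothetical helper name, not a WiredTiger API.
def publish_object(ss, session, fs, local_name, obj_name):
    # Copy the local file to shared storage. The test above shows that
    # flushing the same file twice changes nothing, so this is retry-safe.
    ss.ss_flush(session, fs, local_name, obj_name)
    # Complete the flush; afterwards the object also appears in the cache.
    ss.ss_flush_finish(session, fs, local_name, obj_name)

# For example, the 'beagle' sequence above could be condensed to:
#     publish_object(ss, session, fs1, 'beagle.wt', 'beagle.wtobj')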
class test_tiered08(wttest.WiredTigerTestCase):

    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store'))
    ]
    # Make scenarios for different cloud service providers.
    scenarios = make_scenarios(storage_sources)

    batch_size = 100000

    # Keep inserting keys until we've done this many flush and checkpoint ops.
    ckpt_flush_target = 10

    uri = "table:test_tiered08"

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        return \
            'debug_mode=(flush_checkpoint=true),' + \
            'statistics=(fast),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s),tiered_manager=(wait=0)' % self.ss_name

    # Load the storage source extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def get_stat(self, stat):
        stat_cursor = self.session.open_cursor('statistics:')
        val = stat_cursor[stat][2]
        stat_cursor.close()
        return val

    def key_gen(self, i):
        return 'KEY' + str(i)

    def value_gen(self, i):
        return 'VALUE_' + 'filler' * (i % 12) + str(i)

    # Populate the test table. Keep adding keys until the desired number of flush and
    # checkpoint operations have happened.
    def populate(self):
        ckpt_count = 0
        flush_count = 0
        nkeys = 0

        self.pr('Populating tiered table')
        c = self.session.open_cursor(self.uri, None, None)
        while ckpt_count < self.ckpt_flush_target or flush_count < self.ckpt_flush_target:
            for i in range(nkeys, nkeys + self.batch_size):
                c[self.key_gen(i)] = self.value_gen(i)
            nkeys += self.batch_size
            ckpt_count = self.get_stat(stat.conn.txn_checkpoint)
            flush_count = self.get_stat(stat.conn.flush_tier)
        c.close()
        return nkeys

    def verify(self, key_count):
        self.pr('Verifying tiered table')
        c = self.session.open_cursor(self.uri, None, None)
        for i in range(key_count):
            self.assertEqual(c[self.key_gen(i)], self.value_gen(i))
        c.close()

    def test_tiered08(self):
        # FIXME-WT-7833
        # This test can trigger races in file handle access during flush_tier.
        # We will re-enable it when that is fixed.
        self.skipTest('Concurrent flush_tier and insert operations not supported yet.')

        cfg = self.conn_config()
        self.pr('Config is: ' + cfg)
        intl_page = 'internal_page_max=16K'
        base_create = 'key_format=S,value_format=S,' + intl_page
        self.session.create(self.uri, base_create)

        done = threading.Event()
        ckpt = checkpoint_thread(self.conn, done)
        flush = flush_tier_thread(self.conn, done)

        # Start background threads and give them a chance to start.
        ckpt.start()
        flush.start()
        time.sleep(0.5)

        key_count = self.populate()

        done.set()
        flush.join()
        ckpt.join()

        self.verify(key_count)

        self.close_conn()
        self.pr('Reopening tiered table')
        self.reopen_conn()

        self.verify(key_count)
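
# A minimal sketch of the background-thread pattern test_tiered08 depends on.
# The real checkpoint_thread and flush_tier_thread helpers come from the test
# harness; this hypothetical example_flush_thread only illustrates the shape:
# open a private session, loop until the main thread sets the event, and call
# flush_tier on each pass.
import threading, time

class example_flush_thread(threading.Thread):
    def __init__(self, conn, done):
        threading.Thread.__init__(self)
        self.conn = conn
        self.done = done    # threading.Event, set by the main thread when finished

    def run(self):
        session = self.conn.open_session()
        while not self.done.is_set():
            # Pause between flushes so the loop isn't a busy spin.
            time.sleep(1)
            # Copy data checkpointed since the last call to shared storage.
            session.flush_tier(None)
        session.close()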
class test_tiered12(wttest.WiredTigerTestCase):

    storage_sources = [
        ('dir_store', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Make scenarios for different cloud service providers.
    scenarios = make_scenarios(storage_sources)

    # If the 'uri' changes all the other names must change with it.
    base = 'test_tiered12-000000000'
    obj1file = base + '1.wtobj'
    uri = "table:test_tiered12"

    retention = 1
    saved_conn = ''

    def conn_config(self):
        if self.ss_name == 'dir_store' and not os.path.exists(self.bucket):
            os.mkdir(self.bucket)
        self.saved_conn = \
            'debug_mode=(flush_checkpoint=true),' + \
            'statistics=(all),timing_stress_for_test=(tiered_flush_finish),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % self.bucket + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'local_retention=%d,' % self.retention + \
            'name=%s)' % self.ss_name
        return self.saved_conn

    # Load the storage source extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    def check(self, tc, n):
        for i in range(0, n):
            self.assertEqual(tc[str(i)], str(i))
        tc.set_key(str(n))
        self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND)

    def test_tiered(self):
        # The default cache location is cache-<bucket-name>.
        cache = "cache-" + self.bucket
        # The bucket format for the S3 store is the name and the region separated by a semicolon.
        # Strip off the region to get the cache folder.
        if self.ss_name == 's3_store':
            cache = cache[:cache.find(';')]

        # Create a table. Add some data. Checkpoint and flush tier.
        # We have configured the timing stress for tiered caching, which delays
        # the internal thread calling flush_finish by one second.
        # So after flush tier completes, the cached object should not yet
        # exist; after sleeping, it should.
        #
        # The idea is to make sure flush_tier is not waiting for unnecessary work
        # to be done, but returns as soon as the copying to shared storage completes.
        self.session.create(self.uri, 'key_format=S,value_format=S,')

        # Add data. Checkpoint and flush.
        c = self.session.open_cursor(self.uri)
        c["0"] = "0"
        self.check(c, 1)
        c.close()
        self.session.checkpoint()
        self.session.flush_tier(None)

        # On directory store, the bucket object should exist.
        if self.ss_name == 'dir_store':
            bucket_obj = os.path.join(self.bucket, self.bucket_prefix + self.obj1file)
            self.assertTrue(os.path.exists(bucket_obj))

        # Sleep more than the one-second stress timing amount to give the thread time to run.
        time.sleep(2)
        # After sleeping, the internal thread should have created the cached object.
        cache_obj = os.path.join(cache, self.bucket_prefix + self.obj1file)
        self.assertTrue(os.path.exists(cache_obj))
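
# The fixed two-second sleep above is simple but timing-sensitive. A
# hypothetical alternative (not used by the test suite) is to poll for the
# cached object with a timeout, which tolerates a slow flush_finish thread
# without over-sleeping on fast machines.
import os, time

def wait_for_path(path, timeout=10.0, interval=0.1):
    # Return True as soon as 'path' exists, False once 'timeout' seconds elapse.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if os.path.exists(path):
            return True
        time.sleep(interval)
    return False

# Usage in place of the sleep-then-assert sequence:
#     self.assertTrue(wait_for_path(cache_obj))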
class test_tiered03(wttest.WiredTigerTestCase):
    K = 1024
    M = 1024 * K
    G = 1024 * M
    # TODO: tiered: change this to a table: URI, otherwise we are
    # not using tiered files. The use of a second directory for
    # sharing would probably need to be reworked.
    uri = 'file:test_tiered03'

    storage_sources = [
        ('dirstore', dict(auth_token=get_auth_token('dir_store'),
            bucket=get_bucket1_name('dir_store'),
            bucket_prefix="pfx_",
            ss_name='dir_store')),
        ('s3', dict(auth_token=get_auth_token('s3_store'),
            bucket=get_bucket1_name('s3_store'),
            bucket_prefix=generate_s3_prefix(),
            ss_name='s3_store')),
    ]
    # Occasionally add a lot of records to vary the amount of work flush does.
    record_count_scenarios = wtscenario.quick_scenarios(
        'nrecs', [10, 10000], [0.9, 0.1])

    scenarios = wtscenario.make_scenarios(storage_sources, record_count_scenarios,
        prune=100, prunelong=500)

    absolute_bucket_dir = None  # initialized in conn_config to an absolute path

    def conn_config(self):
        bucket_ret = self.bucket

        # The bucket format for the S3 store is the name and the region separated by a semicolon.
        if self.ss_name == 's3_store':
            cache_dir = self.bucket[:self.bucket.find(';')] + '-cache'
        else:
            cache_dir = self.bucket + '-cache'

        # We have multiple connections that want to share a bucket.
        # For the directory store, the first time this function is called, we'll
        # establish the absolute path for the bucket, and always use that for
        # the bucket name.
        # The cache directory name is a relative one, so it won't be shared
        # between connections.
        if self.ss_name == 'dir_store':
            if self.absolute_bucket_dir is None:
                self.absolute_bucket_dir = os.path.join(os.getcwd(), self.bucket)
                os.mkdir(self.absolute_bucket_dir)
            bucket_ret = self.absolute_bucket_dir
        return \
            'debug_mode=(flush_checkpoint=true),' + \
            'tiered_storage=(auth_token=%s,' % self.auth_token + \
            'bucket=%s,' % bucket_ret + \
            'cache_directory=%s,' % cache_dir + \
            'bucket_prefix=%s,' % self.bucket_prefix + \
            'name=%s)' % self.ss_name

    # Load the storage source extension.
    def conn_extensions(self, extlist):
        config = ''
        # S3 store is built as an optional loadable extension, not all test environments build S3.
        if self.ss_name == 's3_store':
            #config = '=(config=\"(verbose=1)\")'
            extlist.skip_if_missing = True
        #if self.ss_name == 'dir_store':
            #config = '=(config=\"(verbose=1,delay_ms=200,force_delay=3)\")'
        # Windows doesn't support dynamically loaded extension libraries.
        if os.name == 'nt':
            extlist.skip_if_missing = True
        extlist.extension('storage_sources', self.ss_name + config)

    # Test sharing data between a primary and a secondary.
    def test_sharing(self):
        # FIXME: WT-8235 Enable the test once sharing a checkpoint file
        # containing transaction ids is supported.
        self.skipTest('Sharing the checkpoint file containing transaction ids is not supported')

        ds = SimpleDataSet(self, self.uri, 10)
        ds.populate()
        ds.check()
        self.session.checkpoint()
        ds.check()

        # Create a secondary database.
        dir2 = os.path.join(self.home, 'SECONDARY')
        os.mkdir(dir2)
        conn2 = self.setUpConnectionOpen(dir2)
        session2 = conn2.open_session()

        # Reference the tree from the secondary:
        metac = self.session.open_cursor('metadata:')
        metac2 = session2.open_cursor('metadata:', None, 'readonly=0')
        uri2 = self.uri[:5] + '../' + self.uri[5:]
        metac2[uri2] = metac[self.uri] + ",readonly=1"

        cursor2 = session2.open_cursor(uri2)
        ds.check_cursor(cursor2)
        cursor2.close()

        newds = SimpleDataSet(self, self.uri, 10000)
        newds.populate()
        newds.check()
        self.session.checkpoint()
        newds.check()

        # Check that we can still read from the last checkpoint.
        cursor2 = session2.open_cursor(uri2)
        ds.check_cursor(cursor2)
        cursor2.close()

        # Bump to the new checkpoint.
        origmeta = metac[self.uri]
        checkpoint = re.search(r',checkpoint=\(.+?\)\)', origmeta).group(0)[1:]
        self.pr('Orig checkpoint: ' + checkpoint)
        session2.alter(uri2, checkpoint)
        self.pr('New metadata on secondary: ' + metac2[uri2])

        # Check that we can see the new data.
        cursor2 = session2.open_cursor(uri2)
        newds.check_cursor(cursor2)
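
# A worked example of the checkpoint-extraction regex used above, run against
# a made-up metadata fragment (real metadata strings carry many more keys;
# the addr value here is purely illustrative).
import re

example_meta = 'key_format=S,value_format=S,checkpoint=(WiredTigerCheckpoint.1=(addr="abc"))'
match = re.search(r',checkpoint=\(.+?\)\)', example_meta)
# group(0) includes the leading comma; the [1:] slice drops it, leaving a
# bare 'checkpoint=(...)' configuration string that can be handed to
# session.alter() to move the secondary to the new checkpoint, as done above.
assert match.group(0)[1:] == 'checkpoint=(WiredTigerCheckpoint.1=(addr="abc"))'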