def test_list_methods(self): fake_bucket = Path('s3://stor-test-bucket2') with self.assertRaises(exceptions.NotFoundError): fake_bucket.list() fake_folder = self.test_bucket / 'not_a_dir' self.assertEquals([], fake_folder.list()) with NamedTemporaryDirectory(change_dir=True): open('file1.txt', 'w').close() open('file2.txt', 'w').close() os.mkdir('nested_dir') os.mkdir('nested_dir/dir') open('nested_dir/dir/file3.txt', 'w').close() self.test_dir.upload(['.']) file_list = self.test_dir.list() starts_with_list = self.test_bucket.list(starts_with='test') self.assertEquals(set(file_list), set(starts_with_list)) self.assertEquals( set(file_list), set([ self.test_dir / 'file1.txt', self.test_dir / 'file2.txt', self.test_dir / 'nested_dir/dir/file3.txt' ])) dir_list = self.test_dir.listdir() self.assertEquals( set(dir_list), set([ self.test_dir / 'file1.txt', self.test_dir / 'file2.txt', self.test_dir / 'nested_dir/' ])) self.assertTrue(self.test_dir.listdir() == (self.test_dir + '/').listdir())
def test_static_large_obj_copy_and_segment_container(self): with NamedTemporaryDirectory(change_dir=True) as tmp_d: segment_size = 1048576 obj_size = segment_size * 4 + 100 self.create_dataset(tmp_d, 1, obj_size) obj_path = stor.join(tmp_d, self.get_dataset_obj_names(1)[0]) options = {'swift:upload': {'segment_size': segment_size}} with settings.use(options): obj_path.copy(self.test_container / 'large_object.txt') # Verify there is a segment container and that it can be ignored when listing a dir segment_container = Path(self.test_container.parent) / ('.segments_%s' % self.test_container.name) # noqa containers = Path(self.test_container.parent).listdir(ignore_segment_containers=False) self.assertTrue(segment_container in containers) self.assertTrue(self.test_container in containers) containers = Path(self.test_container.parent).listdir(ignore_segment_containers=True) self.assertFalse(segment_container in containers) self.assertTrue(self.test_container in containers) # Verify there are five segments objs = set(segment_container.list(condition=lambda results: len(results) == 5)) self.assertEquals(len(objs), 5) # Copy back the large object and verify its contents obj_path = Path(tmp_d) / 'large_object.txt' Path(self.test_container / 'large_object.txt').copy(obj_path) self.assertCorrectObjectContents(obj_path, self.get_dataset_obj_names(1)[0], obj_size)
class SwiftIntegrationTest(BaseIntegrationTest.BaseTestCases): def setUp(self): super(SwiftIntegrationTest, self).setUp() if not os.environ.get('SWIFT_TEST_USERNAME'): raise unittest.SkipTest( 'SWIFT_TEST_USERNAME env var not set. Skipping integration test') # Disable loggers so nose output wont be trashed logging.getLogger('requests').setLevel(logging.CRITICAL) logging.getLogger('swiftclient').setLevel(logging.CRITICAL) logging.getLogger('keystoneclient').setLevel(logging.CRITICAL) settings.update({ 'swift': { 'username': os.environ.get('SWIFT_TEST_USERNAME'), 'password': os.environ.get('SWIFT_TEST_PASSWORD'), 'num_retries': 5 }}) # fall back on to swiftstack auth for tenant tenant = os.environ.get('SWIFT_TEST_TENANT', 'AUTH_%s' % os.environ['SWIFT_TEST_USERNAME']) self.test_container = Path('swift://%s/%s' % (tenant, uuid.uuid4())) if self.test_container.exists(): raise ValueError('test container %s already exists.' % self.test_container) try: self.test_container.post() except BaseException: self.test_container.rmtree() raise self.test_dir = self.test_container / 'test' def tearDown(self): super(SwiftIntegrationTest, self).tearDown() self.test_container.rmtree() def test_cached_auth_and_auth_invalidation(self): from swiftclient.client import get_auth_keystone as real_get_keystone swift._clear_cached_auth_credentials() tenant = self.test_container.tenant with mock.patch('swiftclient.client.get_auth_keystone', autospec=True) as mock_get_ks: mock_get_ks.side_effect = real_get_keystone s = Path(self.test_container).stat() self.assertEquals(s['Account'], tenant) self.assertEquals(len(mock_get_ks.call_args_list), 1) # The keystone auth should not be called on another stat mock_get_ks.reset_mock() s = Path(self.test_container).stat() self.assertEquals(s['Account'], tenant) self.assertEquals(len(mock_get_ks.call_args_list), 0) # Set the auth cache to something bad. The auth keystone should # be called twice on another stat. It's first called by the swiftclient # when retrying auth (with the bad token) and then called by us without # a token after the swiftclient raises an authorization error. mock_get_ks.reset_mock() swift._cached_auth_token_map[tenant]['creds']['os_auth_token'] = 'bad_auth' s = Path(self.test_container).stat() self.assertEquals(s['Account'], tenant) self.assertEquals(len(mock_get_ks.call_args_list), 2) # Note that the auth_token is passed into the keystone client but then popped # from the kwargs. Assert that an auth token is no longer part of the retry calls self.assertTrue('auth_token' not in mock_get_ks.call_args_list[0][0][3]) self.assertTrue('auth_token' not in mock_get_ks.call_args_list[1][0][3]) # Now make the auth always be invalid and verify that an auth error is thrown # This also tests that keystone auth errors are propagated as swift # AuthenticationErrors mock_get_ks.reset_mock() swift._clear_cached_auth_credentials() with mock.patch('keystoneclient.v2_0.client.Client') as mock_ks_client: from keystoneclient.exceptions import Unauthorized mock_ks_client.side_effect = Unauthorized with self.assertRaises(swift.AuthenticationError): Path(self.test_container).stat() # Verify that getting the auth was called two more times because of retry # logic self.assertEquals(len(mock_get_ks.call_args_list), 2) def test_static_large_obj_copy_and_segment_container(self): with NamedTemporaryDirectory(change_dir=True) as tmp_d: segment_size = 1048576 obj_size = segment_size * 4 + 100 self.create_dataset(tmp_d, 1, obj_size) obj_path = stor.join(tmp_d, self.get_dataset_obj_names(1)[0]) options = {'swift:upload': {'segment_size': segment_size}} with settings.use(options): obj_path.copy(self.test_container / 'large_object.txt') # Verify there is a segment container and that it can be ignored when listing a dir segment_container = Path(self.test_container.parent) / ('.segments_%s' % self.test_container.name) # noqa containers = Path(self.test_container.parent).listdir(ignore_segment_containers=False) self.assertTrue(segment_container in containers) self.assertTrue(self.test_container in containers) containers = Path(self.test_container.parent).listdir(ignore_segment_containers=True) self.assertFalse(segment_container in containers) self.assertTrue(self.test_container in containers) # Verify there are five segments objs = set(segment_container.list(condition=lambda results: len(results) == 5)) self.assertEquals(len(objs), 5) # Copy back the large object and verify its contents obj_path = Path(tmp_d) / 'large_object.txt' Path(self.test_container / 'large_object.txt').copy(obj_path) self.assertCorrectObjectContents(obj_path, self.get_dataset_obj_names(1)[0], obj_size) @unittest.skipIf(not os.environ.get('OS_TEMP_URL_KEY'), 'No temp url key set') def test_temp_url(self): basic_file = 'test.txt' complex_file = 'my test?file=special_chars.txt' with NamedTemporaryDirectory(change_dir=True) as tmp_d: nested_tmp_dir = stor.join(tmp_d, 'tmp') os.mkdir(nested_tmp_dir) basic_file_p = stor.join(nested_tmp_dir, basic_file) complex_file_p = stor.join(nested_tmp_dir, 'my test?file=special_chars.txt') with stor.open(basic_file_p, 'w') as f: f.write('basic test') with stor.open(complex_file_p, 'w') as f: f.write('complex test') self.test_container.upload(['.']) with NamedTemporaryDirectory(change_dir=True) as tmp_d: basic_obj = stor.Path( stor.join(self.test_container, 'tmp', basic_file)) basic_temp_url = basic_obj.temp_url(inline=False, filename=basic_file) r = requests.get(basic_temp_url) self.assertEquals(r.content, 'basic test') self.assertEquals(r.headers['Content-Disposition'], 'attachment; filename="test.txt"; filename*=UTF-8\'\'test.txt') complex_obj = stor.Path( stor.join(self.test_container, 'tmp', complex_file)) complex_temp_url = complex_obj.temp_url(inline=False, filename=complex_file) r = requests.get(complex_temp_url) self.assertEquals(r.content, 'complex test') self.assertEquals(r.headers['Content-Disposition'], 'attachment; filename="my test%3Ffile%3Dspecial_chars.txt"; filename*=UTF-8\'\'my%20test%3Ffile%3Dspecial_chars.txt') # noqa def test_condition_failures(self): num_test_objs = 20 test_obj_size = 100 test_dir = self.test_container / 'test' with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) Path('.').copytree(test_dir) # Verify a ConditionNotMet exception is thrown when attempting to list # a file that hasn't been uploaded expected_objs = { test_dir / which_obj for which_obj in self.get_dataset_obj_names(num_test_objs + 1) } num_retries = settings.get()['swift']['num_retries'] with mock.patch('time.sleep') as mock_sleep: with self.assertRaises(swift.ConditionNotMetError): test_dir.list(condition=lambda results: expected_objs == set(results)) self.assertTrue(num_retries > 0) self.assertEquals(len(mock_sleep.call_args_list), num_retries) # Verify that the condition passes when excluding the non-extant file expected_objs = { test_dir / which_obj for which_obj in self.get_dataset_obj_names(num_test_objs) } objs = test_dir.list(condition=lambda results: expected_objs == set(results)) self.assertEquals(expected_objs, set(objs)) def test_list_glob(self): num_test_objs = 20 test_obj_size = 100 test_dir = self.test_container / 'test' with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) Path('.').copytree(test_dir) objs = set(test_dir.list(condition=lambda results: len(results) == num_test_objs)) expected_objs = { test_dir / obj_name for obj_name in self.get_dataset_obj_names(num_test_objs) } self.assertEquals(len(objs), num_test_objs) self.assertEquals(objs, expected_objs) expected_glob = { test_dir / obj_name for obj_name in self.get_dataset_obj_names(num_test_objs) if obj_name.startswith('1') } self.assertTrue(len(expected_glob) > 1) globbed_objs = set( test_dir.glob('1*', condition=lambda results: len(results) == len(expected_glob))) self.assertEquals(globbed_objs, expected_glob) def test_copytree_w_headers(self): with NamedTemporaryDirectory(change_dir=True) as tmp_d: open(tmp_d / 'test_obj', 'w').close() stor.copytree( '.', self.test_container, headers=['X-Delete-After:1000']) obj = stor.join(self.test_container, 'test_obj') stat_results = obj.stat() self.assertTrue('x-delete-at' in stat_results['headers']) def test_rmtree(self): with NamedTemporaryDirectory(change_dir=True) as tmp_d: # Make a couple empty test files and nested files tmp_d = Path(tmp_d) os.mkdir(tmp_d / 'my_dir') open(tmp_d / 'my_dir' / 'dir_file1', 'w').close() open(tmp_d / 'my_dir' / 'dir_file2', 'w').close() open(tmp_d / 'base_file1', 'w').close() open(tmp_d / 'base_file2', 'w').close() stor.copytree( '.', self.test_container, use_manifest=True) swift_dir = self.test_container / 'my_dir' self.assertEquals(len(swift_dir.list()), 2) swift_dir.rmtree() self.assertEquals(len(swift_dir.list()), 0) base_contents = self.test_container.list() self.assertTrue((self.test_container / 'base_file1') in base_contents) self.assertTrue((self.test_container / 'base_file1') in base_contents) self.test_container.rmtree() # TODO figure out a better way to test that the container no longer exists. with self.assertRaises(swift.NotFoundError): # Replication may have not happened yet for container deletion. Keep # listing in intervals until a NotFoundError is thrown for i in (0, 1, 3): time.sleep(i) self.test_container.list() def test_is_methods(self): container = self.test_container container = self.test_container file_with_prefix = stor.join(container, 'analysis.txt') # ensure container is created but empty container.post() self.assertTrue(stor.isdir(container)) self.assertFalse(stor.isfile(container)) self.assertTrue(stor.exists(container)) self.assertFalse(stor.listdir(container)) folder = stor.join(container, 'analysis') subfolder = stor.join(container, 'analysis', 'alignments') file_in_folder = stor.join(container, 'analysis', 'alignments', 'bam.bam') self.assertFalse(stor.exists(file_in_folder)) self.assertFalse(stor.isdir(folder)) self.assertFalse(stor.isdir(folder + '/')) with stor.open(file_with_prefix, 'w') as fp: fp.write('data\n') self.assertFalse(stor.isdir(folder)) self.assertTrue(stor.isfile(file_with_prefix)) with stor.open(file_in_folder, 'w') as fp: fp.write('blah.txt\n') self.assertTrue(stor.isdir(folder)) self.assertFalse(stor.isfile(folder)) self.assertTrue(stor.isdir(subfolder)) def test_metadata_pulling(self): file_in_folder = stor.join(self.test_container, 'somefile.svg') with stor.open(file_in_folder, 'w') as fp: fp.write('12345\n') self.assertEqual(stor.getsize(file_in_folder), 6) stat_data = stor.Path(file_in_folder).stat() self.assertIn('Content-Type', stat_data) self.assertEqual(stat_data['Content-Type'], 'image/svg+xml') def test_push_metadata(self): if self.test_container.tenant != 'AUTH_swft_test': raise unittest.SkipTest('test only works with admin rights') obj = self.test_container / 'object.txt' with obj.open('w') as fp: fp.write('a\n') obj.post({'header': ['X-Object-Meta-Custom:text']}) stat_data = obj.stat() # TODO(jtratner): consider validating x-object-meta vs. # x-container-meta (otherwise headers won't take) self.assertIn('x-object-meta-custom', stat_data['headers']) self.assertEqual(stat_data['headers']['x-object-meta-custom'], 'text') self.test_container.post({'header': ['X-Container-Meta-Exciting:value'], 'read_acl': '.r:*'}) stat_data = self.test_container.stat() self.assertEqual(stat_data['Read-ACL'], '.r:*') self.assertIn('x-container-meta-exciting', stat_data['headers']) self.assertEqual(stat_data['headers']['x-container-meta-exciting'], 'value') self.test_container.post({'read_acl': '.r:example.com'}) self.assertEqual(self.test_container.stat()['Read-ACL'], '.r:example.com') def test_copytree_to_from_dir_w_manifest(self): num_test_objs = 10 test_obj_size = 100 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) # Make a nested file and an empty directory for testing purposes tmp_d = Path(tmp_d) os.mkdir(tmp_d / 'my_dir') open(tmp_d / 'my_dir' / 'empty_file', 'w').close() os.mkdir(tmp_d / 'my_dir' / 'empty_dir') stor.copytree( '.', self.test_dir, use_manifest=True) # Validate the contents of the manifest file manifest_contents = utils.get_data_manifest_contents(self.test_dir) expected_contents = self.get_dataset_obj_names(num_test_objs) expected_contents.extend(['my_dir/empty_file', 'my_dir/empty_dir']) expected_contents = [Path('test') / c for c in expected_contents] self.assertEquals(set(manifest_contents), set(expected_contents)) with NamedTemporaryDirectory(change_dir=True) as tmp_d: # Download the results successfully Path(self.test_dir).copytree( 'test', use_manifest=True) # Now delete one of the objects from swift. A second download # will fail with a condition error Path(self.test_dir / 'my_dir' / 'empty_dir').remove() with self.assertRaises(exceptions.ConditionNotMetError): Path(self.test_dir).copytree( 'test', use_manifest=True, num_retries=0) def test_all_segment_container_types_are_deleted(self): segment_containers = [stor.join('swift://' + self.test_container.tenant, fmt % self.test_container.name) for fmt in ('.segments_%s', '%s+segments', '%s_segments')] all_containers = segment_containers + [self.test_container] test_files = [stor.join(c, 'test_file_tbdeleted.txt') for c in all_containers] for t in test_files: with stor.open(t, 'w') as fp: fp.write('testtxt\n') assert all(t.exists() for t in test_files) stor.rmtree(self.test_container) for t in test_files: assert not t.exists(), 'Did not delete %s' % t def test_upload_multiple_dirs(self): with NamedTemporaryDirectory(change_dir=True) as tmp_d: num_test_objs = 10 tmp_d = Path(tmp_d) # Create files filled with random data. path1 = tmp_d / 'dir1' os.mkdir(path1) self.create_dataset(path1, num_test_objs, 10) # Create empty dir and file. path2 = tmp_d / 'dir2' os.mkdir(path2) os.mkdir(path2 / 'my_dir') open(path2 / 'my_dir' / 'included_file', 'w').close() open(path2 / 'my_dir' / 'excluded_file', 'w').close() os.mkdir(path2 / 'my_dir' / 'included_dir') os.mkdir(path2 / 'my_dir' / 'excluded_dir') # Create file in the top level directory. open(tmp_d / 'top_level_file', 'w').close() to_upload = [ 'dir1', 'dir2/my_dir/included_file', 'dir2/my_dir/included_dir', 'top_level_file', ] with tmp_d: swift_path = self.test_dir / 'subdir' swift_path.upload(to_upload, use_manifest=True) # Validate the contents of the manifest file manifest_contents = utils.get_data_manifest_contents(swift_path) expected_contents = [ Path('dir1') / name for name in self.get_dataset_obj_names(num_test_objs) ] expected_contents.extend([ 'dir2/my_dir/included_file', 'dir2/my_dir/included_dir', 'top_level_file', ]) expected_contents = [Path('test/subdir') / c for c in expected_contents] self.assertEquals(set(manifest_contents), set(expected_contents))
class S3IntegrationTest(BaseIntegrationTest.BaseTestCases): """ Integration tests for S3. Note that for now, while upload/download/remove methods are not implemented, tests will use the existing stor-test-bucket bucket on S3. In order to run the tests, you must have valid AWS S3 credentials set in the following environment variables: AWS_TEST_ACCESS_KEY_ID, AWS_TEST_SECRET_ACCESS_KEY (and optionally AWS_DEFAULT_REGION). """ def setUp(self): super(S3IntegrationTest, self).setUp() if not (os.environ.get('AWS_TEST_ACCESS_KEY_ID') and os.environ.get('AWS_TEST_SECRET_ACCESS_KEY') and os.environ.get('S3_TEST_BUCKET')): raise unittest.SkipTest( 'AWS_TEST_ACCESS_KEY_ID / AWS_TEST_SECRET_ACCESS_KEY / S3_TEST_BUCKET ' ' env vars not set. Skipping integration test') # Disable loggers so nose output is clean logging.getLogger('botocore').setLevel(logging.CRITICAL) test_bucket = os.environ['S3_TEST_BUCKET'] self.test_bucket = Path('s3://{test_bucket}/{uuid}'.format( test_bucket=test_bucket, uuid=uuid.uuid4())) self.test_dir = self.test_bucket / 'test' stor.settings.update({ 's3': { 'aws_access_key_id': os.environ['AWS_TEST_ACCESS_KEY_ID'], 'aws_secret_access_key': os.environ['AWS_TEST_SECRET_ACCESS_KEY'] } }) def tearDown(self): super(S3IntegrationTest, self).tearDown() self.test_dir.rmtree() def test_over_1000_files(self): num_test_objs = 1234 min_obj_size = 0 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, min_obj_size) self.test_dir.upload(['.']) self.assertEquals(1234, len(self.test_dir.list())) self.assertEquals(1200, len(self.test_dir.list(limit=1200))) self.assertTrue(self.test_dir.isdir()) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.test_dir.download('./') self.assertEquals(1234, len(os.listdir(tmp_d))) def test_list_methods(self): fake_bucket = Path('s3://stor-test-bucket2') with self.assertRaises(exceptions.NotFoundError): fake_bucket.list() fake_folder = self.test_bucket / 'not_a_dir' self.assertEquals([], fake_folder.list()) with NamedTemporaryDirectory(change_dir=True): open('file1.txt', 'w').close() open('file2.txt', 'w').close() os.mkdir('nested_dir') os.mkdir('nested_dir/dir') open('nested_dir/dir/file3.txt', 'w').close() self.test_dir.upload(['.']) file_list = self.test_dir.list() starts_with_list = self.test_bucket.list(starts_with='test') self.assertEquals(set(file_list), set(starts_with_list)) self.assertEquals( set(file_list), set([ self.test_dir / 'file1.txt', self.test_dir / 'file2.txt', self.test_dir / 'nested_dir/dir/file3.txt' ])) dir_list = self.test_dir.listdir() self.assertEquals( set(dir_list), set([ self.test_dir / 'file1.txt', self.test_dir / 'file2.txt', self.test_dir / 'nested_dir/' ])) self.assertTrue(self.test_dir.listdir() == (self.test_dir + '/').listdir()) def test_is_methods(self): """ Tests is methods, exists(), and getsize(). getsize() integration test may be moved to a different test depending on whether other metadata methods (such as stat()) are implemented. """ self.assertTrue(self.test_bucket.exists()) self.assertTrue(self.test_bucket.isdir()) self.assertFalse(self.test_bucket.isfile()) self.assertEquals(self.test_bucket.getsize(), 0) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, 1, 10) self.test_dir.upload(['.']) correct_size = os.path.getsize('0') self.assertTrue(self.test_dir.exists()) self.assertTrue(self.test_dir.isdir()) self.assertFalse(self.test_dir.isfile()) self.assertEquals(self.test_dir.getsize(), 0) test_file = self.test_dir / '0' self.assertTrue(test_file.exists()) self.assertFalse(test_file.isdir()) self.assertTrue(test_file.isfile()) self.assertEquals(test_file.getsize(), correct_size) test_file.remove() self.assertFalse(test_file.exists()) self.assertFalse(test_file.isdir()) self.assertFalse(test_file.isfile()) with self.assertRaises(exceptions.NotFoundError): test_file.getsize() fake_bucket = self.test_bucket + '2' self.assertFalse(fake_bucket.exists()) self.assertFalse(fake_bucket.isdir()) self.assertFalse(fake_bucket.isfile()) with self.assertRaises(exceptions.NotFoundError): fake_bucket.getsize() def test_upload_download_remove(self): num_test_objs = 10 min_obj_size = 50 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, min_obj_size) self.test_dir.upload(['.']) for which_obj in self.get_dataset_obj_names(num_test_objs): self.assertTrue((self.test_dir / which_obj).exists()) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.test_dir.download(tmp_d) for which_obj in self.get_dataset_obj_names(num_test_objs): self.assertCorrectObjectContents(which_obj, which_obj, min_obj_size) (self.test_dir / which_obj).remove() # consistency check while (self.test_dir / which_obj).exists(): time.sleep(.5) self.assertFalse((self.test_dir / which_obj).exists()) def test_upload_w_headers(self): test_file = self.test_dir / 'a.txt' with NamedTemporaryDirectory(change_dir=True): open('a.txt', 'w').close() self.test_dir.upload(['.'], headers={'ContentLanguage': 'en'}) self.assertTrue(test_file.exists()) self.assertEquals(test_file.stat()['ContentLanguage'], 'en') def test_download(self): with NamedTemporaryDirectory(change_dir=True): os.mkdir('dir') os.mkdir('dir/a') open('dir/a/a.txt', 'w').close() self.test_dir.upload(['.']) with NamedTemporaryDirectory(change_dir=True): open('dir', 'w').close() open('a', 'w').close() with self.assertRaises(OSError): self.test_dir.download('.') with self.assertRaises(OSError): (self.test_dir / 'dir').download('.') def test_condition(self): num_test_objs = 20 test_obj_size = 100 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) Path('.').copytree(self.test_dir) # Verify a ConditionNotMet exception is thrown when attempting to list # a file that hasn't been uploaded expected_objs = { self.test_dir / which_obj for which_obj in self.get_dataset_obj_names(num_test_objs + 1) } with self.assertRaises(exceptions.ConditionNotMetError): self.test_dir.list( condition=lambda results: expected_objs == set(results)) # Verify that the condition passes when excluding the non-extant file correct_objs = { self.test_dir / which_obj for which_obj in self.get_dataset_obj_names(num_test_objs) } objs = self.test_dir.list( condition=lambda results: correct_objs == set(results)) self.assertEquals(correct_objs, set(objs)) def test_dir_markers(self): with NamedTemporaryDirectory(change_dir=True): os.mkdir('empty') os.mkdir('dir') open('a.txt', 'w').close() open('dir/b.txt', 'w').close() self.test_dir.upload(['.']) self.assertEquals( set(self.test_dir.list()), { self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt', self.test_dir / 'empty/' }) self.assertEquals( set(self.test_dir.list(ignore_dir_markers=True)), {self.test_dir / 'a.txt', self.test_dir / 'dir/b.txt'}) self.assertTrue((self.test_dir / 'empty').isdir()) with NamedTemporaryDirectory(change_dir=True): self.test_dir.download('.') self.assertTrue(os.path.isdir('empty')) self.assertTrue(os.path.exists('dir/b.txt')) self.assertTrue(os.path.exists('a.txt')) def test_copytree_to_from_dir_w_manifest(self): num_test_objs = 10 test_obj_size = 100 with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, num_test_objs, test_obj_size) # Make a nested file and an empty directory for testing purposes tmp_d = Path(tmp_d) os.mkdir(tmp_d / 'my_dir') open(tmp_d / 'my_dir' / 'empty_file', 'w').close() os.mkdir(tmp_d / 'my_dir' / 'empty_dir') stor.copytree('.', self.test_dir, use_manifest=True) # Validate the contents of the manifest file manifest_contents = utils.get_data_manifest_contents(self.test_dir) expected_contents = self.get_dataset_obj_names(num_test_objs) expected_contents.extend( ['my_dir/empty_file', 'my_dir/empty_dir/']) expected_contents = [Path('test') / c for c in expected_contents] self.assertEquals(set(manifest_contents), set(expected_contents)) with NamedTemporaryDirectory(change_dir=True) as tmp_d: # Download the results successfully Path(self.test_dir).copytree('test', use_manifest=True) # Now delete one of the objects from s3. A second download # will fail with a condition error Path(self.test_dir / 'my_dir' / 'empty_dir/').remove() with self.assertRaises(exceptions.ConditionNotMetError): Path(self.test_dir).copytree('test', use_manifest=True, num_retries=0) def test_multipart_transfer(self): logger = six.StringIO() handler = logging.StreamHandler(logger) logging.getLogger('botocore').setLevel(logging.DEBUG) logging.getLogger('botocore').addHandler(handler) handler.setLevel(logging.DEBUG) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.create_dataset(tmp_d, 1, 10 * 1024 * 1024) self.test_dir.upload(['.']) self.assertEquals(1, len(self.test_dir.listdir())) with NamedTemporaryDirectory(change_dir=True) as tmp_d: self.test_dir.download('.') self.assertEquals(1, len(Path('.').listdir())) self.assertIn("CompleteMultipartUploadResult", logger.getvalue()) # Check for multipart download by checking for multiple 206 GET requests # to the object self.assertRegexpMatches( logger.getvalue(), '"GET (/stor-test-bucket)?/test/0 HTTP/1.1" 206')