Пример #1
0
def NamedTemporaryDirectory(suffix='',
                            prefix='tmp',
                            dir=None,
                            change_dir=False):
    """Create a temporary directory, yield it, and remove it afterwards.

    Mimics the behavior of tempfile.NamedTemporaryFile. The directory is
    created with ``tempfile.mkdtemp`` and wrapped in a ``stor.Path``; its
    tree is removed when the generator is finalized, whether or not the
    body raised.

    Arguments:
        suffix (str): If specified, the dir name will end with it.
        prefix (str): If specified, the dir name will start with it,
            otherwise 'tmp' is used.
        dir (str): If specified, the dir will be created in this
            directory.
        change_dir (bool): If true, the temporary directory is entered as
            a context manager around the yield, which is documented here
            as changing into the temporary directory.

    Yields:
        Path: The temporary directory.

    Note:
        Name is CamelCase to match tempfile.NamedTemporaryFile.

        NOTE(review): as visible here this is a plain generator function;
        it is presumably wrapped with ``contextlib.contextmanager`` where
        it is defined so that it can be used in a ``with`` statement.
        Confirm against the full module.

    Examples:
        >>> from stor import NamedTemporaryDirectory
        >>> with NamedTemporaryDirectory() as d:
        ...     pass  # Do operations within "d", which will be deleted afterwards
    """
    from stor import Path

    tmp_path = Path(tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir))
    try:
        if not change_dir:
            yield tmp_path
        else:
            # Entering the path is what switches the working directory;
            # leaving the ``with`` happens before the tree is removed.
            with tmp_path:
                yield tmp_path
    finally:
        tmp_path.rmtree()
Пример #2
0
 def teardown_posix_files(self):
     """Remove the local ``./<self.project>`` folder and everything in it."""
     Path('./{test_folder}'.format(test_folder=self.project)).rmtree()
Пример #3
0
class SwiftIntegrationTest(BaseIntegrationTest.BaseTestCases):
    def setUp(self):
        """Configure Swift credentials and create a fresh test container.

        The test is skipped when ``SWIFT_TEST_USERNAME`` is not set. On
        success, ``self.test_container`` points at a newly posted container
        with a random UUID name and ``self.test_dir`` at its ``test``
        sub-path.

        Raises:
            unittest.SkipTest: If ``SWIFT_TEST_USERNAME`` is unset or empty.
            ValueError: If the generated container already exists.
        """
        super(SwiftIntegrationTest, self).setUp()

        username = os.environ.get('SWIFT_TEST_USERNAME')
        if not username:
            raise unittest.SkipTest(
                'SWIFT_TEST_USERNAME env var not set. Skipping integration test')

        # Disable loggers so nose output wont be trashed
        for noisy_logger in ('requests', 'swiftclient', 'keystoneclient'):
            logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)

        swift_settings = {
            'username': username,
            'password': os.environ.get('SWIFT_TEST_PASSWORD'),
            'num_retries': 5,
        }
        settings.update({'swift': swift_settings})

        # fall back on to swiftstack auth for tenant
        default_tenant = 'AUTH_%s' % username
        tenant = os.environ.get('SWIFT_TEST_TENANT', default_tenant)

        self.test_container = Path('swift://%s/%s' % (tenant, uuid.uuid4()))
        if self.test_container.exists():
            raise ValueError('test container %s already exists.' % self.test_container)

        try:
            self.test_container.post()
        except BaseException:
            # Clean up whatever may have been created before re-raising.
            self.test_container.rmtree()
            raise

        self.test_dir = self.test_container / 'test'

    def tearDown(self):
        """Run the parent teardown, then remove the per-test container tree."""
        super(SwiftIntegrationTest, self).tearDown()
        container = self.test_container
        container.rmtree()

    def test_cached_auth_and_auth_invalidation(self):
        """Exercise caching, refreshing and failure of keystone auth.

        ``swiftclient.client.get_auth_keystone`` is patched with a mock that
        delegates to the real function, so the number of auth round trips
        made by each ``stat`` call can be counted.
        """
        from swiftclient.client import get_auth_keystone as real_get_keystone
        swift._clear_cached_auth_credentials()
        tenant = self.test_container.tenant
        with mock.patch('swiftclient.client.get_auth_keystone', autospec=True) as mock_get_ks:
            mock_get_ks.side_effect = real_get_keystone
            s = Path(self.test_container).stat()
            self.assertEqual(s['Account'], tenant)
            self.assertEqual(len(mock_get_ks.call_args_list), 1)

            # The keystone auth should not be called on another stat
            mock_get_ks.reset_mock()
            s = Path(self.test_container).stat()
            self.assertEqual(s['Account'], tenant)
            self.assertEqual(len(mock_get_ks.call_args_list), 0)

            # Set the auth cache to something bad. The auth keystone should
            # be called twice on another stat. It's first called by the swiftclient
            # when retrying auth (with the bad token) and then called by us without
            # a token after the swiftclient raises an authorization error.
            mock_get_ks.reset_mock()
            swift._cached_auth_token_map[tenant]['creds']['os_auth_token'] = 'bad_auth'
            s = Path(self.test_container).stat()
            self.assertEqual(s['Account'], tenant)
            self.assertEqual(len(mock_get_ks.call_args_list), 2)
            # Note that the auth_token is passed into the keystone client but then popped
            # from the kwargs. Assert that an auth token is no longer part of the retry calls
            self.assertNotIn('auth_token', mock_get_ks.call_args_list[0][0][3])
            self.assertNotIn('auth_token', mock_get_ks.call_args_list[1][0][3])

            # Now make the auth always be invalid and verify that an auth error is thrown
            # This also tests that keystone auth errors are propagated as swift
            # AuthenticationErrors
            mock_get_ks.reset_mock()
            swift._clear_cached_auth_credentials()
            with mock.patch('keystoneclient.v2_0.client.Client') as mock_ks_client:
                from keystoneclient.exceptions import Unauthorized
                mock_ks_client.side_effect = Unauthorized
                with self.assertRaises(swift.AuthenticationError):
                    Path(self.test_container).stat()

                # Verify that getting the auth was called two more times because of retry
                # logic
                self.assertEqual(len(mock_get_ks.call_args_list), 2)

    def test_static_large_obj_copy_and_segment_container(self):
        """Upload an object larger than the segment size and round-trip it.

        Checks that a ``.segments_<container>`` container shows up, that it
        can be hidden from ``listdir``, that it holds the expected number of
        segments, and that the downloaded object matches the original.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            segment_size = 1048576
            # Four full segments plus a 100-byte remainder -> five segments.
            obj_size = segment_size * 4 + 100
            self.create_dataset(tmp_d, 1, obj_size)
            obj_path = stor.join(tmp_d,
                                 self.get_dataset_obj_names(1)[0])
            options = {'swift:upload': {'segment_size': segment_size}}
            with settings.use(options):
                obj_path.copy(self.test_container / 'large_object.txt')

            # Verify there is a segment container and that it can be ignored when listing a dir
            segment_container = Path(self.test_container.parent) / ('.segments_%s' % self.test_container.name)  # noqa
            containers = Path(self.test_container.parent).listdir(ignore_segment_containers=False)
            self.assertIn(segment_container, containers)
            self.assertIn(self.test_container, containers)
            containers = Path(self.test_container.parent).listdir(ignore_segment_containers=True)
            self.assertNotIn(segment_container, containers)
            self.assertIn(self.test_container, containers)

            # Verify there are five segments
            objs = set(segment_container.list(condition=lambda results: len(results) == 5))
            self.assertEqual(len(objs), 5)

            # Copy back the large object and verify its contents
            obj_path = Path(tmp_d) / 'large_object.txt'
            Path(self.test_container / 'large_object.txt').copy(obj_path)
            self.assertCorrectObjectContents(obj_path, self.get_dataset_obj_names(1)[0], obj_size)

    @unittest.skipIf(not os.environ.get('OS_TEMP_URL_KEY'), 'No temp url key set')
    def test_temp_url(self):
        """Upload two files and fetch them back through temp URLs.

        One file has a plain name and one has characters that need URL
        escaping; for both, the response body and the
        ``Content-Disposition`` header are checked.
        """
        basic_file = 'test.txt'
        complex_file = 'my test?file=special_chars.txt'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            nested_tmp_dir = stor.join(tmp_d, 'tmp')
            os.mkdir(nested_tmp_dir)
            basic_file_p = stor.join(nested_tmp_dir, basic_file)
            # Reuse the name defined above instead of repeating the literal.
            complex_file_p = stor.join(nested_tmp_dir, complex_file)

            with stor.open(basic_file_p, 'w') as f:
                f.write('basic test')
            with stor.open(complex_file_p, 'w') as f:
                f.write('complex test')

            self.test_container.upload(['.'])

        with NamedTemporaryDirectory(change_dir=True):
            basic_obj = stor.Path(
                stor.join(self.test_container, 'tmp', basic_file))
            basic_temp_url = basic_obj.temp_url(inline=False, filename=basic_file)
            r = requests.get(basic_temp_url)
            # ``Response.content`` is bytes; a bytes literal compares equal on
            # both Python 2 (where bytes is str) and Python 3.
            self.assertEqual(r.content, b'basic test')
            self.assertEqual(r.headers['Content-Disposition'],
                             'attachment; filename="test.txt"; filename*=UTF-8\'\'test.txt')

            complex_obj = stor.Path(
                stor.join(self.test_container, 'tmp', complex_file))
            complex_temp_url = complex_obj.temp_url(inline=False, filename=complex_file)
            r = requests.get(complex_temp_url)
            self.assertEqual(r.content, b'complex test')
            self.assertEqual(r.headers['Content-Disposition'],
                             'attachment; filename="my test%3Ffile%3Dspecial_chars.txt"; filename*=UTF-8\'\'my%20test%3Ffile%3Dspecial_chars.txt')  # noqa

    def test_condition_failures(self):
        """Check that ``list`` retries and then fails on an unmet condition.

        A dataset is uploaded, then listed with a condition that expects one
        object more than was uploaded. ``time.sleep`` is patched so the
        retries do not actually wait, and the number of sleeps is compared
        with the configured ``num_retries``.
        """
        num_test_objs = 20
        test_obj_size = 100
        test_dir = self.test_container / 'test'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(test_dir)

        # Verify a ConditionNotMet exception is thrown when attempting to list
        # a file that hasn't been uploaded
        expected_objs = {
            test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
        }

        num_retries = settings.get()['swift']['num_retries']
        with mock.patch('time.sleep') as mock_sleep:
            with self.assertRaises(swift.ConditionNotMetError):
                test_dir.list(condition=lambda results: expected_objs == set(results))
            # Guard against a vacuous pass when retries are disabled.
            self.assertGreater(num_retries, 0)
            self.assertEqual(len(mock_sleep.call_args_list), num_retries)

        # Verify that the condition passes when excluding the non-existent file
        expected_objs = {
            test_dir / which_obj
            for which_obj in self.get_dataset_obj_names(num_test_objs)
        }
        objs = test_dir.list(condition=lambda results: expected_objs == set(results))
        self.assertEqual(expected_objs, set(objs))

    def test_list_glob(self):
        """Upload a dataset and check ``list`` and ``glob`` results.

        ``list`` must return every uploaded object; ``glob('1*')`` must
        return exactly the objects whose name starts with ``'1'``.
        """
        num_test_objs = 20
        test_obj_size = 100
        test_dir = self.test_container / 'test'
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            Path('.').copytree(test_dir)

        objs = set(test_dir.list(condition=lambda results: len(results) == num_test_objs))
        expected_objs = {
            test_dir / obj_name
            for obj_name in self.get_dataset_obj_names(num_test_objs)
        }
        self.assertEqual(len(objs), num_test_objs)
        self.assertEqual(objs, expected_objs)

        expected_glob = {
            test_dir / obj_name
            for obj_name in self.get_dataset_obj_names(num_test_objs) if obj_name.startswith('1')
        }
        # Make sure the glob is expected to match more than a single object.
        self.assertGreater(len(expected_glob), 1)
        globbed_objs = set(
            test_dir.glob('1*', condition=lambda results: len(results) == len(expected_glob)))
        self.assertEqual(globbed_objs, expected_glob)

    def test_copytree_w_headers(self):
        """Upload with an ``X-Delete-After`` header and check the object's stat.

        After the upload, the object's stat headers are expected to contain
        ``x-delete-at``.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Create an empty file to upload.
            with open(tmp_d / 'test_obj', 'w'):
                pass
            stor.copytree(
                '.',
                self.test_container,
                headers=['X-Delete-After:1000'])

        obj = stor.join(self.test_container, 'test_obj')
        stat_results = obj.stat()
        self.assertIn('x-delete-at', stat_results['headers'])

    def test_rmtree(self):
        """Check ``rmtree`` on a sub-directory and on the whole container.

        Removing ``my_dir`` must leave the two top-level files in place;
        removing the container must eventually make listing it raise
        ``swift.NotFoundError``.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            # Make a couple empty test files and nested files
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'dir_file1', 'w').close()
            open(tmp_d / 'my_dir' / 'dir_file2', 'w').close()
            open(tmp_d / 'base_file1', 'w').close()
            open(tmp_d / 'base_file2', 'w').close()

            stor.copytree(
                '.',
                self.test_container,
                use_manifest=True)

            swift_dir = self.test_container / 'my_dir'
            self.assertEqual(len(swift_dir.list()), 2)
            swift_dir.rmtree()
            self.assertEqual(len(swift_dir.list()), 0)

            # Both top-level files must survive removal of the sub-directory.
            base_contents = self.test_container.list()
            self.assertIn(self.test_container / 'base_file1', base_contents)
            self.assertIn(self.test_container / 'base_file2', base_contents)

            self.test_container.rmtree()

            # TODO figure out a better way to test that the container no longer exists.
            with self.assertRaises(swift.NotFoundError):
                # Replication may have not happened yet for container deletion. Keep
                # listing in intervals until a NotFoundError is thrown
                for i in (0, 1, 3):
                    time.sleep(i)
                    self.test_container.list()

    def test_is_methods(self):
        """Check ``isdir`` / ``isfile`` / ``exists`` on containers and prefixes.

        In particular, an object named ``analysis.txt`` must not make the
        ``analysis`` prefix count as a directory; only an object stored
        under ``analysis/`` does.
        """
        container = self.test_container
        file_with_prefix = stor.join(container, 'analysis.txt')

        # ensure container is created but empty
        container.post()
        self.assertTrue(stor.isdir(container))
        self.assertFalse(stor.isfile(container))
        self.assertTrue(stor.exists(container))
        self.assertFalse(stor.listdir(container))

        folder = stor.join(container, 'analysis')
        subfolder = stor.join(container, 'analysis', 'alignments')
        file_in_folder = stor.join(container, 'analysis', 'alignments',
                                   'bam.bam')
        self.assertFalse(stor.exists(file_in_folder))
        self.assertFalse(stor.isdir(folder))
        self.assertFalse(stor.isdir(folder + '/'))
        with stor.open(file_with_prefix, 'w') as fp:
            fp.write('data\n')
        # A sibling object sharing the prefix does not create the directory.
        self.assertFalse(stor.isdir(folder))
        self.assertTrue(stor.isfile(file_with_prefix))

        with stor.open(file_in_folder, 'w') as fp:
            fp.write('blah.txt\n')

        self.assertTrue(stor.isdir(folder))
        self.assertFalse(stor.isfile(folder))
        self.assertTrue(stor.isdir(subfolder))

    def test_metadata_pulling(self):
        """Write a small ``.svg`` object and check its size and content type."""
        svg_obj = stor.join(self.test_container, 'somefile.svg')
        with stor.open(svg_obj, 'w') as handle:
            handle.write('12345\n')

        # Five digits plus the trailing newline.
        self.assertEqual(stor.getsize(svg_obj), 6)

        obj_stat = stor.Path(svg_obj).stat()
        self.assertIn('Content-Type', obj_stat)
        self.assertEqual(obj_stat['Content-Type'], 'image/svg+xml')

    def test_push_metadata(self):
        """Post custom metadata to an object and to the container, then stat.

        Skipped unless the container's tenant is ``AUTH_swft_test``.
        """
        if self.test_container.tenant != 'AUTH_swft_test':
            raise unittest.SkipTest('test only works with admin rights')

        container = self.test_container
        target = container / 'object.txt'
        with target.open('w') as handle:
            handle.write('a\n')

        target.post({'header': ['X-Object-Meta-Custom:text']})
        object_headers = target.stat()['headers']
        # TODO(jtratner): consider validating x-object-meta vs.
        # x-container-meta (otherwise headers won't take)
        self.assertIn('x-object-meta-custom', object_headers)
        self.assertEqual(object_headers['x-object-meta-custom'], 'text')

        container.post({'header': ['X-Container-Meta-Exciting:value'],
                        'read_acl': '.r:*'})
        container_stat = container.stat()
        self.assertEqual(container_stat['Read-ACL'], '.r:*')
        container_headers = container_stat['headers']
        self.assertIn('x-container-meta-exciting', container_headers)
        self.assertEqual(container_headers['x-container-meta-exciting'], 'value')

        container.post({'read_acl': '.r:example.com'})
        self.assertEqual(container.stat()['Read-ACL'], '.r:example.com')

    def test_copytree_to_from_dir_w_manifest(self):
        """Round-trip a directory through Swift using a data manifest.

        Uploads a dataset plus a nested empty file and empty directory with
        ``use_manifest=True``, checks the manifest contents, downloads the
        tree, then removes one object and expects a second download to fail
        with ``ConditionNotMetError``.
        """
        num_test_objs = 10
        test_obj_size = 100
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            self.create_dataset(tmp_d, num_test_objs, test_obj_size)
            # Make a nested file and an empty directory for testing purposes
            tmp_d = Path(tmp_d)
            os.mkdir(tmp_d / 'my_dir')
            open(tmp_d / 'my_dir' / 'empty_file', 'w').close()
            os.mkdir(tmp_d / 'my_dir' / 'empty_dir')

            stor.copytree(
                '.',
                self.test_dir,
                use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(self.test_dir)
            expected_contents = self.get_dataset_obj_names(num_test_objs)
            expected_contents.extend(['my_dir/empty_file',
                                      'my_dir/empty_dir'])
            expected_contents = [Path('test') / c for c in expected_contents]
            self.assertEqual(set(manifest_contents), set(expected_contents))

        with NamedTemporaryDirectory(change_dir=True):
            # Download the results successfully
            Path(self.test_dir).copytree(
                'test',
                use_manifest=True)

            # Now delete one of the objects from swift. A second download
            # will fail with a condition error
            Path(self.test_dir / 'my_dir' / 'empty_dir').remove()
            with self.assertRaises(exceptions.ConditionNotMetError):
                Path(self.test_dir).copytree(
                    'test',
                    use_manifest=True,
                    num_retries=0)

    def test_all_segment_container_types_are_deleted(self):
        """Check that ``rmtree`` on a container also clears segment containers.

        A file is written into the test container and into each of the
        three segment-container name variants built from its name; after
        ``stor.rmtree`` on the test container none of them may exist.
        """
        segment_containers = [stor.join('swift://' + self.test_container.tenant,
                                        fmt % self.test_container.name)
                              for fmt in ('.segments_%s', '%s+segments', '%s_segments')]
        all_containers = segment_containers + [self.test_container]

        test_files = [stor.join(c, 'test_file_tbdeleted.txt') for c in all_containers]
        for t in test_files:
            with stor.open(t, 'w') as fp:
                fp.write('testtxt\n')
        # Use unittest assertions: bare ``assert`` is stripped under ``-O``.
        self.assertTrue(all(t.exists() for t in test_files))
        stor.rmtree(self.test_container)
        for t in test_files:
            self.assertFalse(t.exists(), 'Did not delete %s' % t)

    def test_upload_multiple_dirs(self):
        """Upload a selection of files and directories with a manifest.

        Builds a local tree with included and excluded entries, uploads only
        the selected ones to ``<test_dir>/subdir`` with ``use_manifest=True``
        and checks that the manifest lists exactly the selected entries.
        """
        with NamedTemporaryDirectory(change_dir=True) as tmp_d:
            num_test_objs = 10
            tmp_d = Path(tmp_d)

            # Create files filled with random data.
            path1 = tmp_d / 'dir1'
            os.mkdir(path1)
            self.create_dataset(path1, num_test_objs, 10)

            # Create empty dir and file.
            path2 = tmp_d / 'dir2'
            os.mkdir(path2)
            os.mkdir(path2 / 'my_dir')
            open(path2 / 'my_dir' / 'included_file', 'w').close()
            open(path2 / 'my_dir' / 'excluded_file', 'w').close()
            os.mkdir(path2 / 'my_dir' / 'included_dir')
            os.mkdir(path2 / 'my_dir' / 'excluded_dir')

            # Create file in the top level directory.
            open(tmp_d / 'top_level_file', 'w').close()

            to_upload = [
                'dir1',
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ]
            with tmp_d:
                swift_path = self.test_dir / 'subdir'
                swift_path.upload(to_upload, use_manifest=True)

            # Validate the contents of the manifest file
            manifest_contents = utils.get_data_manifest_contents(swift_path)
            expected_contents = [
                Path('dir1') / name
                for name in self.get_dataset_obj_names(num_test_objs)
            ]
            expected_contents.extend([
                'dir2/my_dir/included_file',
                'dir2/my_dir/included_dir',
                'top_level_file',
            ])

            expected_contents = [Path('test/subdir') / c for c in expected_contents]
            self.assertEqual(set(manifest_contents), set(expected_contents))