def fs(self):
    """Lazily build and cache the filesystem for this runner.

    Spark supports basically every filesystem there is, so we layer
    S3 and GCS handlers (when their client libraries are installed)
    over Hadoop, with the local filesystem as the final member.
    """
    if not self._fs:
        composite = CompositeFilesystem()

        if boto3_installed:
            composite.add_fs('s3', S3Filesystem(
                aws_access_key_id=self._opts['aws_access_key_id'],
                aws_secret_access_key=self._opts['aws_secret_access_key'],
                aws_session_token=self._opts['aws_session_token'],
                s3_endpoint=self._opts['s3_endpoint'],
                s3_region=self._opts['s3_region'],
            ), disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            composite.add_fs('gcs', GCSFilesystem(
                project_id=self._opts['project_id'],
                location=self._opts['gcs_region'],
                object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
            ), disable_if=_is_permanent_google_error)

        # Hadoop FS is responsible for all URIs that fall through to it
        composite.add_fs('hadoop', HadoopFilesystem(
            self._opts['hadoop_bin']))

        composite.add_fs('local', LocalFilesystem())

        self._fs = composite

    return self._fs
def _lock_contents(self, mock_cluster, steps_ahead=0):
    """Read the bytes of the cluster's step-lock object from mock S3.

    Returns None when the lock object is empty or absent.
    """
    step_num = len(mock_cluster['_Steps']) + steps_ahead
    lock_uri = 's3://my_bucket/locks/%s/%d' % (
        mock_cluster['Id'], step_num)

    data = b''.join(S3Filesystem().cat(lock_uri))
    return data if data else None
def test_endpoint_for_bucket_in_us_west_2(self):
    # a bucket created in us-west-2 should be reached via the
    # region-specific endpoint
    self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

    walrus = S3Filesystem().get_bucket('walrus')
    self.assertEqual(walrus.connection.host,
                     's3-us-west-2.amazonaws.com')
def test_get_location_is_forbidden(self):
    # if we can't call GetBucketLocation, fall back to the default
    # endpoint and region rather than raising
    self.add_mock_s3_data({'walrus': {}}, location='us-west-2')
    fs = S3Filesystem()

    forbidden = ClientError(
        dict(
            Error=dict(
                Code='AccessDenied',
                Message='Access Denied',
            ),
            ResponseMetadata=dict(
                HTTPStatusCode=403
            ),
        ),
        'GetBucketLocation')

    with patch(
            'tests.mock_boto3.s3.MockS3Client.get_bucket_location',
            side_effect=forbidden):
        walrus = fs.get_bucket('walrus')

        client_meta = walrus.meta.client.meta
        self.assertEqual(client_meta.endpoint_url,
                         'https://s3.amazonaws.com')
        self.assertEqual(client_meta.region_name, 'us-east-1')
def fs(self):
    """Lazily build and cache the filesystem for this runner.

    Spark supports basically every filesystem there is: S3 and GCS
    (when their client libraries are installed), then Hadoop, and
    finally the local filesystem as the last member.
    """
    if not self._fs:
        composite = CompositeFilesystem()

        if boto3_installed:
            composite.add_fs('s3', S3Filesystem(
                aws_access_key_id=self._opts['aws_access_key_id'],
                aws_secret_access_key=self._opts['aws_secret_access_key'],
                aws_session_token=self._opts['aws_session_token'],
                s3_endpoint=self._opts['s3_endpoint'],
                s3_region=self._opts['s3_region'],
            ), disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            composite.add_fs('gcs', GCSFilesystem(
                project_id=self._opts['google_project_id']
            ), disable_if=_is_permanent_google_error)

        # Hadoop FS handles any URI that falls through to it
        composite.add_fs('hadoop', HadoopFilesystem(
            self._opts['hadoop_bin']))

        composite.add_fs('local', LocalFilesystem())

        self._fs = composite

    return self._fs
def _lock_contents(self, mock_cluster):
    """Read the bytes of the cluster's lock object from mock S3.

    Returns None when the lock object is empty or absent.
    """
    lock_uri = 's3://my_bucket/locks/%s' % mock_cluster['Id']

    data = b''.join(S3Filesystem().cat(lock_uri))
    return data if data else None
def test_s3_ls(self):
    # _s3_ls() does prefix matching on keys within a bucket
    self.add_mock_s3_data(
        {'walrus': {'one': b'', 'two': b'', 'three': b''}})

    fs = S3Filesystem()

    self.assertEqual(
        set(fs._s3_ls('s3://walrus/')),
        {'s3://walrus/one', 's3://walrus/two', 's3://walrus/three'})

    self.assertEqual(
        set(fs._s3_ls('s3://walrus/t')),
        {'s3://walrus/two', 's3://walrus/three'})

    # trailing slash means "directory", which doesn't match any key
    self.assertEqual(set(fs._s3_ls('s3://walrus/t/')), set())

    # if we ask for a nonexistent bucket, we should get some sort
    # of exception (in practice, buckets with random names will
    # probably be owned by other people, and we'll get some sort
    # of permissions error)
    self.assertRaises(Exception, set, fs._s3_ls('s3://lolcat/'))
def test_endpoint_for_bucket_in_us_west_1(self):
    # the bucket's region should be picked up from its location
    self.add_mock_s3_data({'walrus': {}}, location='us-west-1')

    walrus = S3Filesystem().get_bucket('walrus')
    self.assertEqual(walrus.meta.client.meta.region_name, 'us-west-1')
def test_endpoint_for_bucket_in_us_east_1(self):
    # location constraint for us-east-1 is '', not 'us-east-1'
    self.add_mock_s3_data({'walrus': {}}, location='')

    walrus = S3Filesystem().get_bucket('walrus')
    self.assertEqual(walrus.connection.host, 's3.amazonaws.com')
def test_force_s3_endpoint_url(self):
    # an explicit s3_endpoint should be used verbatim by both the
    # client and the resource
    custom_endpoint = 'https://myproxy:8080'
    fs = S3Filesystem(s3_endpoint=custom_endpoint)

    self.assertEqual(fs.make_s3_client().meta.endpoint_url,
                     custom_endpoint)
    self.assertEqual(
        fs.make_s3_resource().meta.client.meta.endpoint_url,
        custom_endpoint)
def test_default_endpoint(self):
    # with no options, both client and resource use the default
    # S3 endpoint
    fs = S3Filesystem()

    self.assertEqual(fs.make_s3_client().meta.endpoint_url,
                     'https://s3.amazonaws.com')
    self.assertEqual(
        fs.make_s3_resource().meta.client.meta.endpoint_url,
        'https://s3.amazonaws.com')
def test_create_bucket_with_mkdir(self):
    # mkdir() doesn't have a way to specify bucket location, so we
    # do it at init time
    fs = S3Filesystem(s3_region='us-west-1')
    fs.mkdir('s3://walrus/data')

    walrus = fs.get_bucket('walrus')
    self.assertEqual(walrus.meta.client.meta.region_name, 'us-west-1')
def test_get_location_other_error(self):
    # errors other than 403 from get_location() should propagate
    self.add_mock_s3_data({'walrus': {}}, location='us-west-2')
    fs = S3Filesystem()

    not_found = boto.exception.S3ResponseError(404, 'Not Found')
    with patch('tests.mockboto.MockBucket.get_location',
               side_effect=not_found):
        self.assertRaises(boto.exception.S3ResponseError,
                          fs.get_bucket, 'walrus')
def test_get_location_is_forbidden(self):
    # a 403 from get_location() means we fall back to the default
    # endpoint rather than raising
    self.add_mock_s3_data({'walrus': {}}, location='us-west-2')
    fs = S3Filesystem()

    forbidden = boto.exception.S3ResponseError(403, 'Forbidden')
    with patch('tests.mockboto.MockBucket.get_location',
               side_effect=forbidden):
        walrus = fs.get_bucket('walrus')
        self.assertEqual(walrus.connection.host, 's3.amazonaws.com')
def test_force_s3_endpoint_region(self):
    # this is the actual mrjob default region
    fs = S3Filesystem(s3_region='us-west-2')

    expected_url = 'https://s3-us-west-2.amazonaws.com'

    client_meta = fs.make_s3_client().meta
    self.assertEqual(client_meta.endpoint_url, expected_url)
    self.assertEqual(client_meta.region_name, 'us-west-2')

    resource_meta = fs.make_s3_resource().meta.client.meta
    self.assertEqual(resource_meta.endpoint_url, expected_url)
    self.assertEqual(resource_meta.region_name, 'us-west-2')
def test_buckets_from_forced_s3_endpoint(self):
    # when s3_endpoint is forced, buckets on that endpoint work,
    # and buckets elsewhere do not
    self.add_mock_s3_data({'walrus-east': {}}, location='us-east-2')
    self.add_mock_s3_data({'walrus-west': {}}, location='us-west-2')

    fs = S3Filesystem(s3_endpoint='s3-us-east-2.amazonaws.com')

    east_bucket = fs.get_bucket('walrus-east')
    self.assertEqual(east_bucket.connection.host,
                     's3-us-east-2.amazonaws.com')

    # can't access this bucket from wrong endpoint!
    self.assertRaises(boto.exception.S3ResponseError,
                      fs.get_bucket, 'walrus-west')
def test_put_with_part_size(self):
    # part_size should be passed through to boto3's TransferConfig
    # as both chunk size and multipart threshold
    self.add_mock_s3_data({'bar-files': {}})

    src = self.makefile('foo', contents=b'bar')
    dest = 's3://bar-files/foo'

    S3Filesystem(part_size=12345).put(src, dest)

    self.assertEqual(b''.join(self.fs.cat(dest)), b'bar')
    self.TransferConfig.assert_called_once_with(
        multipart_chunksize=12345,
        multipart_threshold=12345,
    )
def test_create_bucket_in_us_west_2(self):
    # a bucket created with an explicit region should report that
    # region as its location constraint and endpoint
    fs = S3Filesystem()
    fs.create_bucket('walrus', region='us-west-2')

    location = fs.make_s3_client().get_bucket_location('walrus')
    self.assertEqual(location['LocationConstraint'], 'us-west-2')

    bucket_meta = fs.get_bucket('walrus').meta.client.meta
    self.assertEqual(bucket_meta.endpoint_url,
                     'https://s3-us-west-2.amazonaws.com')
    self.assertEqual(bucket_meta.region_name, 'us-west-2')
def test_create_bucket_with_no_region(self):
    # with no region, the bucket lands in us-east-1, whose location
    # constraint is None
    fs = S3Filesystem()
    fs.create_bucket('walrus')

    location = fs.make_s3_client().get_bucket_location('walrus')
    self.assertEqual(location['LocationConstraint'], None)

    bucket_meta = fs.get_bucket('walrus').meta.client.meta
    self.assertEqual(bucket_meta.endpoint_url,
                     'https://s3.amazonaws.com')
    self.assertEqual(bucket_meta.region_name, 'us-east-1')
def test_buckets_from_forced_s3_endpoint(self):
    self.add_mock_s3_data({'walrus-east': {}}, location='us-east-2')

    fs = S3Filesystem(s3_endpoint='s3-us-west-2.amazonaws.com')
    east_bucket = fs.get_bucket('walrus-east')

    with patch('tests.mock_boto3.s3.MockS3Client.get_bucket_location'
               ) as mock_gbl:
        # won't actually be able to access this bucket from this endpoint,
        # but boto3 doesn't check that on bucket creation
        self.assertEqual(east_bucket.meta.client.meta.endpoint_url,
                         'https://s3-us-west-2.amazonaws.com')

        # no reason to check bucket location if endpoint is forced
        self.assertFalse(mock_gbl.called)
def test_buckets_from_forced_s3_endpoint(self):
    self.add_mock_s3_data({'walrus-east': {}}, location='us-east-2')
    self.add_mock_s3_data({'walrus-west': {}}, location='us-west-2')

    fs = S3Filesystem(s3_endpoint='s3-us-east-2.amazonaws.com')
    east_bucket = fs.get_bucket('walrus-east')

    with patch('tests.mockboto.MockBucket.get_location') as mock_get_loc:
        self.assertEqual(east_bucket.connection.host,
                         's3-us-east-2.amazonaws.com')

        # no reason to check bucket location if endpoint is forced
        self.assertFalse(mock_get_loc.called)

    # can't access this bucket from wrong endpoint!
    self.assertRaises(boto.exception.S3ResponseError,
                      fs.get_bucket, 'walrus-west')
def setUp(self):
    # run the base class's S3 mock setup, then give every test a
    # fresh S3Filesystem to exercise
    super(S3FSTestCase, self).setUp()
    self.fs = S3Filesystem()
def setUp(self):
    # sandbox boto so no real AWS calls are made, and undo the
    # sandboxing when the test finishes
    self.sandbox_boto()
    self.addCleanup(self.unsandbox_boto)
    # dummy credentials/endpoint; the sandbox never contacts AWS
    self.fs = S3Filesystem('key_id', 'secret', 'nowhere')
def test_force_s3_endpoint(self):
    # an explicit s3_endpoint should be used verbatim for connections
    fs = S3Filesystem(s3_endpoint='s3-us-west-1.amazonaws.com')

    conn = fs.make_s3_conn()
    self.assertEqual(conn.host, 's3-us-west-1.amazonaws.com')
def test_default_endpoint(self):
    # with no options, connections go to the default S3 endpoint
    conn = S3Filesystem().make_s3_conn()
    self.assertEqual(conn.host, 's3.amazonaws.com')
def setUp(self):
    # run the base class's S3 mock setup, then give every test a
    # fresh S3Filesystem to exercise
    super(S3FSTestCase, self).setUp()
    self.fs = S3Filesystem()
    # patch TransferConfig so tests can assert on upload settings
    # (self.start() presumably registers the patcher for cleanup)
    self.TransferConfig = self.start(
        patch('boto3.s3.transfer.TransferConfig'))
def test_bucket_does_not_exist(self):
    # asking for a bucket that was never created raises ClientError
    fs = S3Filesystem()
    self.assertRaises(ClientError, fs.get_bucket, 'walrus')