def test_s3_file_manager_read(): state = {"called": 0} bar_bytes = "bar".encode() class S3Mock(mock.MagicMock): def download_file(self, *_args, **kwargs): state["called"] += 1 assert state["called"] == 1 state["bucket"] = kwargs.get("Bucket") state["key"] = kwargs.get("Key") file_name = kwargs.get("Filename") state["file_name"] = file_name with open(file_name, "wb") as ff: ff.write(bar_bytes) s3_mock = S3Mock() file_manager = S3FileManager(s3_mock, "some-bucket", "some-key") file_handle = S3FileHandle("some-bucket", "some-key/kdjfkjdkfjkd") with file_manager.read(file_handle) as file_obj: assert file_obj.read() == bar_bytes assert state["bucket"] == file_handle.s3_bucket assert state["key"] == file_handle.s3_key # read again. cached with file_manager.read(file_handle) as file_obj: assert file_obj.read() == bar_bytes assert os.path.exists(state["file_name"]) file_manager.delete_local_temp() assert not os.path.exists(state["file_name"])
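# The fixture-based tests below rely on `mock_s3_resource` and `mock_s3_bucket`,
# which are normally supplied by a shared conftest.py. A minimal sketch using
# moto follows so this module is self-contained; the fixture bodies and the
# bucket name are assumptions, not the canonical definitions.
@pytest.fixture
def mock_s3_resource():
    # moto intercepts boto3 calls, so nothing in these tests touches real AWS.
    with mock_s3():
        yield boto3.resource("s3", region_name="us-east-1")


@pytest.fixture
def mock_s3_bucket(mock_s3_resource):
    # us-east-1 is the one region that needs no CreateBucketConfiguration.
    yield mock_s3_resource.create_bucket(Bucket="test-bucket")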
def test_s3_file_manager_write(mock_s3_resource, mock_s3_bucket):
    file_manager = S3FileManager(mock_s3_resource.meta.client, mock_s3_bucket.name, "some-key")
    body = "foo".encode()

    file_handle = file_manager.write_data(body)
    assert mock_s3_bucket.Object(file_handle.s3_key).get()["Body"].read() == body

    file_handle = file_manager.write_data(body, ext="foo")
    assert file_handle.s3_key.endswith(".foo")
    assert mock_s3_bucket.Object(file_handle.s3_key).get()["Body"].read() == body
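# `write_zip_file_to_disk` and the `unzip_file_handle` solid are shared helpers
# defined elsewhere in the test suite. Minimal sketches follow so this module is
# self-contained; both bodies are assumptions inferred from how the test below
# uses them, not the canonical implementations.
def write_zip_file_to_disk(zip_file_name, archive_member, data):
    # Write a zip archive containing a single member with the given bytes.
    with zipfile.ZipFile(zip_file_name, "w") as zf:
        zf.writestr(archive_member, data)


@solid(
    input_defs=[
        InputDefinition("file_handle", FileHandle),
        InputDefinition("archive_member", String),
    ],
    output_defs=[OutputDefinition(FileHandle)],
    required_resource_keys={"file_manager"},
)
def unzip_file_handle(context, file_handle, archive_member):
    # Pull the zipped file down through the file manager, extract the named
    # member, and write the extracted bytes back as a new file handle.
    with context.resources.file_manager.read(file_handle, mode="rb") as local_obj:
        with zipfile.ZipFile(local_obj) as zf:
            return context.resources.file_manager.write_data(zf.read(archive_member))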
def test_unzip_file_handle_on_fake_s3(): foo_bytes = b"foo" @solid(required_resource_keys={"file_manager"}, output_defs=[OutputDefinition(S3FileHandle)]) def write_zipped_file_to_s3_store(context): with get_temp_file_name() as zip_file_name: write_zip_file_to_disk(zip_file_name, "an_archive_member", foo_bytes) with open(zip_file_name, "rb") as ff: s3_file_handle = context.resources.file_manager.write_data(ff.read()) return s3_file_handle # Uses mock S3 # https://github.com/spulec/moto/issues/3292 s3 = boto3.client("s3", region_name="us-east-1") s3.create_bucket(Bucket="some-bucket") file_manager = S3FileManager(s3_session=s3, s3_bucket="some-bucket", s3_base_key="dagster") @pipeline( mode_defs=[ ModeDefinition( resource_defs={ "s3": ResourceDefinition.hardcoded_resource(s3), "file_manager": ResourceDefinition.hardcoded_resource(file_manager), "io_manager": s3_pickle_io_manager, }, ) ] ) def do_test_unzip_file_handle_s3(): return unzip_file_handle(write_zipped_file_to_s3_store()) result = execute_pipeline( do_test_unzip_file_handle_s3, run_config={ "resources": {"io_manager": {"config": {"s3_bucket": "some-bucket"}}}, "solids": { "unzip_file_handle": {"inputs": {"archive_member": {"value": "an_archive_member"}}} }, }, ) assert result.success zipped_s3_file = result.result_for_solid("write_zipped_file_to_s3_store").output_value() unzipped_s3_file = result.result_for_solid("unzip_file_handle").output_value() bucket_keys = [obj["Key"] for obj in s3.list_objects(Bucket="some-bucket")["Contents"]] assert zipped_s3_file.s3_key in bucket_keys assert unzipped_s3_file.s3_key in bucket_keys
def test_s3_file_manager_read(mock_s3_resource, mock_s3_bucket):
    body = "bar".encode()
    remote_s3_object = mock_s3_bucket.Object("some-key/foo")
    remote_s3_object.put(Body=body)

    file_manager = S3FileManager(mock_s3_resource.meta.client, mock_s3_bucket.name, "some-key")
    file_handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo")

    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == body

    # Delete the remote object, then read again: the second read must be served
    # from the local cache rather than from S3.
    remote_s3_object.delete()

    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == body
def test_s3_file_manager_write_with_mock_client():
    s3_mock = mock.MagicMock()
    file_manager = S3FileManager(s3_mock, "some-bucket", "some-key")
    foo_bytes = "foo".encode()

    file_handle = file_manager.write_data(foo_bytes)
    assert isinstance(file_handle, S3FileHandle)
    assert file_handle.s3_bucket == "some-bucket"
    assert file_handle.s3_key.startswith("some-key/")
    assert s3_mock.put_object.call_count == 1

    # Writing with an explicit extension appends it to the generated key.
    file_handle = file_manager.write_data(foo_bytes, ext="foo")
    assert isinstance(file_handle, S3FileHandle)
    assert file_handle.s3_bucket == "some-bucket"
    assert file_handle.s3_key.startswith("some-key/")
    assert file_handle.s3_key[-4:] == ".foo"
    assert s3_mock.put_object.call_count == 2