def test_convert_to_uploaded_file_happy_path(self):
    """convert_to_uploaded_file() copies bytes to the final key, sets metadata,
    marks the InProgressUpload completed, and deletes the temporary object."""
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    ipu = wf_module.in_progress_uploads.create()
    minio.put_bytes(ipu.Bucket, ipu.get_upload_key(), b"1234567")

    uploaded_file = ipu.convert_to_uploaded_file("test sheet.xlsx")

    self.assertEqual(uploaded_file.uuid, str(ipu.id))
    final_key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
    # New file on S3 has the right bytes and metadata
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
        b"1234567",
    )
    self.assertEqual(
        minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
            "ContentDisposition"
        ],
        "attachment; filename*=UTF-8''test%20sheet.xlsx",
    )
    # InProgressUpload is completed -- in memory and in the database
    self.assertEqual(ipu.is_completed, True)
    ipu.refresh_from_db()
    self.assertEqual(ipu.is_completed, True)  # also on DB
    # The temporary upload object is deleted
    self.assertFalse(minio.exists(minio.UserFilesBucket, ipu.get_upload_key()))
def test_upload_empty_file(self):
    """A presigned PUT of zero bytes succeeds and stores an empty object."""
    md5sum = _base64_md5sum(b'')
    url, headers = minio.presign_upload(Bucket, 'key', 't.csv', 0, md5sum)
    http = urllib3.PoolManager()
    response = http.request('PUT', url, body=b'', headers=headers)
    self.assertEqual(response.status, 200)
    self.assertEqual(minio.get_object_with_data(Bucket, 'key')['Body'], b'')
def test_multipart_upload_by_presigned_requests(self, send_delta):
    """Test presign_upload_part _and_ complete_multipart_upload"""
    # Integration-test: use `urllib3` to run presigned responses.
    # See `test_minio` for canonical usage.
    user = User.objects.create(username='******', email='*****@*****.**')
    workflow = Workflow.create_and_init(owner=user)
    uuid = str(uuidgen.uuid4())
    key = f'wf-123/wfm-234/{uuid}.csv'
    upload_id = minio.create_multipart_upload(
        minio.UserFilesBucket, key, 'file.csv'
    )
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        module_id_name='x',
        inprogress_file_upload_id=upload_id,
        inprogress_file_upload_key=key,
        inprogress_file_upload_last_accessed_at=timezone.now(),
    )
    data = b'1234567' * 1024 * 1024  # 7MB => 5MB+2MB parts
    data1 = data[:5 * 1024 * 1024]
    data2 = data[5 * 1024 * 1024:]
    md5sum1 = _base64_md5sum(data1)
    md5sum2 = _base64_md5sum(data2)

    # Presign both parts via the handler
    response1 = self.run_handler(
        presign_upload_part, user=user, workflow=workflow,
        wfModuleId=wf_module.id, uploadId=upload_id, partNumber=1,
        nBytes=len(data1), base64Md5sum=md5sum1,
    )
    self.assertEqual(response1.error, '')
    response2 = self.run_handler(
        presign_upload_part, user=user, workflow=workflow,
        wfModuleId=wf_module.id, uploadId=upload_id, partNumber=2,
        nBytes=len(data2), base64Md5sum=md5sum2,
    )
    self.assertEqual(response2.error, '')

    # Upload both parts with the presigned URLs
    http = urllib3.PoolManager()
    s3response1 = http.request(
        'PUT', response1.data['url'], body=data1,
        headers=response1.data['headers'],
    )
    self.assertEqual(s3response1.status, 200)
    s3response2 = http.request(
        'PUT', response2.data['url'], body=data2,
        headers=response2.data['headers'],
    )
    self.assertEqual(s3response2.status, 200)
    etag1 = s3response1.headers['ETag'][1:-1]  # un-wrap quotes
    etag2 = s3response2.headers['ETag'][1:-1]  # un-wrap quotes

    # Complete the upload
    send_delta.side_effect = async_noop
    response3 = self.run_handler(
        complete_multipart_upload, user=user, workflow=workflow,
        wfModuleId=wf_module.id, uploadId=upload_id,
        etags=[etag1, etag2],
    )
    self.assertResponse(response3, data={'uuid': uuid})
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, key)['Body'],
        data,
    )
def _external_module_get_html_bytes(id_name: str, version: str) -> Optional[bytes]:
    """Return the bytes of the module version's `.html` file, or None if absent."""
    prefix = '%s/%s/' % (id_name, version)
    all_keys = minio.list_file_keys(minio.ExternalModulesBucket, prefix)
    html_key = next((k for k in all_keys if k.endswith('.html')), None)
    if html_key is None:
        return None  # there is no HTML file
    return minio.get_object_with_data(
        minio.ExternalModulesBucket, html_key
    )['Body']
def test_assume_role_to_write_multipart(self):
    """A role-restricted session client can finish a multipart upload."""
    client = self._assume_role_session_client_with_write_access(Bucket, "key")
    from boto3.s3.transfer import TransferConfig

    payload = b"1234567" * 1024 * 1024  # 7MB => 5MB+2MB parts
    client.upload_fileobj(
        io.BytesIO(payload),
        Bucket,
        "key",
        Config=TransferConfig(multipart_threshold=5 * 1024 * 1024),
    )
    self.assertEqual(minio.get_object_with_data(Bucket, "key")["Body"], payload)
def test_upload_by_presigned_request(self):
    """A presigned PUT stores the bytes and the Content-Disposition metadata."""
    payload = b'1234567'
    md5sum = _base64_md5sum(payload)
    url, headers = minio.presign_upload(
        Bucket, 'key', 'file.csv', len(payload), md5sum
    )
    http = urllib3.PoolManager()
    response = http.request('PUT', url, body=payload, headers=headers)
    self.assertEqual(response.status, 200)
    self.assertEqual(
        minio.get_object_with_data(Bucket, 'key')['Body'], payload
    )
    head = minio.client.head_object(Bucket=Bucket, Key='key')
    self.assertEqual(
        head['ContentDisposition'],
        "attachment; filename*=UTF-8''file.csv",
    )
def test_finish_upload_happy_path(self, send_delta):
    """finish_upload moves the temp object to its final key, records an
    UploadedFile, and notifies clients via a delta."""
    user = User.objects.create(username="******", email="*****@*****.**")
    workflow = Workflow.create_and_init(owner=user)
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    in_progress_upload = wf_module.in_progress_uploads.create(
        id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
    )
    key = in_progress_upload.get_upload_key()
    minio.put_bytes(in_progress_upload.Bucket, key, b"1234567")
    send_delta.side_effect = async_noop

    response = self.run_handler(
        finish_upload,
        user=user,
        workflow=workflow,
        wfModuleId=wf_module.id,
        key=key,
        filename="test sheet.csv",
    )
    self.assertResponse(
        response, data={"uuid": "147a9f5d-5b3e-41c3-a968-a84a5a9d587f"}
    )

    # The uploaded file is deleted
    self.assertFalse(minio.exists(in_progress_upload.Bucket, key))

    # A new upload is created
    uploaded_file = wf_module.uploaded_files.first()
    self.assertEqual(uploaded_file.name, "test sheet.csv")
    self.assertEqual(uploaded_file.size, 7)
    self.assertEqual(uploaded_file.uuid, "147a9f5d-5b3e-41c3-a968-a84a5a9d587f")
    self.assertEqual(uploaded_file.bucket, in_progress_upload.Bucket)
    final_key = (
        f"wf-{workflow.id}/wfm-{wf_module.id}"
        f"/147a9f5d-5b3e-41c3-a968-a84a5a9d587f.csv"
    )
    self.assertEqual(uploaded_file.key, final_key)

    # The file has the right bytes and metadata
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
        b"1234567",
    )
    self.assertEqual(
        minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
            "ContentDisposition"
        ],
        "attachment; filename*=UTF-8''test%20sheet.csv",
    )

    # wf_module is updated
    send_delta.assert_called()
def test_wf_module_duplicate_copy_uploaded_file(self):
    """duplicate() copies the uploaded file under the new module's prefix,
    preserving uuid/name/size/created_at so params stay valid."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        module_id_name='upload',
    )
    uuid = str(uuidgen.uuid4())
    key = f'{wf_module.uploaded_file_prefix}{uuid}.csv'
    minio.put_bytes(minio.UserFilesBucket, key, b'1234567')
    # Write the uuid to the old module -- we'll check the new module points
    # to a valid file
    wf_module.params = {'file': uuid, 'has_header': True}
    wf_module.save(update_fields=['params'])
    uploaded_file = wf_module.uploaded_files.create(
        name='t.csv',
        uuid=uuid,
        bucket=minio.UserFilesBucket,
        key=key,
        size=7,
    )

    workflow2 = Workflow.create_and_init()
    tab2 = workflow2.tabs.first()
    wf_module2 = wf_module.duplicate(tab2)

    uploaded_file2 = wf_module2.uploaded_files.first()
    self.assertIsNotNone(uploaded_file2)
    # New file gets same uuid -- because it's the same file and we don't
    # want to edit params during copy
    self.assertEqual(uploaded_file2.uuid, uuid)
    self.assertEqual(wf_module2.params['file'], uuid)
    self.assertTrue(
        # The new file should be in a different path
        uploaded_file2.key.startswith(wf_module2.uploaded_file_prefix)
    )
    self.assertEqual(uploaded_file2.name, 't.csv')
    self.assertEqual(uploaded_file2.size, 7)
    self.assertEqual(uploaded_file2.created_at, uploaded_file.created_at)
    self.assertEqual(
        minio.get_object_with_data(
            uploaded_file2.bucket, uploaded_file2.key
        )['Body'],
        b'1234567',
    )
def test_complete_happy_path(self, queue_render, send_delta):
    """Completing an upload via the API finalizes the file, deletes the temp
    object, returns uuid/name/size, and queues delta + render."""
    send_delta.return_value = async_noop()
    queue_render.return_value = async_noop()
    _init_module("x")
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        slug="step-123",
        module_id_name="x",
        file_upload_api_token="abc123",
    )
    upload = wf_module.in_progress_uploads.create()
    uuid = str(upload.id)
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")

    response = self.client.post(
        f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
        {"filename": "test.csv"},
        content_type="application/json",
        HTTP_AUTHORIZATION="Bearer abc123",
    )
    self.assertEqual(response.status_code, 200)

    # Upload and its S3 data were deleted
    self.assertFalse(minio.exists(upload.Bucket, key))
    upload.refresh_from_db()
    self.assertTrue(upload.is_completed)

    # Final upload was created
    uploaded_file = wf_module.uploaded_files.first()
    self.assertEqual(
        uploaded_file.key, f"wf-{workflow.id}/wfm-{wf_module.id}/{uuid}.csv"
    )
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, uploaded_file.key)[
            "Body"
        ],
        b"1234567",
    )
    self.assertEqual(uploaded_file.name, "test.csv")

    # Return value includes uuid
    data = json.loads(response.content)
    self.assertEqual(data["uuid"], uuid)
    self.assertEqual(data["name"], "test.csv")
    self.assertEqual(data["size"], 7)

    # Send deltas
    send_delta.assert_called()
    queue_render.assert_called()
def test_wf_module_duplicate_copy_uploaded_file(self):
    """duplicate_into_new_workflow() copies the uploaded file under the new
    module's prefix, preserving uuid/name/size/created_at."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0, slug="step-1", module_id_name="upload"
    )
    uuid = str(uuidgen.uuid4())
    key = f"{wf_module.uploaded_file_prefix}{uuid}.csv"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
    # Write the uuid to the old module -- we'll check the new module points
    # to a valid file
    wf_module.params = {"file": uuid, "has_header": True}
    wf_module.save(update_fields=["params"])
    uploaded_file = wf_module.uploaded_files.create(
        name="t.csv", uuid=uuid, bucket=minio.UserFilesBucket, key=key, size=7
    )

    workflow2 = Workflow.create_and_init()
    tab2 = workflow2.tabs.first()
    wf_module2 = wf_module.duplicate_into_new_workflow(tab2)

    uploaded_file2 = wf_module2.uploaded_files.first()
    self.assertIsNotNone(uploaded_file2)
    # New file gets same uuid -- because it's the same file and we don't
    # want to edit params during copy
    self.assertEqual(uploaded_file2.uuid, uuid)
    self.assertEqual(wf_module2.params["file"], uuid)
    self.assertTrue(
        # The new file should be in a different path
        uploaded_file2.key.startswith(wf_module2.uploaded_file_prefix)
    )
    self.assertEqual(uploaded_file2.name, "t.csv")
    self.assertEqual(uploaded_file2.size, 7)
    self.assertEqual(uploaded_file2.created_at, uploaded_file.created_at)
    self.assertEqual(
        minio.get_object_with_data(uploaded_file2.bucket, uploaded_file2.key)[
            "Body"
        ],
        b"1234567",
    )
def test_multipart_upload_by_presigned_requests(self):
    """Upload two presigned parts, complete the upload, and read back
    the concatenated bytes."""
    upload_id = minio.create_multipart_upload(Bucket, 'key', 'file.csv')
    data = b'1234567' * 1024 * 1024  # 7MB => 5MB+2MB parts
    data1 = data[:5 * 1024 * 1024]
    data2 = data[5 * 1024 * 1024:]
    md5sum1 = _base64_md5sum(data1)
    md5sum2 = _base64_md5sum(data2)
    url1, headers1 = minio.presign_upload_part(
        Bucket, 'key', upload_id, 1, len(data1), md5sum1
    )
    url2, headers2 = minio.presign_upload_part(
        Bucket, 'key', upload_id, 2, len(data2), md5sum2
    )
    http = urllib3.PoolManager()
    response1 = http.request('PUT', url1, body=data1, headers=headers1)
    self.assertEqual(response1.status, 200)
    etag1 = response1.headers['ETag'][1:-1]  # un-wrap quotes
    response2 = http.request('PUT', url2, body=data2, headers=headers2)
    self.assertEqual(response2.status, 200)
    etag2 = response2.headers['ETag'][1:-1]  # un-wrap quotes
    minio.complete_multipart_upload(Bucket, 'key', upload_id, [etag1, etag2])
    self.assertEqual(minio.get_object_with_data(Bucket, 'key')['Body'], data)
def test_integration_happy_path(self):
    """Generate upload parameters, upload with a second S3 client using the
    temporary credentials, and convert to an uploaded file."""
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    ipu = wf_module.in_progress_uploads.create()
    updated_at1 = ipu.updated_at
    time.sleep(0.000001)  # so updated_at changes
    params = ipu.generate_upload_parameters()
    ipu.refresh_from_db()  # ensure we wrote updated_at
    updated_at2 = ipu.updated_at
    self.assertGreater(updated_at2, updated_at1)

    # Upload using a separate S3 client
    # Import _after_ we've imported minio -- so server.minio's monkey-patch
    # takes effect.
    import boto3

    credentials = params["credentials"]
    session = boto3.session.Session(
        aws_access_key_id=credentials["accessKeyId"],
        aws_secret_access_key=credentials["secretAccessKey"],
        aws_session_token=credentials["sessionToken"],
        region_name=params["region"],
    )
    client = session.client("s3", endpoint_url=params["endpoint"])
    client.put_object(
        Bucket=ipu.Bucket, Key=ipu.get_upload_key(), Body=b"1234567"
    )

    # Complete the upload
    uploaded_file = ipu.convert_to_uploaded_file("test.csv")
    self.assertEqual(
        minio.get_object_with_data(uploaded_file.bucket, uploaded_file.key)[
            "Body"
        ],
        b"1234567",
    )
def test_assume_role_to_write(self):
    """A role-restricted session client can write a small (single-part) object."""
    client = self._assume_role_session_client_with_write_access(Bucket, "key")
    payload = b"1234567"
    client.upload_fileobj(io.BytesIO(payload), Bucket, "key")
    self.assertEqual(minio.get_object_with_data(Bucket, "key")["Body"], payload)