def test_put(client, db, bucket, permissions, multipart, multipart_url, get_md5, get_json):
    """Verify part upload: permission handling, ETag, and stored bytes."""
    scenarios = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - use 403 instead
        ('bucket', 200),
        ('location', 200),
    ]
    payload = b'a' * multipart.chunk_size
    for user, expected_status in scenarios:
        login_user(client, permissions[user])
        res = client.put(
            multipart_url + '&partNumber=1',
            input_stream=BytesIO(payload),
        )
        assert res.status_code == expected_status
        if expected_status == 200:
            # The ETag of an uploaded part is its MD5.
            assert res.get_etag()[0] == get_md5(payload)
            # The bytes of part 1 must land at offset chunk_size in the file.
            with open(multipart.file.uri, 'rb') as fp:
                fp.seek(multipart.chunk_size)
                assert fp.read(multipart.chunk_size) == payload
            assert Part.count(multipart) == 1
            assert Part.get_or_none(multipart, 1).checksum == get_md5(payload)
def test_post_by_reference_segmented(api, users, location, task_delay):
    """Deposit a by-reference file that points at a completed segmented upload.

    Builds a two-part segmented upload (10 + 5 bytes), completes it, POSTs a
    SWORD by-reference request referencing the staged upload, and checks the
    tags recorded on the resulting object version.
    """
    with api.test_request_context(), api.test_client() as client:
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()
        login(client)
        # TTL one hour in the future, ISO-formatted from a timezone-aware UTC time.
        ttl = (
            datetime.datetime.now(tz=datetime.timezone.utc)
            + datetime.timedelta(0, 3600)
        ).isoformat()
        response = client.post(
            "/sword/service-document",
            data=json.dumps(
                {
                    "@context": JSON_LD_CONTEXT,
                    "@type": "ByReference",
                    "byReferenceFiles": [
                        {
                            "@id": f"http://localhost/sword/staging/{segmented_upload_record.id}",
                            "contentDisposition": "attachment; filename=some-resource.json",
                            "contentType": "application/json",
                            "dereference": True,
                            "ttl": ttl,
                        }
                    ],
                }
            ),
            headers={
                "Content-Disposition": "attachment; by-reference=true",
                "Content-Type": "application/ld+json",
            },
        )
        assert response.status_code == HTTPStatus.CREATED
        # Exactly one object version should exist for the new deposit.
        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)
        assert tags == {
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTTL: ttl,
        }
def test_by_reference_sets_tag(api, users, location, task_delay):
    """``set_by_reference_files`` tags the object version; dereference stores content.

    Builds a two-part segmented upload, registers it as a by-reference file on
    a fresh deposit, verifies the recorded tags, then runs the dereference
    task and checks the stored bytes are the concatenation of the parts.
    """
    with api.test_request_context():
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()
        record: SWORDDeposit = SWORDDeposit.create({})
        record.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    temporary_id=segmented_upload_record.id,
                    content_disposition="attachment; filename=something.txt",
                    content_type="text/plain",
                    packaging=PackagingFormat.Binary,
                    dereference=True,
                ),
            ],
            # Permission callback: allow everything for this test.
            lambda *args: True,
            "http://localhost/",
        )
        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)
        assert tags == {
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
        }
        tasks.dereference_object(record.id, object_version.version_id)
        # The dereferenced file is the two uploaded parts concatenated.
        assert object_version.file.storage().open().read() == b"abcdefghijklmno"
def test_multipart_full(app, db, bucket):
    """Run a multipart upload end to end: init, parts, complete, merge.

    NOTE(review): an identical ``test_multipart_full`` appears twice in this
    source — likely duplicated across revisions; confirm which is current.
    """
    app.config.update(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
    )

    # Upload geometry: 19 full 5 MiB chunks plus a final 1 MiB chunk.
    n_chunks = 20
    chunk_size = 5 * 1024 * 1024
    last_chunk = 1024 * 1024
    total_size = (n_chunks - 1) * chunk_size + last_chunk

    # Initiate the multipart object.
    mp = MultipartObject.create(
        bucket, 'testfile', size=total_size, chunk_size=chunk_size)
    db.session.commit()

    # Upload every part; only the final one is short.
    for part_no in range(n_chunks):
        is_last = part_no == n_chunks - 1
        Part.create(
            mp, part_no, stream=make_stream(last_chunk if is_last else chunk_size))
    db.session.commit()

    mp.complete()
    db.session.commit()

    size_before_merge = mp.bucket.size
    mp.merge_parts()
    db.session.commit()

    # Merging the parts must not change the bucket's accounted size.
    bucket = Bucket.get(bucket.id)
    assert bucket.size == size_before_merge

    # Restore small chunk-size limits for subsequent tests.
    app.config.update(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
    )
def test_multipart_full(app, db, bucket):
    """Exercise the full multipart lifecycle and bucket size accounting.

    NOTE(review): this test appears twice in this source with identical
    behaviour — likely a duplicate across revisions; confirm.
    """
    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
    ))

    total_parts = 20
    regular_size = 5 * 1024 * 1024  # 5 MiB per full chunk
    final_size = 1024 * 1024  # 1 MiB trailing chunk
    upload_size = regular_size * (total_parts - 1) + final_size

    # Initiate.
    multipart = MultipartObject.create(
        bucket, 'testfile', size=upload_size, chunk_size=regular_size)
    db.session.commit()

    # Create every part; the last is smaller than the chunk size.
    for index in range(total_parts):
        stream_size = final_size if index == total_parts - 1 else regular_size
        Part.create(multipart, index, stream=make_stream(stream_size))
    db.session.commit()

    # Complete, then merge.
    multipart.complete()
    db.session.commit()

    expected_bucket_size = multipart.bucket.size
    multipart.merge_parts()
    db.session.commit()

    # Bucket size must be unchanged by the merge.
    bucket = Bucket.get(bucket.id)
    assert bucket.size == expected_bucket_size

    # Restore small chunk-size limits for later tests.
    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
    ))
def test_get(client, db, bucket, permissions, multipart, multipart_url, get_json):
    """Listing parts honours permissions and returns every created part."""
    for part_number in (0, 1, 3):
        Part.create(multipart, part_number)
    db.session.commit()

    scenarios = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),
        ('bucket', 200),
        ('location', 200),
    ]
    for user, expected_status in scenarios:
        login_user(client, permissions[user])
        res = client.get(multipart_url)
        assert res.status_code == expected_status
        if expected_status == 200:
            # All three created parts must be listed.
            assert len(get_json(res)['parts']) == 3
def parts(db, multipart):
    """Create and return all parts for a multipart object.

    Part ``i`` is filled with the single ASCII digit ``str(i % 10)`` repeated
    to exactly the part's size (the final part may be shorter than the
    regular chunk size).

    :param db: Database session fixture (committed once at the end).
    :param multipart: The :class:`MultipartObject` to create parts for.
    :returns: The list of created :class:`Part` instances, in order.
    """
    items = []
    for i in range(multipart.last_part_number + 1):
        # The final part may be smaller than the regular chunk size.
        if i == multipart.last_part_number:
            part_size = multipart.last_part_size
        else:
            part_size = multipart.chunk_size
        # Use a single digit as fill so the stream is exactly part_size bytes.
        # (Formatting ``i`` directly yields multi-character strings once
        # i >= 10, which produced an oversized stream.)
        fill = str(i % 10).encode('ascii')
        items.append(Part.create(multipart, i, stream=BytesIO(fill * part_size)))
    db.session.commit()
    return items
def parts(db, multipart):
    """Create all parts for a multipart object and return them.

    Each part is filled with one repeated ASCII digit (``str(i % 10)``) so
    that the stream length is exactly the part size; the last part may be
    shorter than ``multipart.chunk_size``.

    :param db: Database session fixture (committed once after creation).
    :param multipart: The :class:`MultipartObject` to populate.
    :returns: List of created :class:`Part` instances.
    """
    items = []
    for i in range(multipart.last_part_number + 1):
        chunk_size = (
            multipart.last_part_size
            if i == multipart.last_part_number
            # All parts except the last use the regular chunk size.
            else multipart.chunk_size
        )
        # A single digit keeps the stream at exactly chunk_size bytes;
        # formatting ``i`` itself would overflow the size once i >= 10.
        p = Part.create(
            multipart, i,
            stream=BytesIO(str(i % 10).encode('ascii') * chunk_size)
        )
        items.append(p)
    db.session.commit()
    return items
def test_part_creation(app, db, bucket, get_sha256):
    """Parts uploaded out of order merge into a single correct object."""
    assert bucket.size == 0

    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    # The full upload size is reserved on the bucket at initiation time.
    assert bucket.size == 5

    # Upload the parts out of order.
    for part_no, payload in [(2, b'p'), (0, b'p1'), (1, b'p2')]:
        Part.create(mp, part_no, stream=BytesIO(payload))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Each part records its own SHA-256 checksum.
    expected_checksum = "sha256:{0}".format(hashlib.sha256(b'p2').hexdigest())
    assert Part.get_or_none(mp, 1).checksum == expected_checksum

    obj = mp.merge_parts()
    db.session.commit()

    # Merging removes the multipart bookkeeping and leaves one object.
    assert bucket.size == 5
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0
    assert obj.file.size == 5
    assert obj.file.checksum == get_sha256(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True
    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
def test_part_creation(app, db, bucket, get_md5):
    """Out-of-order part creation, completion, and merge (MD5 checksums).

    NOTE(review): a SHA-256 variant of this test also appears in this
    source — the two look like different revisions; confirm which applies.
    """
    assert bucket.size == 0

    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    # Initiation reserves the whole upload size on the bucket.
    assert bucket.size == 5

    # Create the parts in a scrambled order.
    for number, content in ((2, b'p'), (0, b'p1'), (1, b'p2')):
        Part.create(mp, number, stream=BytesIO(content))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Each part carries an MD5 checksum of its own bytes.
    part_digest = hashlib.md5(b'p2').hexdigest()
    assert Part.get_or_none(mp, 1).checksum == "md5:{0}".format(part_digest)

    obj = mp.merge_parts()
    db.session.commit()

    # After merging, the multipart rows are gone and one object remains.
    assert bucket.size == 5
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0
    assert obj.file.size == 5
    assert obj.file.checksum == get_md5(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True
    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id