def test_error_dereferencing(api, users, location, es, httpserver: pytest_httpserver.HTTPServer):
    """Check that a failed remote fetch raises and marks the file as errored.

    The stub HTTP server answers the by-reference URL with ``410 Gone``. The
    dereference task must raise ``urllib.error.HTTPError`` and afterwards the
    object's ``FileState`` tag must equal ``FileState.Error``.
    """
    with api.test_request_context():
        deposit = SWORDDeposit.create({})
        remote_object = ObjectVersion.create(bucket=deposit.bucket, key="some-file.txt")

        tag_manager = TagManager(remote_object)
        tag_manager.update(
            {
                ObjectTagKey.ByReferenceURL: httpserver.url_for("some-file.txt"),
                # This one should get removed after dereferencing
                ObjectTagKey.ByReferenceNotDeleted: "true",
                ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
            }
        )

        # Make the remote end report that the file is gone.
        gone_handler = httpserver.expect_request("/some-file.txt")
        gone_handler.respond_with_data(b"", status=HTTPStatus.GONE)

        db.session.refresh(remote_object)
        with pytest.raises(urllib.error.HTTPError):
            tasks.dereference_object(deposit.id, remote_object.version_id)

        # Reload the object before inspecting the tags written by the task.
        db.session.refresh(remote_object)
        final_tags = TagManager(remote_object)
        assert final_tags.get(ObjectTagKey.FileState) == FileState.Error
def test_put_metadata_document(api, users, location, es):
    """PUT a SWORD metadata document and check it is stored and advertised.

    After a ``204 No Content`` response the record must report the SWORD
    metadata format as its source format, and the status document must carry
    a ``formattedMetadata`` link for that format.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        request_headers = {
            "Metadata-Format": "http://purl.org/net/sword/3.0/types/Metadata",
            "Content-Type": "application/ld+json",
        }
        response = client.put(
            "/sword/deposit/{}/metadata".format(deposit.pid.pid_value),
            headers=request_headers,
            data=json.dumps({}),
        )
        assert response.status_code == HTTPStatus.NO_CONTENT

        # Re-read the record so that the assertions see the persisted state.
        deposit = SWORDDeposit.get_record(deposit.id)
        assert (
            deposit["swordMetadataSourceFormat"]
            == "http://purl.org/net/sword/3.0/types/Metadata"
        )

        def is_sword_metadata_link(link):
            """Tell whether ``link`` is a formatted-metadata link in SWORD format."""
            return (
                "http://purl.org/net/sword/3.0/terms/formattedMetadata" in link["rel"]
                and link["metadataFormat"]
                == "http://purl.org/net/sword/3.0/types/Metadata"
            )

        assert any(
            is_sword_metadata_link(link)
            for link in deposit.get_status_as_jsonld()["links"]
        )
def test_get_metadata_document(api, users, location, es):
    """GET the metadata document of a deposit and compare it to the stored metadata.

    Both the status document and the metadata document must answer ``200 OK``;
    the metadata response must be JSON containing the deposit ``@id``, the
    SWORD ``@context`` and the stored title.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.set_metadata({"dc:title": "Deposit title"}, SWORDMetadata)
        deposit.commit()
        db.session.commit()

        status_response = client.get(
            "/sword/deposit/{}".format(deposit.pid.pid_value)
        )
        assert status_response.status_code == HTTPStatus.OK

        metadata_response = client.get(
            "/sword/deposit/{}/metadata".format(deposit.pid.pid_value)
        )
        assert metadata_response.status_code == HTTPStatus.OK
        assert metadata_response.is_json

        expected_document = {
            "@id": "http://localhost/sword/deposit/{}".format(deposit.pid.pid_value),
            "@context": "https://swordapp.github.io/swordv3/swordv3.jsonld",
            "dc:title": "Deposit title",
        }
        assert metadata_response.json == expected_document
def test_dereference_missing_upload(api, users, location, task_delay):
    """Dereferencing a segmented upload that has no parts fails with ``ValueError``.

    A multipart object is registered for the segmented upload, but no parts
    are created and it is not completed. The dereference task must raise
    ``ValueError`` and tag the object with ``FileState.Error``.
    """
    with api.test_request_context():
        # Register a segmented upload without uploading parts or completing it
        upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        MultipartObject.create(
            bucket=upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )

        deposit: SWORDDeposit = SWORDDeposit.create({})
        by_reference_files = [
            ByReferenceFileDefinition(
                temporary_id=upload_record.id,
                content_disposition="attachment; filename=something.txt",
                content_type="text/plain",
                packaging=PackagingFormat.Binary,
                dereference=True,
            ),
        ]
        deposit.set_by_reference_files(
            by_reference_files,
            lambda *args: True,
            "http://localhost/",
        )

        # Exactly one object version is expected to exist at this point.
        pending_object = ObjectVersion.query.one()

        with pytest.raises(ValueError):
            tasks.dereference_object(deposit.id, pending_object.version_id)

        file_state = TagManager(pending_object)[ObjectTagKey.FileState]
        assert file_state == FileState.Error
def test_dereference_without_url(api, location, es):
    """Dereferencing an object that was given no tags raises ``ValueError``."""
    with api.test_request_context():
        deposit = SWORDDeposit.create({})
        # No by-reference tags are attached to this object.
        untagged_object = ObjectVersion.create(
            bucket=deposit.bucket,
            key="some-file.txt",
        )

        with pytest.raises(ValueError):
            tasks.dereference_object(deposit.id, untagged_object.version_id)
def test_delete_old_files(api, location, es, task_delay):
    """``delete_old_objects`` keeps only the listed keys in the record's bucket.

    Two by-reference files and two directly ingested files are added to a
    deposit. After the task is called with the two ``*-yes.html`` keys, only
    those two keys may remain among the record's files.
    """
    with api.test_request_context():
        deposit: SWORDDeposit = SWORDDeposit.create({})

        def current_keys():
            """Return the sorted keys of the files currently on the deposit."""
            return sorted(file.key for file in deposit.files)

        deposit.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    url="http://example.com/one",
                    content_disposition="attachment; filename=br-yes.html",
                    content_type="text/html",
                    content_length=100,
                    packaging=PackagingFormat.Binary,
                    dereference=False,
                ),
                ByReferenceFileDefinition(
                    url="http://example.com/two",
                    content_disposition="attachment; filename=br-no.html",
                    content_type="text/html",
                    packaging=PackagingFormat.Binary,
                    dereference=False,
                ),
            ],
            dereference_policy=lambda record, brf: brf.dereference,
            request_url="http://localhost/something",
            replace=False,
        )

        # Two directly uploaded files: one to keep, one to delete.
        deposit.ingest_file(
            io.BytesIO(b"data"),
            packaging_name=PackagingFormat.Binary,
            content_type="text/html",
            content_disposition="attachment; filename=direct-yes.html",
            replace=False,
        )
        deposit.ingest_file(
            io.BytesIO(b"data"),
            packaging_name=PackagingFormat.Binary,
            content_type="text/html",
            content_disposition="attachment; filename=direct-no.html",
            replace=False,
        )

        assert current_keys() == [
            "br-no.html",
            "br-yes.html",
            "direct-no.html",
            "direct-yes.html",
        ]

        tasks.delete_old_objects(
            ["br-yes.html", "direct-yes.html"],
            bucket_id=deposit.bucket_id,
        )

        assert current_keys() == [
            "br-yes.html",
            "direct-yes.html",
        ]
def test_non_binary_doesnt_shortcut_unpack(
    api, location, es, packaging_cls: Type[Packaging]
):
    """Check that ``shortcut_unpack`` declines by returning ``NotImplemented``.

    ``packaging_cls`` is supplied by the test setup (its source is not visible
    in this block); each class is instantiated for a fresh deposit and asked
    to shortcut-unpack a small in-memory file.
    """
    with api.test_request_context():
        record = SWORDDeposit.create({})
        object_version = ObjectVersion.create(
            bucket=record.bucket, key="some-file.txt", stream=io.BytesIO(b"data")
        )
        packaging = packaging_cls(record)

        # ``NotImplemented`` is a singleton: compare by identity so that a
        # return value with a permissive ``__eq__`` cannot satisfy the check.
        assert packaging.shortcut_unpack(object_version) is NotImplemented
def test_put_fileset_url(api, users, location, es, task_delay):
    """PUT to the fileset URL replaces the existing fileset files.

    An existing file tagged as a fileset file is present before the request.
    After the PUT (``204 No Content``) and after applying the single queued
    task, the old key must have a new head version without a file, and the
    newly uploaded key must exist as a head version.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()

        old_object = ObjectVersion.create(
            deposit.bucket,
            key="old-file.txt",
            stream=io.BytesIO(b"hello"),
            mimetype="text/plain",
        )
        ObjectVersionTag.create(
            object_version=old_object,
            key=ObjectTagKey.FileSetFile.value,
            value="true",
        )
        db.session.commit()

        fileset_url = url_for(
            "invenio_sword.depid_fileset", pid_value=deposit.pid.pid_value
        )
        upload_headers = {
            "Content-Disposition": "attachment; filename=new-file.txt",
            "Content-Type": "text/plain",
        }
        response = client.put(
            fileset_url,
            data=b"hello again",
            headers=upload_headers,
        )
        assert response.status_code == HTTPStatus.NO_CONTENT

        # Exactly one task must have been queued; run it synchronously.
        assert task_delay.call_count == 1
        queued_task = task_delay.call_args[0][0]
        queued_task.apply()

        # Check original ObjectVersion is marked deleted
        old_versions_query = ObjectVersion.query.filter_by(
            bucket=deposit.bucket, key="old-file.txt"
        ).order_by("created")
        old_versions = list(old_versions_query)
        assert len(old_versions) == 2
        first_version, second_version = old_versions[0], old_versions[1]
        assert not first_version.is_head
        assert second_version.is_head
        assert second_version.file is None

        # Check new object has been created
        uploaded_object = ObjectVersion.query.filter_by(
            bucket=deposit.bucket, key="new-file.txt"
        ).one()
        assert uploaded_object.is_head
def test_unpack_document_with_broken_bag(
    api, location, filename, error_class, fixtures_path
):
    """Unpacking the fixture ``filename`` as SWORD BagIt raises ``error_class``."""
    with api.test_request_context():
        deposit = SWORDDeposit.create({})

        # Store the fixture file as an object in the deposit's bucket.
        fixture_file = os.path.join(fixtures_path, filename)
        with open(fixture_file, "rb") as stream:
            stored_object = ObjectVersion.create(
                bucket=deposit.bucket,
                key=filename,
                stream=stream,
            )

        bagit_packaging = Packaging.for_record_and_name(
            deposit, PackagingFormat.SwordBagIt
        )
        with pytest.raises(error_class):
            bagit_packaging.unpack(stored_object)
def create_bagit_record(fixtures_path):
    """Create, unpack and commit a deposit built from the ``bagit.zip`` fixture.

    :param fixtures_path: directory that contains ``bagit.zip``.
    :returns: the committed ``SWORDDeposit`` record.
    """
    archive_path = os.path.join(fixtures_path, "bagit.zip")
    with open(archive_path, "rb") as archive:
        deposit = SWORDDeposit.create({})
        bagit = SWORDBagItPackaging(deposit)

        # The archive is stored under the packaging's original-deposit name.
        original_deposit = ObjectVersion.create(
            bucket=deposit.bucket,
            key=bagit.get_original_deposit_filename(),
            stream=archive,
        )
        bagit.unpack(original_deposit)

        deposit.commit()
        db.session.commit()
        return deposit
def test_post_metadata_document_to_append(api, users, location, es, view_name, status_code, additional_headers):
    """POST a metadata document to ``view_name`` and check it is merged in.

    The deposit starts with a title and a subject. The posted document carries
    a different subject and a creator. The response must have ``status_code``
    and the stored metadata must keep the title, take the posted subject and
    gain the creator.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        initial_metadata = {
            "@context": "https://swordapp.github.io/swordv3/swordv3.jsonld",
            "dc:title": "Some title",
            "dc:subject": "Some subject",
        }
        deposit.set_metadata(initial_metadata, SWORDMetadata)
        deposit.commit()
        db.session.commit()

        target_url = url_for(view_name, pid_value=deposit.pid.pid_value)
        request_headers = {
            "Metadata-Format": "http://purl.org/net/sword/3.0/types/Metadata",
            "Content-Type": "application/ld+json",
            **additional_headers,
        }
        posted_document = json.dumps(
            {
                "@context": "https://swordapp.github.io/swordv3/swordv3.jsonld",
                "dc:subject": "Another subject",
                "dc:creator": "A person",
            }
        )
        response = client.post(
            target_url,
            headers=request_headers,
            data=posted_document,
        )
        assert response.status_code == status_code

        # Re-read the record so that the assertions see the persisted state.
        deposit = SWORDDeposit.get_record(deposit.id)
        assert (
            deposit["swordMetadataSourceFormat"]
            == "http://purl.org/net/sword/3.0/types/Metadata"
        )
        assert deposit["swordMetadata"] == {
            "@context": "https://swordapp.github.io/swordv3/swordv3.jsonld",
            "dc:title": "Some title",
            "dc:subject": "Another subject",
            "dc:creator": "A person",
        }
def test_post_service_document_with_incorrect_content_type(
    api, users, location, fixtures_path
):
    """A BagIt archive stored with an ``application/tar`` mimetype is rejected.

    Unpacking it with the SWORD BagIt packaging must raise
    ``ContentTypeNotAcceptable``.
    """
    with api.test_request_context():
        deposit = SWORDDeposit.create({})

        archive_path = os.path.join(fixtures_path, "bagit.zip")
        with open(archive_path, "rb") as stream:
            # The mimetype deliberately does not match the zip content.
            mislabelled_object = ObjectVersion.create(
                bucket=deposit.bucket,
                key="bagit.zip",
                stream=stream,
                mimetype="application/tar",
            )

        bagit_packaging = Packaging.for_record_and_name(
            deposit, PackagingFormat.SwordBagIt
        )
        with pytest.raises(ContentTypeNotAcceptable):
            bagit_packaging.unpack(mislabelled_object)
def test_put_metadata_document_without_body(api, users, location, es):
    """A PUT to the metadata URL with no headers and no body yields ``400``."""
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        metadata_url = "/sword/deposit/{}/metadata".format(deposit.pid.pid_value)
        response = client.put(metadata_url)

        assert response.status_code == HTTPStatus.BAD_REQUEST
def test_get_original_deposit_filename(api, es, location):
    """Check how ``BinaryPackaging`` chooses the original deposit filename.

    An explicit filename is returned unchanged; with only a ``text/plain``
    media type the name ends in ``.txt``; with no arguments it ends in
    ``.bin``.
    """
    with api.test_request_context():
        deposit: SWORDDeposit = SWORDDeposit.create({})
        binary_packaging = BinaryPackaging(deposit)

        # A random name, so the assertion cannot pass by coincidence.
        given_name = secrets.token_hex(16)
        name_from_explicit = binary_packaging.get_original_deposit_filename(
            given_name, media_type="text/plain"
        )
        assert name_from_explicit == given_name

        name_from_media_type = binary_packaging.get_original_deposit_filename(
            media_type="text/plain"
        )
        assert name_from_media_type.endswith(".txt")

        default_name = binary_packaging.get_original_deposit_filename()
        assert default_name.endswith(".bin")
def test_post_metadata_document_with_additional_metadata_format(
        api, users, location, es, test_metadata_format):
    """POST metadata in an additional format without disturbing SWORD metadata.

    The request must answer ``204 No Content``. The stored SWORD metadata and
    its source format must be unchanged, and the status document must contain
    at least one ``formattedMetadata`` link.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        sword_metadata = {
            "@context": "https://swordapp.github.io/swordv3/swordv3.jsonld",
            "dc:title": "Some title",
            "dc:subject": "Some subject",
        }
        deposit.set_metadata(sword_metadata, SWORDMetadata)
        deposit.commit()
        db.session.commit()

        metadata_url = "/sword/deposit/{}/metadata".format(deposit.pid.pid_value)
        request_headers = {
            "Metadata-Format": test_metadata_format,
            "Content-Type": "text/plain",
        }
        response = client.post(
            metadata_url,
            headers=request_headers,
            data=io.BytesIO(b"some metadata"),
        )
        assert response.status_code == HTTPStatus.NO_CONTENT

        deposit = SWORDDeposit.get_record(deposit.id)

        # Check nothing changed
        assert (
            deposit["swordMetadataSourceFormat"]
            == "http://purl.org/net/sword/3.0/types/Metadata"
        )
        assert deposit["swordMetadata"] == {
            "@context": "https://swordapp.github.io/swordv3/swordv3.jsonld",
            "dc:title": "Some title",
            "dc:subject": "Some subject",
        }

        # At least one formatted-metadata link must be advertised.
        formatted_metadata_links = [
            link
            for link in deposit.get_status_as_jsonld()["links"]
            if "http://purl.org/net/sword/3.0/terms/formattedMetadata" in link["rel"]
        ]
        assert formatted_metadata_links
def test_by_reference_sets_tag(api, users, location, task_delay):
    """A completed segmented upload deposited by reference gets the expected tags.

    After ``set_by_reference_files`` the single object version must carry
    exactly the expected tag set, and after dereferencing its file content
    must equal the concatenation of the uploaded parts.
    """
    with api.test_request_context():
        # Assemble a segmented upload from parts, and complete it
        upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        upload = MultipartObject.create(
            bucket=upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(upload, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(upload, 1, stream=io.BytesIO(b"klmno"))
        upload.complete()

        deposit: SWORDDeposit = SWORDDeposit.create({})
        by_reference_files = [
            ByReferenceFileDefinition(
                temporary_id=upload_record.id,
                content_disposition="attachment; filename=something.txt",
                content_type="text/plain",
                packaging=PackagingFormat.Binary,
                dereference=True,
            ),
        ]
        deposit.set_by_reference_files(
            by_reference_files,
            lambda *args: True,
            "http://localhost/",
        )

        # Exactly one object version is expected to exist at this point.
        deposited_object = ObjectVersion.query.one()
        actual_tags = TagManager(deposited_object)
        expected_tags = {
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTemporaryID: str(upload_record.id),
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
        }
        assert actual_tags == expected_tags

        tasks.dereference_object(deposit.id, deposited_object.version_id)

        stored_bytes = deposited_object.file.storage().open().read()
        assert stored_bytes == b"abcdefghijklmno"
def test_get_fileset_url(api, users, location, es):
    """GET on the fileset URL is answered with ``405 Method Not Allowed``."""
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        fileset_url = url_for(
            "invenio_sword.depid_fileset", pid_value=deposit.pid.pid_value
        )
        response = client.get(fileset_url)

        assert response.status_code == HTTPStatus.METHOD_NOT_ALLOWED
def test_delete_status_document(api, users, location, es):
    """DELETE on a deposit answers ``204``; a later GET answers ``410 Gone``."""
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        delete_response = client.delete(
            "/sword/deposit/{}".format(deposit.pid.pid_value)
        )
        assert delete_response.status_code == HTTPStatus.NO_CONTENT

        # The same URL must now report that the deposit is gone.
        get_response = client.get(
            "/sword/deposit/{}".format(deposit.pid.pid_value)
        )
        assert get_response.status_code == HTTPStatus.GONE
def test_bad_files(
    api,
    location,
    filename,
    content_type,
    packaging_class,
    exception_class,
    fixtures_path,
):
    """Unpacking the fixture ``filename`` with ``packaging_class`` raises ``exception_class``.

    The fixture is stored with ``content_type`` as its mimetype before the
    unpack is attempted.
    """
    with api.app_context():
        deposit = SWORDDeposit.create({})
        packaging_under_test = packaging_class(deposit)

        fixture_file = os.path.join(fixtures_path, filename)
        with open(fixture_file, "rb") as stream:
            bad_object = ObjectVersion.create(
                deposit.bucket,
                key=filename,
                stream=stream,
                mimetype=content_type,
            )

        with pytest.raises(exception_class):
            packaging_under_test.unpack(bad_object)
def test_simple_zip(api, users, location, fixtures_path):
    """Unpack ``simple.zip`` and check the mimetypes of the extracted files.

    ``fixtures_path`` is requested as a fixture, as the sibling packaging
    tests do, so the name used to locate ``simple.zip`` is always bound
    inside this function.

    After unpacking with ``SimpleZipPackaging``, ``example.svg`` must be
    stored as ``image/svg+xml`` and ``hello.txt`` as ``text/plain``.
    """
    with api.test_request_context():
        record = SWORDDeposit.create({})

        with open(os.path.join(fixtures_path, "simple.zip"), "rb") as stream:
            object_version = ObjectVersion.create(
                bucket=record.bucket,
                key="deposit.zip",
                stream=stream,
                mimetype="application/zip",
            )

        SimpleZipPackaging(record).unpack(object_version)

        # ``.one()`` also asserts that each extracted key exists exactly once.
        obj_1 = ObjectVersion.query.filter_by(
            bucket=record.bucket, key="example.svg"
        ).one()
        obj_2 = ObjectVersion.query.filter_by(
            bucket=record.bucket, key="hello.txt"
        ).one()

        assert obj_1.mimetype == "image/svg+xml"
        assert obj_2.mimetype == "text/plain"
def test_error_unpacking(api, users, location, es):
    """A failed unpack raises and marks the file as errored.

    A ``text/plain`` object is tagged with the SimpleZip packaging. The unpack
    task must raise ``ContentTypeNotAcceptable`` and afterwards the object's
    ``FileState`` tag must equal ``FileState.Error``.
    """
    with api.test_request_context():
        deposit = SWORDDeposit.create({})
        text_object = ObjectVersion.create(
            bucket=deposit.bucket,
            key="some-file.txt",
            mimetype="text/plain",
        )

        tag_manager = TagManager(text_object)
        tag_manager.update(
            {
                ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
            }
        )

        db.session.refresh(text_object)
        with pytest.raises(ContentTypeNotAcceptable):
            tasks.unpack_object(deposit.id, text_object.version_id)

        # Reload the object before inspecting the tags written by the task.
        db.session.refresh(text_object)
        final_tags = TagManager(text_object)
        assert final_tags.get(ObjectTagKey.FileState) == FileState.Error
def test_post_fileset_url(api, users, location, es, task_delay):
    """POST to the fileset URL adds a file and leaves existing files alone.

    After the ``204 No Content`` response the pre-existing key must still
    have a single head version, and the newly uploaded key must exist as a
    head version.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()

        ObjectVersion.create(
            deposit.bucket,
            key="old-file.txt",
            stream=io.BytesIO(b"hello"),
            mimetype="text/plain",
        )
        db.session.commit()

        fileset_url = url_for(
            "invenio_sword.depid_fileset", pid_value=deposit.pid.pid_value
        )
        upload_headers = {
            "Content-Disposition": "attachment; filename=new-file.txt",
            "Content-Type": "text/plain",
        }
        response = client.post(
            fileset_url,
            data=b"hello again",
            headers=upload_headers,
        )
        assert response.status_code == HTTPStatus.NO_CONTENT

        # Check original ObjectVersion is still there
        old_versions_query = ObjectVersion.query.filter_by(
            bucket=deposit.bucket, key="old-file.txt"
        ).order_by("created")
        old_versions = list(old_versions_query)
        assert len(old_versions) == 1
        assert old_versions[0].is_head

        # Check new object has been created
        uploaded_object = ObjectVersion.query.filter_by(
            bucket=deposit.bucket, key="new-file.txt"
        ).one()
        assert uploaded_object.is_head
def test_put_metadata_document_invalid_json(api, users, location, es):
    """A JSON-LD metadata PUT that carries no body is answered with ``400``.

    The request declares ``application/ld+json`` and the SWORD metadata
    format but passes no ``data``.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        metadata_url = "/sword/deposit/{}/metadata".format(deposit.pid.pid_value)
        request_headers = {
            "Content-Type": "application/ld+json",
            "Metadata-Format": "http://purl.org/net/sword/3.0/types/Metadata",
        }
        response = client.put(metadata_url, headers=request_headers)

        assert response.status_code == HTTPStatus.BAD_REQUEST
def test_dereference_task(api, users, location, es, httpserver: pytest_httpserver.HTTPServer):
    """Dereferencing fetches the remote file once and updates the tags.

    The stub server serves the file contents. After the task the server must
    have logged exactly one request for the file, the object must hold those
    contents, and its tags must equal the URL, the packaging and a pending
    ``FileState``.
    """
    remote_contents = "File contents.\n"
    with api.test_request_context():
        deposit = SWORDDeposit.create({})
        remote_object = ObjectVersion.create(bucket=deposit.bucket, key="some-file.txt")

        tag_manager = TagManager(remote_object)
        tag_manager.update(
            {
                ObjectTagKey.ByReferenceURL: httpserver.url_for("some-file.txt"),
                # This one should get removed after dereferencing
                ObjectTagKey.ByReferenceNotDeleted: "true",
                ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
            }
        )

        file_handler = httpserver.expect_request("/some-file.txt")
        file_handler.respond_with_data(remote_contents)

        db.session.refresh(remote_object)
        tasks.dereference_object(deposit.id, remote_object.version_id)

        # Check requests
        assert len(httpserver.log) == 1
        logged_request = httpserver.log[0][0]
        assert logged_request.path == "/some-file.txt"

        # Reload the object before inspecting what the task stored.
        db.session.refresh(remote_object)
        assert remote_object.file is not None
        stored_bytes = remote_object.file.storage().open().read()
        assert stored_bytes == remote_contents.encode("utf-8")

        actual_tags = TagManager(remote_object)
        expected_tags = {
            ObjectTagKey.ByReferenceURL: httpserver.url_for("some-file.txt"),
            ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
            ObjectTagKey.FileState: FileState.Pending,
        }
        assert actual_tags == expected_tags
def test_dereference_already_dereferenced(
        api, location, es, httpserver: pytest_httpserver.HTTPServer):
    """An object that already has content is not fetched again.

    The task must return the object's key in a list, and the stub server must
    have logged no requests.
    """
    with api.test_request_context():
        deposit = SWORDDeposit.create({})
        # The object is created with content already in place.
        populated_object = ObjectVersion.create(
            bucket=deposit.bucket,
            key="some-file.txt",
            stream=io.BytesIO(b"data"),
        )

        tag_manager = TagManager(populated_object)
        tag_manager.update(
            {
                ObjectTagKey.ByReferenceURL: httpserver.url_for("some-file.txt"),
                ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
            }
        )

        file_handler = httpserver.expect_request("/some-file.txt")
        file_handler.respond_with_data(b"data")

        db.session.refresh(populated_object)
        task_result = tasks.dereference_object(
            deposit.id, populated_object.version_id
        )

        assert task_result == ["some-file.txt"]
        assert httpserver.log == []
def test_put_status_document(api, users, location, es):
    """An empty PUT on a deposit answers ``200`` and removes its fileset file.

    Afterwards the bucket must hold two object versions: the original, no
    longer head, and a head version that has no file.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        existing_object = ObjectVersion.create(
            deposit.bucket,
            "file.n3",
            mimetype="text/n3",
            stream=io.BytesIO(b"1 _:a 2 ."),
        )
        ObjectVersionTag.create(
            object_version=existing_object,
            key=ObjectTagKey.FileSetFile.value,
            value="true",
        )

        deposit_url = "/sword/deposit/{}".format(deposit.pid.pid_value)
        response = client.put(deposit_url, data=b"")
        assert response.status_code == HTTPStatus.OK

        # This should have removed the previous file, as the empty PUT is a reset.
        versions_query = ObjectVersion.query.filter_by(
            bucket=deposit.bucket
        ).order_by("created")
        versions = list(versions_query)
        assert len(versions) == 2
        first_version, second_version = versions[0], versions[1]
        assert not first_version.is_head
        assert second_version.is_head
        assert second_version.file is None
def test_put_metadata_document_with_unsupported_format(api, users, location, es):
    """A metadata PUT with an unknown ``Metadata-Format`` is rejected.

    The response must be ``415 Unsupported Media Type`` and its JSON body
    must have ``@type`` equal to ``MetadataFormatNotAcceptable``.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        metadata_url = "/sword/deposit/{}/metadata".format(deposit.pid.pid_value)
        request_headers = {
            "Metadata-Format": "http://sword.invalid/Metadata",
            "Content-Type": "application/ld+json",
        }
        response = client.put(
            metadata_url,
            headers=request_headers,
            data=json.dumps({}),
        )

        assert response.status_code == HTTPStatus.UNSUPPORTED_MEDIA_TYPE
        assert response.json["@type"] == "MetadataFormatNotAcceptable"
def test_delete_metadata_document(api, users, location, es):
    """DELETE on the metadata URL clears the stored SWORD metadata.

    Both ``swordMetadataSourceFormat`` and ``swordMetadata`` are present
    before the request and must be absent from the re-read record after the
    ``204 No Content`` response.
    """
    metadata_keys = ("swordMetadataSourceFormat", "swordMetadata")

    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.set_metadata({"dc:title": "Deposit title"}, SWORDMetadata)
        deposit.commit()
        db.session.commit()

        # Precondition: the metadata is there before it is deleted.
        for key in metadata_keys:
            assert deposit.get(key) is not None

        metadata_url = "/sword/deposit/{}/metadata".format(deposit.pid.pid_value)
        response = client.delete(metadata_url)
        assert response.status_code == HTTPStatus.NO_CONTENT

        deposit = SWORDDeposit.get_record(deposit.id)
        for key in metadata_keys:
            assert deposit.get(key) is None
def test_get_status_document(api, users, location, es):
    """The status document lists the deposit's single file with its content type.

    The response must be ``200 OK`` with exactly one link, whose
    ``contentType`` is ``text/n3``.
    """
    with api.test_request_context(), api.test_client() as client:
        credentials = {"email": users[0]["email"], "password": "******"}
        client.post(url_for_security("login"), data=credentials)

        deposit = SWORDDeposit.create({})
        deposit.commit()
        db.session.commit()

        ObjectVersion.create(
            deposit.bucket,
            "file.n3",
            mimetype="text/n3",
            stream=io.BytesIO(b"1 _:a 2 ."),
        )

        deposit_url = "/sword/deposit/{}".format(deposit.pid.pid_value)
        response = client.get(deposit_url)
        assert response.status_code == HTTPStatus.OK

        links = response.json["links"]
        assert len(links) == 1
        assert links[0]["contentType"] == "text/n3"