def test_deposit_metadata_extended_swhid(
    swhid,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """A deposit whose SWHID reference targets an extended object type must be
    refused with a 400 and an "Invalid SWHID reference" summary.
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    payload = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=swhid)

    response = post_atom(authenticated_client, collection_url, data=payload)

    # Surface the response body in the failure message to ease debugging.
    assert (
        response.status_code == status.HTTP_400_BAD_REQUEST
    ), response.content.decode()
    summary = ElementTree.fromstring(response.content).findtext(
        "atom:summary", namespaces=NAMESPACES
    )
    assert "Invalid SWHID reference" in summary
def test_post_deposit_atom_with_slug_and_external_identifier(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """The deprecated <external_identifier> is still accepted when it equals
    the Slug header, so that existing clients keep working.
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    slug = str(uuid.uuid4())
    # The same value is used both inside the document and as Slug header.
    document = atom_dataset["error-with-external-identifier"] % slug

    # when
    response = post_atom(
        authenticated_client,
        collection_url,
        data=document,
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG=slug,
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    receipt = ElementTree.fromstring(response.content)
    deposit_id = int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))

    created = Deposit.objects.get(pk=deposit_id)
    assert created.collection == deposit_collection
    assert created.origin_url == deposit_user.provider_url + slug
    assert created.status == DEPOSIT_STATUS_DEPOSITED
def test_post_metadata_empty_post_finalize_deposit_ok(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
):
    """An empty atom POST with In-Progress set to false finalizes a partial
    deposit: the response is a 200 and the deposit becomes 'deposited'.
    """
    partial = partial_deposit_with_metadata
    assert partial.status == DEPOSIT_STATUS_PARTIAL

    se_url = reverse(SE_IRI, args=[deposit_collection.name, partial.id])
    response = post_atom(
        authenticated_client,
        se_url,
        data="",
        size=0,
        HTTP_IN_PROGRESS=False,
    )
    assert response.status_code == status.HTTP_200_OK

    # Re-read the deposit from the database to observe the new status.
    refreshed = Deposit.objects.get(pk=partial.id)
    assert refreshed.status == DEPOSIT_STATUS_DEPOSITED
def test_post_deposit_atom_201_even_with_decimal(
    authenticated_client, deposit_collection, atom_dataset
):
    """The "error-with-decimal" atom entry is accepted with a 201 and the
    stored raw metadata exposes the software version "10.4".
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # when
    response = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["error-with-decimal"],
        HTTP_SLUG="external-id",
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    receipt = ElementTree.fromstring(response.content)
    deposit_id = int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))

    deposit = Deposit.objects.get(pk=deposit_id)
    deposit_request = DepositRequest.objects.get(deposit=deposit)
    assert deposit_request.raw_metadata is not None

    # The version is read back from the metadata stored with the request.
    stored_metadata = ElementTree.fromstring(deposit_request.raw_metadata)
    sw_version = stored_metadata.findtext(
        "codemeta:softwareVersion", namespaces=NAMESPACES
    )
    assert sw_version == "10.4"
def test_post_deposit_atom_with_create_origin_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Using the deprecated <atom:external_identifier> together with
    <swh:create_origin> in one document must be rejected with a 400.
    """
    external_id = "foobar"
    origin_url = deposit_user.provider_url + external_id
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    template = atom_dataset["error-with-external-identifier-and-create-origin"]
    document = template.format(external_id=external_id, url=origin_url)

    # when
    response = post_atom(
        authenticated_client,
        collection_url,
        data=document,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert b"<external_identifier> is deprecated" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_add_deposit_add_to_origin_conflict(
    authenticated_client,
    deposit_collection,
    deposit_another_collection,
    atom_dataset,
    sample_archive,
    deposit_user,
    deposit_another_user,
):
    """Depositing against an origin URL under another client's provider URL
    is refused with a 403 whose body mentions "must start with".
    """
    external_id = "foobar"
    foreign_origin_url = deposit_another_user.provider_url + external_id

    # given: the other user already has a successfully loaded deposit
    internal_create_deposit(
        deposit_another_user,
        deposit_another_collection,
        external_id,
        DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    # when: the authenticated client targets that other user's origin url
    document = atom_dataset["entry-data0"] % foreign_origin_url
    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=document,
    )

    # then
    assert response.status_code == status.HTTP_403_FORBIDDEN
    assert b"must start with" in response.content
def test_put_atom_with_create_origin_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """A PUT carrying the deprecated <atom:external_identifier> on a deposit
    first created from the "entry-data0" document is rejected with a 400.
    """
    external_id = "foobar"
    origin_url = deposit_user.provider_url + external_id
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # given: a partial deposit created through a regular POST
    creation = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_IN_PROGRESS="true",
    )
    assert creation.status_code == status.HTTP_201_CREATED

    # The edit IRI to PUT on is advertised in the deposit receipt.
    receipt = parse_xml(creation.content)
    edit_link = receipt.find("atom:link[@rel='edit']", namespaces=NAMESPACES)
    edit_iri = edit_link.attrib["href"]

    # when
    response = put_atom(
        authenticated_client,
        edit_iri,
        data=atom_dataset["error-with-external-identifier"] % external_id,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert b"<external_identifier> is deprecated" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_no_origin_url_nor_slug_header(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """Without an origin url and without a Slug header, the deposit still
    succeeds and its origin url ends with a generated identifier.
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # Pin uuid.uuid4 so the generated identifier is known to the test.
    generated = str(uuid.uuid4())
    mocker.patch("uuid.uuid4", return_value=generated)

    # when
    response = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data-no-origin-url"],
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    receipt = ElementTree.fromstring(response.content)
    deposit_id = int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))

    created = Deposit.objects.get(pk=deposit_id)
    assert created.collection == deposit_collection
    assert created.origin_url == deposit_user.provider_url + generated
    assert created.status == DEPOSIT_STATUS_DEPOSITED
def test_put_atom_with_create_origin_and_reference(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """<swh:reference> and <swh:create_origin> cannot be combined on one
    deposit: a PUT adding a reference afterwards is rejected with a 400.
    """
    external_id = "foobar"
    origin_url = deposit_user.provider_url + external_id
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # given: a partial deposit created through a regular POST
    creation = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_IN_PROGRESS="true",
    )
    assert creation.status_code == status.HTTP_201_CREATED

    # The edit IRI to PUT on is advertised in the deposit receipt.
    receipt = parse_xml(creation.content)
    edit_link = receipt.find("atom:link[@rel='edit']", namespaces=NAMESPACES)
    edit_iri = edit_link.attrib["href"]

    # when
    reference_document = atom_dataset["entry-data-with-origin-reference"].format(
        url=origin_url
    )
    response = put_atom(
        authenticated_client,
        edit_iri,
        data=reference_document,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert b"only one may be used on a given deposit" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_replace_archive_to_deposit_is_possible(
    tmp_path,
    partial_deposit,
    deposit_collection,
    authenticated_client,
    sample_archive,
    atom_dataset,
):
    """Replacing the archive of a deposit should return a 204 response.

    The test first attaches one metadata request to the deposit, then PUTs a
    new archive on the EM IRI and checks that the single archive request now
    points at the new archive while the metadata request is left untouched.
    """
    tmp_path = str(tmp_path)

    # given: exactly one archive request, matching the sample archive
    deposit = partial_deposit
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(list(requests)) == 1
    check_archive(sample_archive["name"], requests[0].archive.name)

    # we have no metadata for that deposit
    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
    assert len(requests) == 0

    # attach some metadata so we can later check it survives the replacement
    response = post_atom(
        authenticated_client,
        reverse(SE_IRI, args=[deposit_collection.name, deposit.id]),
        data=atom_dataset["entry-data1"],
        HTTP_SLUG=deposit.external_id,
        HTTP_IN_PROGRESS=True,
    )
    # Fail early, with the server's answer, if the setup request is refused;
    # previously this response was silently overwritten further down.
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()

    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
    assert len(requests) == 1

    # when: the archive is replaced through the EM IRI
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    external_id = "some-external-id-1"
    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )

    response = put_archive(
        authenticated_client,
        update_uri,
        archive2,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    # then: still a single archive request, now for the new archive
    assert response.status_code == status.HTTP_204_NO_CONTENT

    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(list(requests)) == 1
    check_archive(archive2["name"], requests[0].archive.name)

    # check we did not touch the other parts
    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
    assert len(requests) == 1
def test_post_deposit_atom_parsing_error(
    authenticated_client, deposit_collection, atom_dataset
):
    """An atom document that cannot be parsed is refused with a 400
    mentioning "Malformed xml metadata".
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    broken_document = atom_dataset["entry-data-parsing-error-prone"]

    response = post_atom(
        authenticated_client,
        collection_url,
        data=broken_document,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in response.content
def test_post_deposit_atom_400_badly_formatted_atom(
    authenticated_client, deposit_collection, atom_dataset
):
    """A badly formatted atom document is refused with a 400 mentioning
    "Malformed xml metadata".
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    bad_document = atom_dataset["entry-data-badly-formatted"]

    response = post_atom(
        authenticated_client,
        collection_url,
        data=bad_document,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in response.content
def test_post_deposit_atom_400_with_empty_request(
    authenticated_client, deposit_collection
):
    """A POST with an empty payload and a zero Content-Length is refused
    with a 400 mentioning "Empty body request is not supported".
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client,
        collection_url,
        data={},
        HTTP_SLUG="external-id",
        CONTENT_LENGTH=0,
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Empty body request is not supported" in response.content
def test_post_deposit_atom_400_both_create_origin_and_add_to_origin(
    authenticated_client, deposit_collection, atom_dataset
):
    """An entry using both <swh:create_origin> and <swh:add_to_origin> is
    refused with a 400 stating that they are mutually exclusive.
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    document = atom_dataset["entry-data-with-both-create-origin-and-add-to-origin"]

    response = post_atom(authenticated_client, collection_url, data=document)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    expected_message = (
        b"<swh:create_origin> and <swh:add_to_origin> " b"are mutually exclusive"
    )
    assert expected_message in response.content
def test_post_deposit_atom_403_create_wrong_origin_url_prefix(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """A client may not create an origin outside of the url prefix it owns:
    the request is refused with a 403 mentioning "URL mismatch".
    """
    foreign_origin_url = "http://example.org/foo"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data0"] % foreign_origin_url,
        HTTP_IN_PROGRESS="true",
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    body = response.content.decode()
    assert "URL mismatch" in body
def test_deposit_metadata_invalid(
    authenticated_client, deposit_collection, atom_dataset
):
    """A deposit referencing a syntactically invalid SWHID is refused with a
    400 mentioning "Invalid SWHID reference".
    """
    # Note the stray space after "dir", which makes the identifier invalid.
    malformed_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    payload = atom_dataset["entry-data-with-swhid-no-prov"].format(
        swhid=malformed_swhid
    )

    response = post_atom(authenticated_client, collection_url, data=payload)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Invalid SWHID reference" in response.content
def test_post_deposit_atom_400_with_empty_body(
    authenticated_client, deposit_collection, atom_dataset
):
    """The "entry-data-empty-body" atom document is refused with a 400
    mentioning "Empty body request is not supported".
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data-empty-body"],
        HTTP_SLUG="external-id",
    )

    # Surface the response body in the failure message to ease debugging.
    assert (
        response.status_code == status.HTTP_400_BAD_REQUEST
    ), response.content.decode()
    assert b"Empty body request is not supported" in response.content
def test_post_deposit_atom_unknown_collection(authenticated_client, atom_dataset):
    """Posting an atom entry to a collection that does not exist returns a
    404 mentioning "Unknown collection".
    """
    missing_name = "unknown-one"

    # given: no collection with that name exists
    with pytest.raises(DepositCollection.DoesNotExist):
        DepositCollection.objects.get(name=missing_name)

    # when
    target_url = reverse(COL_IRI, args=[missing_name])
    response = post_atom(
        authenticated_client,
        target_url,
        data=atom_dataset["entry-data0"],
        HTTP_SLUG="something",
    )

    # then
    assert response.status_code == status.HTTP_404_NOT_FOUND
    assert b"Unknown collection" in response.content
def test_deposit_metadata_fails_functional_checks(
    authenticated_client, deposit_collection, atom_dataset
):
    """Metadata that does not pass the functional checks is refused with a
    400 mentioning "Functional metadata checks failure".
    """
    swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    template = atom_dataset["entry-data-with-swhid-fail-metadata-functional-checks"]
    payload = template.format(swhid=swhid)

    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=payload,
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Functional metadata checks failure" in response.content
def test_deposit_metadata_invalid_metadata_provenance(
    authenticated_client, deposit_collection, atom_dataset
):
    """A metadata provenance url that is not accepted for the client is
    refused with a 403 mentioning "URL mismatch".
    """
    swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    provenance_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh"
    payload = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid,
        metadata_provenance_url=provenance_url,
    )

    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=payload,
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    assert b"URL mismatch" in response.content
def test_add_metadata_to_unknown_deposit(
    deposit_collection, authenticated_client, atom_dataset
):
    """Adding metadata to an unknown deposit should return a 404 response.

    The collection used in the url exists; only the deposit id is unknown.
    """
    unknown_deposit_id = 1000

    # Precondition: the deposit really must not exist. The previous
    # ``try/except ... assert True`` passed even when the lookup succeeded.
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(pk=unknown_deposit_id)

    # Build the url from the collection *name*, like the other tests do,
    # instead of relying on the string form of the collection object.
    url = reverse(SE_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = post_atom(
        authenticated_client,
        url,
        data=atom_dataset["entry-data1"],
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert "Deposit 1000 does not exist" in response_content.findtext(
        "atom:summary", namespaces=NAMESPACES
    )
def test_post_deposit_atom_entry_initial(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """A first atom entry is answered with a 201 and a deposit receipt, and
    is recorded as a 'deposited' deposit with a single metadata request.
    """
    # given: no deposit exists yet for this origin url
    origin_url = deposit_user.provider_url + "1225c695-cfb8-4ebb-aaaa-80da344efa6a"
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(origin_url=origin_url)

    document = atom_dataset["entry-data0"] % origin_url
    utc = datetime.timezone.utc

    # when: the request is bracketed by two timestamps
    date_before = datetime.datetime.now(tz=utc)
    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=document,
        HTTP_IN_PROGRESS="false",
    )
    date_after = datetime.datetime.now(tz=utc)

    # then
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()

    receipt = ElementTree.fromstring(response.content)
    deposit_id = int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))

    # The advertised deposit date must fall within the request time window.
    deposit_date = datetime.datetime.fromisoformat(
        receipt.findtext("swh:deposit_date", namespaces=NAMESPACES)
    )
    assert date_before <= deposit_date <= date_after

    created = Deposit.objects.get(pk=deposit_id)
    assert created.collection == deposit_collection
    assert created.origin_url == origin_url
    assert created.status == DEPOSIT_STATUS_DEPOSITED

    # one associated request to a deposit, holding the metadata and no archive
    only_request = DepositRequest.objects.get(deposit=created)
    assert only_request.raw_metadata == document
    assert bool(only_request.archive) is False
def test_deposit_metadata_unknown_origin(
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """A deposit referencing the origin "https://gitlab.org/user/repo" is
    refused with a 400 whose summary mentions "known to the archive".
    """
    origin_url = "https://gitlab.org/user/repo"
    payload = atom_dataset["entry-data-with-origin-reference"].format(url=origin_url)

    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=payload,
    )

    # Surface the response body in the failure message to ease debugging.
    assert (
        response.status_code == status.HTTP_400_BAD_REQUEST
    ), response.content.decode()
    summary = ElementTree.fromstring(response.content).findtext(
        "atom:summary", namespaces=NAMESPACES
    )
    assert "known to the archive" in summary
def partial_deposit_only_metadata(
    deposit_collection, authenticated_client, atom_dataset
):
    """Post the "entry-data1" atom entry with In-Progress enabled and return
    the resulting deposit, which is checked to be in 'partial' status.
    """
    from swh.deposit.models import Deposit

    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data1"],
        HTTP_SLUG="external-id-partial",
        HTTP_IN_PROGRESS=True,
    )
    assert response.status_code == status.HTTP_201_CREATED

    # The identifier of the new deposit is read from the deposit receipt.
    receipt = ElementTree.fromstring(response.content)
    deposit_id = int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))

    created = Deposit._default_manager.get(pk=deposit_id)
    assert created.status == DEPOSIT_STATUS_PARTIAL
    return created
def test_add_metadata_to_unknown_collection(
    partial_deposit, authenticated_client, atom_dataset
):
    """Adding metadata through an unknown collection should return a 404.

    The deposit exists; only the collection name used in the url is unknown.
    """
    deposit = partial_deposit
    unknown_collection_name = "unknown-collection"

    # Precondition: the collection really must not exist. The previous
    # ``try/except ... assert True`` passed even when the lookup succeeded.
    with pytest.raises(DepositCollection.DoesNotExist):
        DepositCollection.objects.get(name=unknown_collection_name)

    url = reverse(SE_IRI, args=[unknown_collection_name, deposit.id])
    response = post_atom(
        authenticated_client,
        url,
        data=atom_dataset["entry-data1"],
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert "Unknown collection name" in response_content.findtext(
        "atom:summary", namespaces=NAMESPACES
    )
def test_post_deposit_atom_with_mismatched_slug_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset
):
    """When the Slug header and the <external_identifier> of the document
    differ, the deposit is refused with a 400.
    """
    external_id = "foobar"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    document = atom_dataset["error-with-external-identifier"] % external_id

    # when: the Slug header deliberately differs from the identifier above
    response = post_atom(
        authenticated_client,
        collection_url,
        data=document,
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG="something",
    )

    # then
    expected_message = b"The <external_identifier> tag and Slug header are deprecated"
    assert expected_message in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_add_deposit_with_add_to_origin_and_external_identifier(
    authenticated_client,
    deposit_collection,
    completed_deposit,
    atom_dataset,
    deposit_user,
):
    """A document combining <swh:add_to_origin> with the deprecated
    <external_identifier> is refused with a 400.
    """
    # given: the origin url of an already completed deposit
    origin_url = deposit_user.provider_url + completed_deposit.external_id
    template = atom_dataset["entry-data-with-both-add-to-origin-and-external-id"]

    # when
    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=template % origin_url,
    )

    # then
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"<external_identifier> is deprecated" in response.content
def test_add_deposit_add_to_wrong_origin(
    authenticated_client,
    deposit_collection,
    atom_dataset,
    sample_archive,
):
    """Depositing against an origin url that does not start with the
    client's provider url is refused with a 403 ("must start with").
    """
    foreign_origin_url = "http://example.org/foo"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client,
        collection_url,
        data=atom_dataset["entry-data0"] % foreign_origin_url,
    )

    # Surface the response body in the failure message to ease debugging.
    assert response.status_code == status.HTTP_403_FORBIDDEN, response.content.decode()
    assert b"must start with" in response.content
def test_add_metadata_to_deposit_is_possible(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
    deposit_user,
):
    """Adding metadata to a deposit returns a 201 and appends a new metadata
    request, leaving the existing metadata and archive requests untouched.
    """
    deposit = partial_deposit_with_metadata
    origin_url = deposit_user.provider_url + deposit.external_id

    # given: one metadata request and one archive request
    metadata_before = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(metadata_before) == 1
    archives_before = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(archives_before) == 1

    # when
    se_url = reverse(SE_IRI, args=[deposit_collection.name, deposit.id])
    new_entry = atom_dataset["entry-data1"]
    response = post_atom(authenticated_client, se_url, data=new_entry)

    # then
    assert response.status_code == status.HTTP_201_CREATED

    metadata_after = DepositRequest.objects.filter(
        deposit=deposit, type="metadata"
    ).order_by("id")
    assert len(metadata_after) == 2

    # the pre-existing request is unchanged and a new one was added after it
    initial_entry = atom_dataset["entry-data0"] % origin_url
    assert metadata_after[0].raw_metadata == initial_entry
    assert metadata_after[1].raw_metadata == new_entry

    # check we did not touch the other parts
    archives_after = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(archives_after) == 1
    assert set(archives_before) == set(archives_after)
def test_post_deposit_atom_with_create_origin_and_reference(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """<swh:reference> and <swh:create_origin> cannot be combined: a
    document using both is refused with a 400.
    """
    external_id = "foobar"
    origin_url = deposit_user.provider_url + external_id
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    template = atom_dataset["error-with-reference-and-create-origin"]
    document = template.format(external_id=external_id, url=origin_url)

    # when
    response = post_atom(
        authenticated_client,
        collection_url,
        data=document,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert b"only one may be used on a given deposit" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST