def test_post_deposit_2_post_2_different_deposits(
    authenticated_client, deposit_collection, sample_archive
):
    """Two successive archive posts must each return a 201 and create
    two distinct deposits.

    The first post is checked to create exactly one deposit; the second
    post (sent with an explicit tar content type) must yield a different
    deposit, so that the database ends up holding exactly both of them.
    """
    url = reverse(COL_IRI, args=[deposit_collection.name])

    # when
    response = post_archive(
        authenticated_client,
        url,
        sample_archive,
        HTTP_SLUG="some-external-id-1",
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    response_content = parse_xml(response.content)
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES)
    )
    deposit = Deposit.objects.get(pk=deposit_id)

    deposits = Deposit.objects.all()
    assert len(deposits) == 1
    assert deposits[0] == deposit

    # second post
    response = post_archive(
        authenticated_client,
        url,
        sample_archive,
        content_type="application/x-tar",
        HTTP_SLUG="another-external-id",
        HTTP_IN_PROGRESS="false",
    )

    assert response.status_code == status.HTTP_201_CREATED

    response_content = parse_xml(response.content)
    deposit_id2 = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES)
    )
    deposit2 = Deposit.objects.get(pk=deposit_id2)

    assert deposit != deposit2

    deposits = Deposit.objects.all().order_by("id")
    assert len(deposits) == 2
    # Compare against the expected list: the previous form
    # `assert list(deposits), [...]` only used the list as the assertion
    # message and therefore could never fail.
    assert list(deposits) == [deposit, deposit2]
def test_post_deposit_binary_upload_fail_if_content_length_missing(
    authenticated_client, deposit_collection, sample_archive, tmp_path
):
    """A binary upload sent without Content-Length is rejected with a 400
    and no deposit is recorded."""
    slug = "some-external-id"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # Build a dedicated archive in the temporary directory.
    tmp_path = str(tmp_path)
    payload = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some content in file", up_to_size=500
    )

    # when: the length header is explicitly unset
    reply = post_archive(
        authenticated_client,
        collection_url,
        payload,
        CONTENT_LENGTH=None,
        HTTP_SLUG=slug,
        HTTP_IN_PROGRESS="false",
    )

    # then: the request is refused with an explanatory message...
    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    assert b"the CONTENT_LENGTH header must be sent." in reply.content

    # ...and nothing was stored under that external id
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(external_id=slug)
def test_post_deposit_binary_upload_fail_if_upload_size_limit_exceeded(
    authenticated_client, deposit_collection, sample_archive, tmp_path
):
    """An archive exceeding the upload size limit is rejected with a 413
    and no deposit is recorded."""
    slug = "some-external-id"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # Build an archive padded up to 5000 bytes.
    tmp_path = str(tmp_path)
    oversized = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some content in file", up_to_size=5000
    )

    # when
    reply = post_archive(
        authenticated_client,
        collection_url,
        oversized,
        HTTP_SLUG=slug,
        HTTP_IN_PROGRESS="false",
    )

    # then: refused as too large...
    assert reply.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
    assert b"Upload size limit exceeded" in reply.content

    # ...and nothing was stored under that external id
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(external_id=slug)
def test_post_deposit_binary_no_slug(
    authenticated_client, deposit_collection, sample_archive, deposit_user, mocker
):
    """A binary deposit posted without a slug header gets a generated
    identifier, visible in the resulting origin url."""
    # Freeze uuid4 so the generated identifier is predictable.
    generated_id = str(uuid.uuid4())
    mocker.patch("uuid.uuid4", return_value=generated_id)

    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # when: no slug is provided
    # NOTE(review): `in_progress` is passed as a plain keyword here while
    # sibling tests use HTTP_IN_PROGRESS -- confirm post_archive accepts it.
    reply = post_archive(
        authenticated_client,
        collection_url,
        sample_archive,
        in_progress="false",
    )

    # then
    assert reply.status_code == status.HTTP_201_CREATED

    receipt = parse_xml(reply.content)
    raw_deposit_id = receipt.findtext("swh:deposit_id", namespaces=NAMESPACES)
    created = Deposit.objects.get(pk=int(raw_deposit_id))

    assert created.collection == deposit_collection
    assert created.origin_url == deposit_user.provider_url + generated_id
    assert created.status == DEPOSIT_STATUS_DEPOSITED
def create_deposit(
    client,
    collection_name: str,
    sample_archive,
    external_id: str,
    deposit_status=DEPOSIT_STATUS_DEPOSITED,
    in_progress=False,
):
    """Post an archive to a collection and return the resulting deposit.

    The archive is posted to the collection's IRI with ``external_id`` as
    slug. The deposit referenced by the receipt is then loaded and, when
    its status differs from ``deposit_status``, overwritten with that
    status and saved.

    Returns:
        The deposit model instance, whose status equals ``deposit_status``.
    """
    collection_url = reverse(COL_IRI, args=[collection_name])
    # The header expects a lowercase "true"/"false" string.
    in_progress_header = str(in_progress).lower()

    reply = post_archive(
        client,
        collection_url,
        sample_archive,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS=in_progress_header,
    )

    # Surface the response body when the creation is refused.
    assert reply.status_code == status.HTTP_201_CREATED, reply.content.decode()

    from swh.deposit.models import Deposit

    receipt = ElementTree.fromstring(reply.content)
    raw_id = receipt.findtext("swh:deposit_id", "", namespaces=NAMESPACES)
    deposit = Deposit._default_manager.get(id=int(raw_id))

    # Force the requested status if the freshly created deposit differs.
    if deposit.status != deposit_status:
        deposit.status = deposit_status
        deposit.save()
    assert deposit.status == deposit_status

    return deposit
def test_add_archive_to_deposit_is_possible(
    tmp_path,
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    sample_archive,
):
    """Posting a second archive on an existing deposit returns a 201,
    keeps the first archive and leaves the metadata requests untouched."""
    tmp_path = str(tmp_path)
    deposit = partial_deposit_with_metadata

    # Initial state: one archive request and one metadata request.
    archives_before = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(archives_before) == 1
    check_archive(sample_archive["name"], archives_before[0].archive.name)

    metadata_before = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(metadata_before) == 1

    # when: another archive is posted on the deposit's media IRI
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    external_id = "some-external-id-1"
    second_archive = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )

    reply = post_archive(
        authenticated_client,
        update_uri,
        second_archive,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert reply.status_code == status.HTTP_201_CREATED

    archives_after = DepositRequest.objects.filter(
        deposit=deposit, type="archive"
    ).order_by("id")
    assert len(archives_after) == 2

    # first archive still exists
    check_archive(sample_archive["name"], archives_after[0].archive.name)
    # a new one was added
    check_archive(second_archive["name"], archives_after[1].archive.name)

    # check we did not touch the other parts
    metadata_after = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(metadata_after) == 1
    assert set(metadata_before) == set(metadata_after)
def test_post_deposit_mediation_not_supported(
    authenticated_client, deposit_collection, sample_archive
):
    """A binary upload carrying an On-Behalf-Of header is refused with a
    412 and no deposit is recorded."""
    # given
    slug = "some-external-id-1"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # when: the request asks for mediation
    reply = post_archive(
        authenticated_client,
        collection_url,
        sample_archive,
        HTTP_SLUG=slug,
        HTTP_IN_PROGRESS="false",
        HTTP_ON_BEHALF_OF="someone",
    )

    # then
    assert reply.status_code == status.HTTP_412_PRECONDITION_FAILED

    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(external_id=slug)
def test_post_deposit_binary_upload_no_content_disposition_header(
    authenticated_client, deposit_collection, sample_archive
):
    """A binary upload whose Content-Disposition header is unset is
    refused with a 400 and no deposit is recorded."""
    # given
    slug = "some-external-id"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # when: the content disposition header is explicitly removed
    reply = post_archive(
        authenticated_client,
        collection_url,
        sample_archive,
        HTTP_SLUG=slug,
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION=None,
    )

    # then
    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    assert b"CONTENT_DISPOSITION header is mandatory" in reply.content

    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(external_id=slug)
def test_post_deposit_binary_failure_unsupported_packaging_header(
    authenticated_client, deposit_collection, sample_archive
):
    """A binary deposit sent with an unsupported packaging header is
    refused with a 400 and no deposit is recorded."""
    # given
    slug = "some-external-id"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # when: an unknown packaging is announced
    reply = post_archive(
        authenticated_client,
        collection_url,
        sample_archive,
        HTTP_SLUG=slug,
        HTTP_PACKAGING="something-unsupported",
    )

    # then
    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    expected_message = (
        b"The packaging provided something-unsupported is not supported"
    )
    assert expected_message in reply.content

    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(external_id=slug)
def test_post_deposit_binary_upload_ok(
    authenticated_client, deposit_collection, sample_archive
):
    """Binary upload with correct headers should return 201 with receipt.

    Besides the status code, this checks the stored deposit and its single
    archive request, the receipt fields (under both the ``swh`` and the
    deprecated ``atom`` namespaces) and the ``location`` header.
    """
    # given
    url = reverse(COL_IRI, args=[deposit_collection.name])
    external_id = "some-external-id-1"

    # when
    response = post_archive(
        authenticated_client,
        url,
        sample_archive,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    # then
    # Check the status before touching the body, so that a refused request
    # is reported as a status mismatch rather than as a parsing error.
    assert response.status_code == status.HTTP_201_CREATED

    # The receipt is parsed once and reused for every check below.
    response_content = parse_xml(response.content)
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES)
    )

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    assert deposit.external_id == external_id
    assert deposit.collection == deposit_collection
    assert deposit.swhid is None

    deposit_request = DepositRequest.objects.get(deposit=deposit)
    check_archive(sample_archive["name"], deposit_request.archive.name)
    assert deposit_request.metadata is None
    assert deposit_request.raw_metadata is None

    assert (
        response_content.findtext("swh:deposit_archive", namespaces=NAMESPACES)
        == sample_archive["name"]
    )
    assert deposit_id == deposit.id
    assert (
        response_content.findtext("swh:deposit_status", namespaces=NAMESPACES)
        == deposit.status
    )

    # deprecated tags
    assert (
        response_content.findtext("atom:deposit_archive", namespaces=NAMESPACES)
        == sample_archive["name"]
    )
    assert (
        int(response_content.findtext("atom:deposit_id", namespaces=NAMESPACES))
        == deposit.id
    )
    assert (
        response_content.findtext("atom:deposit_status", namespaces=NAMESPACES)
        == deposit.status
    )

    from django.urls import reverse as reverse_strict

    edit_iri = reverse_strict("edit_iri", args=[deposit_collection.name, deposit.id])

    assert response["location"] == f"http://testserver{edit_iri}"
def test_post_deposit_binary_and_post_to_add_another_archive(
    authenticated_client, deposit_collection, sample_archive, tmp_path
):
    """Posting a second archive on an in-progress deposit returns a 201
    and attaches both archives to the same deposit."""
    tmp_path = str(tmp_path)
    external_id = "some-external-id-1"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # when: a first archive is posted and the deposit is left in progress
    first_reply = post_archive(
        authenticated_client,
        collection_url,
        sample_archive,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="true",
    )

    # then
    assert first_reply.status_code == status.HTTP_201_CREATED

    receipt = parse_xml(first_reply.content)
    deposit_id = receipt.findtext("swh:deposit_id", namespaces=NAMESPACES)

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.status == "partial"
    assert deposit.external_id == external_id
    assert deposit.collection == deposit_collection
    assert deposit.swhid is None

    only_request = DepositRequest.objects.get(deposit=deposit)
    assert only_request.deposit == deposit
    assert only_request.type == "archive"
    check_archive(sample_archive["name"], only_request.archive.name)

    # 2nd archive to upload
    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )

    # uri to update the content
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id])

    # adding another archive for the deposit and finalizing it
    second_reply = post_archive(
        authenticated_client,
        update_uri,
        archive2,
        HTTP_SLUG=external_id,
    )

    assert second_reply.status_code == status.HTTP_201_CREATED
    # The receipt content is not inspected, only parsed.
    parse_xml(second_reply.content)

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    assert deposit.external_id == external_id
    assert deposit.collection == deposit_collection
    assert deposit.swhid is None

    # 2 deposit requests for the same deposit
    all_requests = list(DepositRequest.objects.filter(deposit=deposit).order_by("id"))
    assert len(all_requests) == 2

    first_request, second_request = all_requests

    assert first_request.deposit == deposit
    assert first_request.type == "archive"
    check_archive(sample_archive["name"], first_request.archive.name)

    assert second_request.deposit == deposit
    assert second_request.type == "archive"
    check_archive(archive2["name"], second_request.archive.name)

    # only 1 deposit in db
    assert len(Deposit.objects.all()) == 1
def _assert_refused_because_not_partial(response):
    """Check that ``response`` is a 400 whose atom summary states that only
    'partial' deposits can be acted upon."""
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert (
        ET.fromstring(response.content).findtext(
            "atom:summary", namespaces=NAMESPACES
        )
        == "You can only act on deposit with status 'partial'"
    )


def test_post_deposit_then_update_refused(
    authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path
):
    """Updating a deposit which is no longer 'partial' should return a 400.

    A deposit is created with In-Progress set to false, then every
    update/add endpoint (archive PUT/POST, atom PUT/POST, multipart
    PUT/POST) is exercised and each one must be refused.
    """
    tmp_path = str(tmp_path)
    url = reverse(COL_IRI, args=[deposit_collection.name])

    external_id = "some-external-id-1"

    # when
    response = post_archive(
        authenticated_client,
        url,
        sample_archive,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    response_content = parse_xml(response.content)
    deposit_id = response_content.findtext("swh:deposit_id", namespaces=NAMESPACES)

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    assert deposit.external_id == external_id
    assert deposit.collection == deposit_collection
    assert deposit.swhid is None

    deposit_request = DepositRequest.objects.get(deposit=deposit)
    assert deposit_request.deposit == deposit
    check_archive(sample_archive["name"], deposit_request.archive.name)

    # updating/adding is forbidden

    # uri to update the content
    edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id])
    se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id])
    em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id])

    # Every update/add endpoint must now fail since the deposit is no
    # longer partial.

    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some content in file 2"
    )

    # replacing file is no longer possible
    r = put_archive(
        authenticated_client,
        em_iri,
        archive2,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )
    _assert_refused_because_not_partial(r)

    # adding file is no longer possible
    r = post_archive(
        authenticated_client,
        em_iri,
        archive2,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )
    _assert_refused_because_not_partial(r)

    atom_xml = atom_dataset["entry-data-deposit-binary"]

    # replacing metadata is no longer possible
    r = put_atom(
        authenticated_client,
        edit_iri,
        data=atom_xml,
        CONTENT_LENGTH=len(atom_xml),
        HTTP_SLUG=external_id,
    )
    _assert_refused_because_not_partial(r)

    # adding new metadata is no longer possible
    r = post_atom(
        authenticated_client,
        se_iri,
        data=atom_xml,
        CONTENT_LENGTH=len(atom_xml),
        HTTP_SLUG=external_id,
    )
    _assert_refused_because_not_partial(r)

    archive_content = b"some content representing archive"
    archive = InMemoryUploadedFile(
        BytesIO(archive_content),
        field_name="archive0",
        name="archive0",
        content_type="application/zip",
        size=len(archive_content),
        charset=None,
    )

    # Encode once so that the declared size is the byte length of what is
    # actually uploaded (it differs from the str length for non-ASCII text).
    atom_bytes = atom_xml.encode("utf-8")
    atom_entry = InMemoryUploadedFile(
        BytesIO(atom_bytes),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        size=len(atom_bytes),
        charset="utf-8",
    )

    multipart_payload = {
        "archive": archive,
        "atom_entry": atom_entry,
    }

    # replacing through a multipart request is no longer possible
    r = authenticated_client.put(
        edit_iri,
        format="multipart",
        data=multipart_payload,
    )
    _assert_refused_because_not_partial(r)

    # adding through a multipart request is no longer possible
    r = authenticated_client.post(
        se_iri,
        format="multipart",
        data=multipart_payload,
    )
    _assert_refused_because_not_partial(r)