Exemplo n.º 1
0
def test_gcs_upload_chunking(mocker, mock_client_upload_gcs, requests_mock):
    """Content larger than one chunk is split into two GCS resumable PUTs."""
    mocker.patch("faculty.datasets.transfer.DEFAULT_CHUNK_SIZE", 1000)

    # Expected headers for the two sequential chunk uploads; only the final
    # chunk declares the total object size instead of "*".
    first_headers = {
        "Content-Length": "1000",
        "Content-Range": "bytes 0-999/*",
    }
    second_headers = {
        "Content-Length": "1000",
        "Content-Range": "bytes 1000-1999/2000",
    }

    for expected in (first_headers, second_headers):
        requests_mock.put(TEST_URL, status_code=200, request_headers=expected)

    transfer.upload(mock_client_upload_gcs, PROJECT_ID, TEST_PATH,
                    TEST_CONTENT)

    requests = requests_mock.request_history
    assert len(requests) == 2

    _assert_contains(requests[0].headers, first_headers)
    _assert_contains(requests[1].headers, second_headers)
    assert requests[0].text.encode("utf8") == TEST_CONTENT[:1000]
    assert requests[1].text.encode("utf8") == TEST_CONTENT[1000:]
Exemplo n.º 2
0
def test_s3_dynamic_chunk_upload(mocker, mock_client_upload_s3, requests_mock,
                                 max_chunks, expected_chunks):
    """Chunk size grows so the upload never exceeds S3_MAX_CHUNKS parts.

    Patches the default chunk size and the part-count cap, then checks the
    content is uploaded in exactly ``expected_chunks`` PUTs, each to its own
    presigned URL with the correct slice of TEST_CONTENT as its body.
    """
    mocker.patch("faculty.datasets.transfer.DEFAULT_CHUNK_SIZE", 100)
    mocker.patch("faculty.datasets.transfer.S3_MAX_CHUNKS", max_chunks)

    chunk_size = int(math.ceil(len(TEST_CONTENT) / expected_chunks))
    chunks = [
        TEST_CONTENT[i * chunk_size:(i + 1) * chunk_size]
        for i in range(expected_chunks)
    ]
    # Bind each expected chunk to its matcher with a default argument.
    # The original used identical lambdas draining one shared iterator,
    # which breaks if a matcher is evaluated out of order or more than
    # once, and raises StopIteration for the matchers past
    # expected_chunks when max_chunks > expected_chunks.
    chunk_matchers = [
        lambda request, expected=chunk: (
            expected == request.text.encode("utf-8")
        )
        for chunk in chunks
    ]
    urls = [
        "https://example.com/presigned-url-{i}/url".format(i=i)
        for i in range(max_chunks)
    ]
    etags = ["tag-{tagid}".format(tagid=i) for i in range(max_chunks)]

    # zip truncates to the expected_chunks matchers actually needed; the
    # upload only ever requests the first expected_chunks presigned URLs.
    for url, matcher, etag in zip(urls, chunk_matchers, etags):
        requests_mock.put(
            url,
            status_code=200,
            additional_matcher=matcher,
            headers={"ETag": etag},
        )

    mock_client_upload_s3.presign_upload_part.side_effect = urls

    transfer.upload(mock_client_upload_s3, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    history = requests_mock.request_history
    assert len(history) == expected_chunks
Exemplo n.º 3
0
def test_gcs_upload_empty_object(mock_client_upload_gcs, requests_mock):
    """Uploading empty content issues a single PUT with Content-Length 0."""
    empty_content = b""

    requests_mock.put(
        TEST_URL,
        request_headers={"Content-Length": "0"},
        status_code=200,
    )

    transfer.upload(
        mock_client_upload_gcs, PROJECT_ID, TEST_PATH, empty_content
    )
Exemplo n.º 4
0
def test_gcs_upload(mock_client_upload_gcs, requests_mock):
    """Content fitting in one chunk is sent as a single GCS PUT with a
    Content-Range covering the whole object."""
    expected_headers = {
        "Content-Length": "2000",
        "Content-Range": "bytes 0-1999/2000",
    }
    requests_mock.put(
        TEST_URL, request_headers=expected_headers, status_code=200
    )

    transfer.upload(
        mock_client_upload_gcs, PROJECT_ID, TEST_PATH, TEST_CONTENT
    )
Exemplo n.º 5
0
def test_s3_upload(mock_client_upload_s3, requests_mock):
    """A small upload goes as one S3 part, then the multipart upload is
    completed with that single part."""
    mock_client_upload_s3.presign_upload_part.return_value = TEST_URL

    requests_mock.put(
        TEST_URL,
        # Only match a PUT whose body is exactly the full content.
        additional_matcher=(
            lambda request: TEST_CONTENT == request.text.encode("utf-8")
        ),
        headers={"ETag": TEST_ETAG},
        status_code=200,
    )

    transfer.upload(mock_client_upload_s3, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    mock_client_upload_s3.complete_multipart_upload.assert_called_once_with(
        PROJECT_ID, TEST_PATH, TEST_S3_UPLOAD_ID, [TEST_COMPLETED_PART])
Exemplo n.º 6
0
def test_s3_upload_chunks(mocker, mock_client_upload_s3, requests_mock):
    """Content larger than one chunk is uploaded as two presigned S3 parts.

    Patches the chunk size down to 1000 bytes so TEST_CONTENT is split into
    two PUTs, then checks both parts were sent and the multipart upload was
    completed with both parts in order.
    """
    # NOTE(review): the other tests in this file patch DEFAULT_CHUNK_SIZE;
    # confirm UPLOAD_CHUNK_SIZE is the constant transfer.upload reads here.
    mocker.patch("faculty.datasets.transfer.UPLOAD_CHUNK_SIZE", 1000)

    def first_chunk_request_matcher(request):
        # First 1000 bytes go to the first presigned URL.
        return TEST_CONTENT[:1000] == request.text.encode("utf-8")

    def second_chunk_request_matcher(request):
        # Remaining bytes go to the second presigned URL.
        return TEST_CONTENT[1000:] == request.text.encode("utf-8")

    # One presigned URL per part, consumed in order.
    mock_client_upload_s3.presign_upload_part.side_effect = [
        TEST_URL,
        OTHER_URL,
    ]

    requests_mock.put(
        TEST_URL,
        additional_matcher=first_chunk_request_matcher,
        headers={"ETag": TEST_ETAG},
        status_code=200,
    )

    requests_mock.put(
        OTHER_URL,
        additional_matcher=second_chunk_request_matcher,
        headers={"ETag": OTHER_ETAG},
        status_code=200,
    )

    # Removed a leftover `presign_upload_part.return_value = TEST_URL`
    # assignment: side_effect (set above) takes precedence over
    # return_value in unittest.mock, so the line had no effect.

    transfer.upload(mock_client_upload_s3, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    history = requests_mock.request_history
    assert len(history) == 2

    mock_client_upload_s3.complete_multipart_upload.assert_called_once_with(
        PROJECT_ID,
        TEST_PATH,
        TEST_S3_UPLOAD_ID,
        [TEST_COMPLETED_PART, OTHER_COMPLETED_PART],
    )