Пример #1
0
def test_download_glob_to_file_nonexistent_blob(tmpdir):
    """Check that downloading a blob that is not in the bucket raises ``NotFound``."""
    missing_url = (
        "gs://vcm-ml-code-testing-data/non_existent_dir/non_existent_file.lol"
    )
    missing_blob = gcs.init_blob_from_gcs_url(missing_url)

    # Only the download call sits inside the raises-context: creating the
    # blob handle above must succeed for the test to get this far.
    with pytest.raises(NotFound):
        gcs.download_blob_to_file(missing_blob, tmpdir, "non_existsent.file")
Пример #2
0
def test_download_blob_to_file(tmpdir):
    """Download a test blob into ``tmpdir`` and check the returned path exists."""
    filename = "test_datafile.txt"
    remote_dir = "gs://vcm-ml-code-testing-data/cloud_gcs/"
    source_blob = gcs.init_blob_from_gcs_url(remote_dir + filename)

    # The helper returns the path it wrote to; that path is what gets checked.
    local_copy = gcs.download_blob_to_file(source_blob, tmpdir, filename)

    assert local_copy.exists()
Пример #3
0
def test_download_blob_to_file_makes_destination_directories(tmpdir):
    """Check that downloading into a missing directory creates that directory."""
    filename = "test_datafile.txt"
    remote_dir = "gs://vcm-ml-code-testing-data/cloud_gcs/"
    source_blob = gcs.init_blob_from_gcs_url(remote_dir + filename)

    # Nested target below tmpdir that has not been created yet.
    target_dir = Path(tmpdir) / Path("does/not/exist")
    assert not target_dir.exists()

    gcs.download_blob_to_file(source_blob, target_dir, filename)

    assert target_dir.exists()
Пример #4
0
def test_upload_dir_to_gcs_does_not_upload_subdir(tmpdir):
    """Check that ``upload_dir_to_gcs`` uploads top-level files but not subdirs.

    One pickle is written at the top of ``tmpdir`` and a second one inside a
    nested ``extra_dir``. After uploading ``tmpdir``, only the top-level
    pickle is expected to exist in the bucket.

    Existence of both blobs is recorded first, then every blob that was found
    is deleted, and only afterwards are the assertions evaluated. A failing
    run therefore does not leave objects in the bucket that would make later
    runs fail as well.
    """
    x = (1, 2, 3, 4)
    with open(Path(tmpdir, "what_a_pickle.pkl"), "wb") as f:
        pickle.dump(x, f)

    extra_subdir = Path(tmpdir, "extra_dir")
    extra_subdir.mkdir()

    with open(Path(extra_subdir, "extra_pickle.pkl"), "wb") as f:
        pickle.dump(x, f)

    # TODO: use pytest fixture to do setup/teardown of temporary gcs dir

    upload_dir = "transient"
    bucket_name = "vcm-ml-code-testing-data"
    gcs_url_prefix = f"gs://{bucket_name}"
    tmp_gcs_dir = f"test_upload/{upload_dir}"
    tmp_gcs_url = f"{gcs_url_prefix}/{tmp_gcs_dir}"

    gcs.upload_dir_to_gcs(bucket_name, tmp_gcs_dir, Path(tmpdir))

    uploaded_pickle_url = f"{tmp_gcs_url}/what_a_pickle.pkl"
    not_uploaded_pickle_url = f"{tmp_gcs_url}/extra_dir/extra_pickle.pkl"

    # Sleeps added to reduce api request rate by circleci
    time.sleep(0.1)
    pkl_blob = gcs.init_blob_from_gcs_url(uploaded_pickle_url)
    nonexistent_pkl_blob = gcs.init_blob_from_gcs_url(not_uploaded_pickle_url)

    time.sleep(0.1)
    top_level_uploaded = pkl_blob.exists()
    time.sleep(0.1)
    subdir_uploaded = nonexistent_pkl_blob.exists()

    # Clean up before asserting so that a failed expectation cannot leave a
    # stale blob behind (a leftover subdir pickle would fail every later run).
    if top_level_uploaded:
        time.sleep(0.1)
        pkl_blob.delete()
    if subdir_uploaded:
        time.sleep(0.1)
        nonexistent_pkl_blob.delete()

    assert top_level_uploaded
    assert not subdir_uploaded
Пример #5
0
def test_upload_dir_to_gcs(tmpdir):
    """Upload the local ``test_data`` directory and verify each file round-trips.

    For every expected file the test checks that the blob exists, downloads
    it below ``tmpdir`` and hands the local original and the downloaded copy
    to ``_compare_checksums``. Each blob is deleted once it has been verified.

    If any step fails, the blobs that have not been deleted yet are removed in
    a ``finally`` clause so that a failing run does not leave uploaded objects
    in the bucket for later runs to find.
    """
    src_dir_to_upload = Path(__file__).parent.joinpath("test_data")
    gcs.upload_dir_to_gcs("vcm-ml-code-testing-data", "test_upload",
                          src_dir_to_upload)

    test_files = ["test_datafile.txt", "test_data.tar"]
    download_dir = Path(tmpdir, "test_uploaded")

    file_blobs = [
        gcs.init_blob_from_gcs_url(
            f"gs://vcm-ml-code-testing-data/test_upload/{filename}")
        for filename in test_files
    ]

    # Number of leading entries of file_blobs that were already deleted.
    n_deleted = 0
    try:
        for filename, file_blob in zip(test_files, file_blobs):
            assert file_blob.exists()

            downloaded_path = gcs.download_blob_to_file(
                file_blob, download_dir, filename)
            local_file = src_dir_to_upload.joinpath(filename)
            _compare_checksums(local_file, downloaded_path)

            file_blob.delete()
            n_deleted += 1
    finally:
        # Empty on success, so the passing path makes no additional requests.
        # The existence check avoids calling delete on a blob that was never
        # uploaded in the first place.
        for leftover_blob in file_blobs[n_deleted:]:
            if leftover_blob.exists():
                leftover_blob.delete()
Пример #6
0
def test_files_exist_on_gcs(gcs_url):
    """Assert that the blob addressed by ``gcs_url`` exists."""
    # NOTE(review): the fixture/parametrization that supplies gcs_url is not
    # visible in this section.
    assert gcs.init_blob_from_gcs_url(gcs_url).exists()
Пример #7
0
def test_init_blob_from_gcs_url():
    """Check that a ``gs://`` URL yields a ``Blob`` with the right bucket and name."""
    blob = gcs.init_blob_from_gcs_url("gs://test_bucket/test_blobdir/test_blob.nc")

    assert isinstance(blob, Blob)
    # Bucket name is the first URL component; the blob name is the rest.
    assert (blob.bucket.name, blob.name) == (
        "test_bucket",
        "test_blobdir/test_blob.nc",
    )
Пример #8
0
def download_to_file(url: str, dest: str):
    """Download the blob addressed by ``url`` to the filename ``dest``.

    An info message is emitted through the ``logging`` module before and
    after the download.
    """
    logging.info(f"Downloading from {url} to {dest}")
    gcs.init_blob_from_gcs_url(url).download_to_filename(dest)
    logging.info(f"Done downloading to {dest}")