Exemplo n.º 1
0
    def test_dirpath(self):
        """Check that a directory path yields several components with populated metadata."""
        blob = verta.dataset.Path("modelapi_hypothesis/")
        assert len(blob.list_components()) > 1

        # every listed component must carry a path, size, timestamp and checksum
        for entry in blob.list_components():
            assert entry.path != ""
            assert entry.size != 0
            assert entry.last_modified != 0
            assert entry.md5 != ""
Exemplo n.º 2
0
    def test_s3_bucket(self):
        """Check that a whole S3 bucket yields several components with populated metadata."""
        # pylint: disable=no-member
        blob = verta.dataset.S3("s3://verta-starter")
        assert len(blob.list_components()) > 1

        # every listed component must carry a path, size, timestamp and checksum
        for entry in blob.list_components():
            assert entry.path != ""
            assert entry.size != 0
            assert entry.last_modified != 0
            assert entry.md5 != ""
Exemplo n.º 3
0
    def test_filepath(self):
        """Check that a single file path yields exactly one component with populated metadata."""
        blob = verta.dataset.Path("modelapi_hypothesis/api_generator.py")

        assert len(blob.list_components()) == 1

        # the lone component must carry a path, size, timestamp and checksum
        entry = blob.list_components()[0]
        assert entry.path != ""
        assert entry.size != 0
        assert entry.last_modified != 0
        assert entry.md5 != ""
Exemplo n.º 4
0
    def test_s3_key(self):
        """Check that a single S3 key yields exactly one component with populated metadata."""
        # pylint: disable=no-member
        blob = verta.dataset.S3("s3://verta-starter/census-test.csv")

        assert len(blob.list_components()) == 1

        # the lone component must carry a path, size, timestamp and checksum
        entry = blob.list_components()[0]
        assert entry.path != ""
        assert entry.size != 0
        assert entry.last_modified != 0
        assert entry.md5 != ""
Exemplo n.º 5
0
    def test_multiple_filepaths(self):
        """Check that a list of two file paths yields two components with populated metadata."""
        filepaths = [
            "modelapi_hypothesis/api_generator.py",
            "modelapi_hypothesis/test_modelapi.py",
        ]
        blob = verta.dataset.Path(filepaths)
        assert len(blob.list_components()) == 2

        # every listed component must carry a path, size, timestamp and checksum
        for entry in blob.list_components():
            assert entry.path != ""
            assert entry.size != 0
            assert entry.last_modified != 0
            assert entry.md5 != ""
Exemplo n.º 6
0
    def test_versioned_object(self):
        """Check that an S3 blob for one key records that object's latest version ID.

        The expected ID comes from a direct ``head_object`` call; the test is skipped
        when boto3 cannot be imported.
        """
        boto3 = pytest.importorskip("boto3")
        s3_client = boto3.client('s3')

        bucket = "verta-versioned-bucket"
        key = "data/census-train.csv"

        # ask S3 directly which version it reports for the object
        latest_version_id = s3_client.head_object(Bucket=bucket, Key=key)['VersionId']

        blob = verta.dataset.S3("s3://{}/{}".format(bucket, key))

        assert len(blob.list_components()) == 1
        assert blob.list_components()[0].s3_version_id == latest_version_id
Exemplo n.º 7
0
    def test_s3_multiple_keys(self):
        """Check that a list of two S3 keys yields two components with populated metadata."""
        # pylint: disable=no-member
        s3_urls = [
            "s3://verta-starter/census-test.csv",
            "s3://verta-starter/census-train.csv",
        ]
        blob = verta.dataset.S3(s3_urls)

        assert len(blob.list_components()) == 2

        # every listed component must carry a path, size, timestamp and checksum
        for entry in blob.list_components():
            assert entry.path != ""
            assert entry.size != 0
            assert entry.last_modified != 0
            assert entry.md5 != ""
Exemplo n.º 8
0
    def test_versioned_folder(self):
        """Check that an S3 folder blob records the latest version ID of each object in it.

        The expected IDs are read directly from S3 through boto3 (the test is skipped
        when boto3 cannot be imported) and compared with the components listed by
        ``verta.dataset.S3``. Components must also be non-empty objects rather than
        folder placeholders (paths ending in ``/``).
        """
        s3 = pytest.importorskip("boto3").client('s3')
        S3_PATH = verta.dataset.S3._S3_PATH

        bucket = "verta-versioned-bucket"
        folder = "data/"
        s3_url = "s3://{}/{}".format(bucket, folder)

        # collect latest versions of objects with folder as prefix; paginate so a
        # listing longer than one response is not silently truncated
        version_ids = {}
        paginator = s3.get_paginator('list_object_versions')
        for page in paginator.paginate(Bucket=bucket, Prefix=folder):
            for obj in page.get('Versions', []):
                if not obj['IsLatest']:
                    continue
                version_id = obj['VersionId']
                if version_id == "null":
                    # S3 returns "null" in its API, but we handle that as empty string
                    version_id = ""
                version_ids[S3_PATH.format(bucket, obj['Key'])] = version_id

        dataset = verta.dataset.S3(s3_url)

        components = dataset.list_components()
        # guard against the checks below passing vacuously on an empty listing
        assert components

        for component in components:
            assert component.s3_version_id == version_ids[component.path]
            assert not component.path.endswith('/')
            assert component.size != 0
Exemplo n.º 9
0
    def test_concat(self):
        """Check that adding two Path blobs combines their components, sorted by path."""
        first = verta.dataset.Path("modelapi_hypothesis/")
        second = verta.dataset.Path("versioning/")
        # expected result: both component lists merged and ordered by path
        expected = sorted(
            first.list_components() + second.list_components(),
            key=lambda entry: entry.path,
        )

        combined = first + second
        assert combined.list_components() == expected

        # commutative
        combined = second + first
        assert combined.list_components() == expected

        # assignment
        first += second
        assert first.list_components() == expected
Exemplo n.º 10
0
    def test_concat(self):
        """Check that adding two S3 blobs combines their components, sorted by path."""
        first = verta.dataset.S3("s3://verta-starter/")
        second = verta.dataset.S3("s3://verta-versioned-bucket/")
        # expected result: both component lists merged and ordered by path
        expected = sorted(
            first.list_components() + second.list_components(),
            key=lambda entry: entry.path,
        )

        combined = first + second
        assert combined.list_components() == expected

        # commutative
        combined = second + first
        assert combined.list_components() == expected

        # assignment
        first += second
        assert first.list_components() == expected
Exemplo n.º 11
0
    def test_versioned_object_by_id(self):
        """Check that an S3 blob built from an explicit version ID records that version.

        A non-latest version of the object is looked up directly through boto3 (the
        test is skipped when boto3 cannot be imported), wrapped in an ``S3Location``,
        and the resulting single component must report the same version ID.
        """
        s3 = pytest.importorskip("boto3").client('s3')

        bucket = "verta-versioned-bucket"
        key = "data/census-train.csv"
        s3_url = "s3://{}/{}".format(bucket, key)

        # pick a version that's not the latest; Prefix narrows the listing to this
        # key so its versions are not pushed out of the response by other objects
        listing = s3.list_object_versions(Bucket=bucket, Prefix=key)
        version_ids = [
            obj['VersionId']
            for obj in listing.get('Versions', [])
            # Prefix may also match longer keys, so still compare the key exactly
            if not obj['IsLatest'] and obj['Key'] == key
        ]
        # fail with a clear message instead of a bare IndexError
        assert version_ids, "no non-latest version found for {}".format(s3_url)
        version_id = version_ids[0]

        s3_loc = verta.dataset._s3.S3Location(s3_url, version_id)
        dataset = verta.dataset.S3(s3_loc)

        assert len(dataset.list_components()) == 1
        assert dataset.list_components()[0].s3_version_id == version_id
Exemplo n.º 12
0
    def test_add(self):
        """Check that ``add()`` on a Path blob matches combining two separate blobs."""
        path1 = "versioning/test_code.py"
        path2 = "versioning/test_dataset.py"

        blob = verta.dataset.Path(path1)
        blob.add(path2)

        # as if we had added two separate blobs together
        first = verta.dataset.Path(path1)
        second = verta.dataset.Path(path2)
        expected = sorted(
            first.list_components() + second.list_components(),
            key=lambda entry: entry.path,
        )

        assert blob.list_components() == expected
Exemplo n.º 13
0
    def test_concat_base_path(self):
        """Check that Path blobs created with different base paths can be added together."""
        first = verta.dataset.Path("modelapi_hypothesis/", base_path="modelapi_hypothesis/")
        second = verta.dataset.Path("versioning/", base_path="versioning/")
        # expected result: both component lists merged and ordered by path
        expected = sorted(
            first.list_components() + second.list_components(),
            key=lambda entry: entry.path,
        )

        combined = first + second
        assert combined.list_components() == expected
Exemplo n.º 14
0
    def test_add(self):
        """Check that ``add()`` on an S3 blob matches combining two separate blobs."""
        path1 = "s3://verta-starter/census-train.csv"
        path2 = "s3://verta-starter/census-test.csv"

        blob = verta.dataset.S3(path1)
        blob.add(path2)

        # as if we had added two separate blobs together
        first = verta.dataset.S3(path1)
        second = verta.dataset.S3(path2)
        expected = sorted(
            first.list_components() + second.list_components(),
            key=lambda entry: entry.path,
        )

        assert blob.list_components() == expected
Exemplo n.º 15
0
    def test_versioned_bucket(self):
        """Check that an S3 bucket blob records the latest version ID of each object in it.

        The expected IDs are read directly from S3 through boto3 (the test is skipped
        when boto3 cannot be imported) and compared with the components listed by
        ``verta.dataset.S3``.
        """
        s3 = pytest.importorskip("boto3").client('s3')
        S3_PATH = verta.dataset.S3._S3_PATH

        bucket = "verta-versioned-bucket"

        # collect latest versions of objects; paginate so a listing longer than one
        # response is not silently truncated
        version_ids = {}
        paginator = s3.get_paginator('list_object_versions')
        for page in paginator.paginate(Bucket=bucket):
            for obj in page.get('Versions', []):
                if not obj['IsLatest']:
                    continue
                version_id = obj['VersionId']
                if version_id == "null":
                    # S3 returns "null" in its API, but we handle that as empty string
                    version_id = ""
                version_ids[S3_PATH.format(bucket, obj['Key'])] = version_id

        dataset = verta.dataset.S3("s3://{}".format(bucket))

        components = dataset.list_components()
        # guard against the check below passing vacuously on an empty listing
        assert components

        for component in components:
            assert component.s3_version_id == version_ids[component.path]