Пример #1
0
    def test_artifact_hash(self):
        """Check the artifact hash against a pinned value for both file orderings."""
        # do not change this test case without coordinating with Artifact API's ArtifactHashComputer
        first_file = ArtifactFileData(
            file_path="to/file1",
            file_hash="c38444d2ccff1a7aab3d323fb6234e1b4f0a81ac",
            type="S3",
            size=5234,
            metadata={
                "location": "s3://bucket/path/to/file1",
                "last_modification": "2021-08-09 10:22:53",
            },
        )
        # The second entry deliberately carries no explicit size.
        second_file = ArtifactFileData(
            file_path="from/file2",
            file_hash="4347d0f8ba661234a8eadc005e2e1d1b646c9682",
            type="S3",
            metadata={
                "location": "s3://bucket/path/to/file2",
                "last_modification": "2021-08-09 10:32:12",
            },
        )
        artifacts = [first_file, second_file]

        expected_hash = (
            "56e64245b1d4915ff27b306c8077cd4f9ce1b31233c690a93ebc38a1b737a9ea")

        # The same hash is expected for the original and the reversed order.
        for ordering in (artifacts, reversed(artifacts)):
            self.assertEqual(expected_hash,
                             FileHasher.get_artifact_hash(ordering))
Пример #2
0
    def setUp(self):
        """Build a run holding one artifact attribute plus a stub artifact driver.

        The run's backend is seeded directly with the artifact hash and its
        file list, and ``self.test_artifact_driver`` is set to a driver class
        of type ``"test"`` whose download only touches the destination file.
        """
        self.monkeypatch = MonkeyPatch()

        self.wait = self._random_wait()
        self.op_processor = MagicMock()
        self.exp = self._create_run(processor=self.op_processor)
        self.path = self._random_path()
        self.path_str = path_to_str(self.path)

        # One digest is reused for the artifact itself and for both files.
        digest = (
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
        self.artifact_hash = digest

        sized_file = ArtifactFileData(
            file_path="fname.txt",
            file_hash=digest,
            type="test",
            size=213,
            metadata={},
        )
        unsized_file = ArtifactFileData(
            file_path="subdir/other.mp3",
            file_hash=digest,
            type="test",
            metadata={},
        )
        self.artifact_files = [sized_file, unsized_file]

        run = self.exp
        run.set_attribute(self.path_str, Artifact(run, self.path))

        # Seed the backend's internal stores so the attribute resolves to the
        # files defined above.
        backend = run._backend
        container = backend._containers[(run._id, ContainerType.RUN)]
        container.set(self.path, ArtifactAttr(self.artifact_hash))
        artifact_key = (run._project_id, self.artifact_hash)
        backend._artifacts[artifact_key] = self.artifact_files

        self._downloads = set()

        class TestArtifactDriver(ArtifactDriver):
            """Stub driver: matches nothing, tracks nothing, touches on download."""

            @classmethod
            def get_type(cls):
                return "test"

            @classmethod
            def matches(cls, path: str) -> bool:
                return False

            @classmethod
            def get_tracked_files(cls, path, destination=None):
                return list()

            @classmethod
            def download_file(cls, destination: pathlib.Path,
                              file_definition: ArtifactFileData):
                # Only create the destination file; no content is written.
                destination.touch()

        self.test_artifact_driver = TestArtifactDriver
Пример #3
0
def create_artifact_version(
    swagger_client: SwaggerClient,
    project_id: str,
    artifact_hash: str,
    parent_identifier: str,
    files: List[ArtifactFileData],
    default_request_params: Dict,
) -> ArtifactModel:
    """Create an artifact version through the ``createArtifactVersion`` endpoint.

    Args:
        swagger_client: Client exposing ``api.createArtifactVersion``.
        project_id: Sent as ``projectIdentifier``.
        artifact_hash: Sent as ``hash``.
        parent_identifier: Sent as ``parentIdentifier``.
        files: File entries; each is converted with ``ArtifactFileData.to_dto``.
        default_request_params: Extra request keyword arguments. They are
            unpacked last, so they override the keys above on a name clash.

    Returns:
        An ``ArtifactModel`` built from the response's ``artifactHash``,
        ``size`` and ``receivedMetadata`` fields.

    Raises:
        ArtifactNotFoundException: If the request fails with ``HTTPNotFound``.
    """
    params = {
        "projectIdentifier": project_id,
        "hash": artifact_hash,
        "parentIdentifier": parent_identifier,
        "artifactFilesDTO": {
            "files": [ArtifactFileData.to_dto(a) for a in files]
        },
        **default_request_params,
    }

    # Keep the try body limited to the request itself and chain the original
    # HTTP error so it stays visible as the explicit cause.
    try:
        result = swagger_client.api.createArtifactVersion(
            **params).response().result
    except HTTPNotFound as error:
        raise ArtifactNotFoundException(artifact_hash) from error

    return ArtifactModel(
        hash=result.artifactHash,
        size=result.size,
        received_metadata=result.receivedMetadata,
    )
Пример #4
0
    def get_tracked_files(
            cls,
            path: str,
            destination: str = None) -> typing.List[ArtifactFileData]:
        """List the S3 objects under ``path`` as artifact file entries.

        Args:
            path: Location URI; its network location is used as the bucket
                name and its path (without leading slashes) as the key prefix.
            destination: Optional path prepended to every relative file path.

        Returns:
            One ``ArtifactFileData`` per object returned for the prefix, with
            the object's e-tag (quotes stripped) as the hash.

        Raises:
            NeptuneUnsupportedArtifactFunctionalityException: If the prefix
                contains ``*``.
            NeptuneRemoteStorageCredentialsException: On ``NoCredentialsError``.
            NeptuneRemoteStorageAccessException: On ``NoSuchBucket`` or
                ``NoSuchKey`` raised by the S3 client.
        """
        url = urlparse(path)
        bucket_name, prefix = url.netloc, url.path.lstrip("/")

        # NOTE(review): the message mentions "?" but only "*" is rejected;
        # confirm whether "?" should be rejected as well.
        if "*" in prefix:
            raise NeptuneUnsupportedArtifactFunctionalityException(
                f"Wildcard characters (*,?) in location URI ({path}) are not supported."
            )

        # pylint: disable=no-member
        remote_storage = boto3.resource("s3").Bucket(bucket_name)

        # Loop-invariant, so build it once instead of on every iteration.
        destination_root = pathlib.PurePosixPath(destination or "")

        stored_files: typing.List[ArtifactFileData] = []

        try:
            for remote_object in remote_storage.objects.filter(Prefix=prefix):
                remote_key = remote_object.key

                # If the prefix is the path to a file, continue with its
                # directory so the relative path keeps the file name.
                if prefix == remote_key:
                    parent = str(pathlib.PurePosixPath(prefix).parent)
                    # A top-level key has parent "." which is not part of the
                    # key; use an empty prefix rather than stripping dots,
                    # which would also eat real leading dots (".hidden").
                    prefix = "" if parent == "." else parent

                relative_file_path = remote_key[len(prefix):].lstrip("/")
                file_path = destination_root / relative_file_path

                stored_files.append(
                    ArtifactFileData(
                        file_path=str(file_path).lstrip("/"),
                        file_hash=remote_object.e_tag.strip('"'),
                        type=ArtifactFileType.S3.value,
                        size=remote_object.size,
                        metadata=cls._serialize_metadata({
                            "location":
                            f's3://{bucket_name}/{remote_key.lstrip("/")}',
                            "last_modified":
                            remote_object.last_modified,
                        }),
                    ))
        except NoCredentialsError as error:
            raise NeptuneRemoteStorageCredentialsException() from error
        except (
                remote_storage.meta.client.exceptions.NoSuchBucket,
                remote_storage.meta.client.exceptions.NoSuchKey,
        ) as error:
            raise NeptuneRemoteStorageAccessException(
                location=path) from error

        return stored_files
Пример #5
0
    def test_non_existing_file_download(self):
        """Expect a local storage access error for the "/wrong/path" source."""
        missing_path = "/wrong/path"
        artifact_file = ArtifactFileData(
            file_path=missing_path,
            file_hash="??",
            type="??",
            metadata={"file_path": missing_path},
        )

        # The temporary directory is opened inside the assertion context,
        # matching the order in which the two contexts are entered.
        with self.assertRaises(NeptuneLocalStorageAccessException):
            with tempfile.TemporaryDirectory() as temporary:
                LocalArtifactDriver.download_file(
                    destination=Path(temporary),
                    file_definition=artifact_file,
                )
Пример #6
0
 def setUp(self) -> None:
     """Prepare an artifact hash, a one-file listing and random identifiers."""
     digest = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
     self.artifact_hash = digest

     # Arguments stay positional on purpose.
     # NOTE(review): presumably (file_path, file_hash, type, metadata) -
     # confirm against the ArtifactFileData field order.
     only_file = ArtifactFileData("fname.txt", digest, "test", {})
     self.files = [only_file]

     # Fresh random identifiers for every test.
     project_uuid = uuid.uuid4()
     self.project_id = str(project_uuid)
     parent_uuid = uuid.uuid4()
     self.parent_identifier = str(parent_uuid)
Пример #7
0
 def list_artifact_files(
     self, project_id: str, artifact_hash: str
 ) -> List[ArtifactFileData]:
     """List the files of an artifact through the ``listArtifactFiles`` endpoint.

     Args:
         project_id: Sent as ``projectIdentifier``.
         artifact_hash: Sent as ``hash``.

     Returns:
         The response's ``files`` entries, each converted with
         ``ArtifactFileData.from_dto``.

     Raises:
         ArtifactNotFoundException: If the request fails with ``HTTPNotFound``.
     """
     params = {
         "projectIdentifier": project_id,
         "hash": artifact_hash,
         **DEFAULT_REQUEST_KWARGS,
     }

     # Keep the try body limited to the request itself and chain the original
     # HTTP error so it stays visible as the explicit cause.
     try:
         result = (
             self.artifacts_client.api.listArtifactFiles(**params).response().result
         )
     except HTTPNotFound as error:
         raise ArtifactNotFoundException(artifact_hash) from error

     return [ArtifactFileData.from_dto(a) for a in result.files]
Пример #8
0
    def test_file_download(self):
        """Download one S3 file definition and compare the result's MD5."""
        expected_md5 = "2f249230a8e7c2bf6005ccd2679259ec"
        artifact_file = ArtifactFileData(
            file_path="to/file1",
            file_hash=expected_md5,
            type=ArtifactFileType.S3.value,
            metadata={"location": f"s3://{self.bucket_name}/path/to/file1"},
        )

        with tempfile.TemporaryDirectory() as temporary:
            target = Path(temporary) / "target.txt"

            S3ArtifactDriver.download_file(
                destination=target,
                file_definition=artifact_file,
            )

            downloaded_md5 = md5(target)
            self.assertEqual(expected_md5, downloaded_md5)
Пример #9
0
    def get_tracked_files(
            cls,
            path: str,
            destination: str = None) -> typing.List[ArtifactFileData]:
        """List the local files under ``path`` as artifact file entries.

        Args:
            path: File or directory path, optionally prefixed with ``file://``;
                ``~`` is expanded.
            destination: Optional path prepended to every relative file path.

        Returns:
            One ``ArtifactFileData`` per regular file: every file found
            recursively when ``path`` is a directory, otherwise ``path``
            itself if it is a file.

        Raises:
            NeptuneUnsupportedArtifactFunctionalityException: If the path
                contains ``*``.
        """
        file_protocol_prefix = "file://"
        if path.startswith(file_protocol_prefix):
            path = path[len(file_protocol_prefix):]

        # NOTE(review): the message mentions "?" but only "*" is rejected;
        # confirm whether "?" should be rejected as well.
        if "*" in path:
            raise NeptuneUnsupportedArtifactFunctionalityException(
                f"Wildcard characters (*,?) in location URI ({path}) are not supported."
            )

        source_location = pathlib.Path(path).expanduser()

        # Loop-invariant: check the source kind and build the destination
        # root once, not for every file.
        source_is_dir = source_location.is_dir()
        destination_root = (None if destination is None else
                            pathlib.Path(destination))

        stored_files: typing.List[ArtifactFileData] = []

        files_to_check = (source_location.rglob("*")
                          if source_is_dir else [source_location])
        for file in files_to_check:
            # symlink dirs are omitted by rglob('*')
            if not file.is_file():
                continue

            if source_is_dir:
                file_path = file.relative_to(source_location).as_posix()
            else:
                file_path = file.name
            if destination_root is not None:
                file_path = (destination_root / file_path).as_posix()

            file_hash = FileHasher.get_local_file_hash(file)
            # One stat call so size and mtime come from the same snapshot.
            file_stat = file.stat()

            stored_files.append(
                ArtifactFileData(
                    file_path=file_path,
                    file_hash=file_hash,
                    type=ArtifactFileType.LOCAL.value,
                    size=file_stat.st_size,
                    metadata=cls._serialize_metadata({
                        "file_path":
                        f"file://{file.resolve().as_posix()}",
                        "last_modified":
                        file_stat.st_mtime,
                    }),
                ))

        return stored_files
Пример #10
0
    def test_file_download(self):
        """Download a local file definition; expect a symlink with the known MD5."""
        source_path = (self.test_dir / "data/file1.txt").as_posix()
        artifact_file = ArtifactFileData(
            file_path="data/file1.txt",
            file_hash="??",
            type="??",
            metadata={"file_path": f"file://{source_path}"},
        )

        with tempfile.TemporaryDirectory() as temporary:
            downloaded_file = Path(temporary, "downloaded_file.ext")

            LocalArtifactDriver.download_file(
                destination=downloaded_file,
                file_definition=artifact_file,
            )

            # The driver is expected to link to the source, not copy it.
            self.assertTrue(downloaded_file.is_symlink())
            self.assertEqual("ad62f265e5b1a2dc51f531e44e748aa0", md5(downloaded_file))