Пример #1
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: FlyteDirectory,
        python_type: typing.Type[FlyteDirectory],
        expected: LiteralType,
    ) -> Literal:
        """
        Convert a directory value into a blob ``Literal``.

        ``python_val`` is either a ``FlyteDirectory`` or a ``str`` / ``os.PathLike`` path. When the source is
        remote (per ``ctx.file_access.is_remote``) or the ``FlyteDirectory`` has ``remote_directory`` set to
        ``False``, nothing is uploaded and the literal's uri is the source path itself. Otherwise the directory
        is pushed with ``put_data(..., is_multipart=True)`` and the literal's uri is the upload destination.

        :raises AssertionError: if ``python_val`` is none of the accepted types, or if a local, non-FlyteDirectory
            path is not a directory.
        """

        def _blob_literal(uri) -> Literal:
            # The metadata is built on demand, i.e. only once a literal is actually about to be returned.
            blob_meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=uri)))

        upload_target = None
        skip_upload = False

        if isinstance(python_val, FlyteDirectory):
            source_path = python_val.path
            requested = python_val.remote_directory
            if requested is False:
                # An explicit False means "never upload", whatever the source looks like.
                skip_upload = True
            else:
                # A falsy value such as "" falls back to a randomly generated destination later on.
                upload_target = requested or None
        elif isinstance(python_val, (os.PathLike, str)):
            source_path = python_val
            # The directory check is only meaningful for local paths.
            if not ctx.file_access.is_remote(source_path):
                if not Path(source_path).is_dir():
                    raise AssertionError(
                        f"Expected a directory. {source_path} is not a directory"
                    )
        else:
            raise AssertionError(
                f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}"
            )

        # Remote sources (for example s3://some/extant/dir/) and opted-out values are referenced as-is.
        if ctx.file_access.is_remote(source_path) or skip_upload:
            return _blob_literal(source_path)

        # Local sources are uploaded; the literal then points at the upload destination.
        if upload_target is None:
            upload_target = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(source_path, upload_target, is_multipart=True)
        return _blob_literal(upload_target)
Пример #2
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        """
        Convert a file value into a blob ``Literal``.

        A ``FlyteFile`` whose ``_remote_source`` is set is turned straight back into a literal pointing at that
        source. Otherwise, when the source path is remote (per ``ctx.file_access.is_remote``) or the
        ``FlyteFile`` has ``remote_path`` set to ``False``, the literal's uri is the source path and nothing is
        uploaded. In every other case the file is pushed with ``put_data(..., is_multipart=False)``.

        :raises AssertionError: if ``python_val`` is ``None`` or is not a ``FlyteFile``, ``os.PathLike`` or ``str``.
        """

        def _blob_literal(uri, format_provider) -> Literal:
            # ``format_provider`` is whichever object the format lookup is performed on for this code path.
            blob_meta = BlobMetadata(type=self._blob_type(format=format_provider.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=uri)))

        if python_val is None:
            raise AssertionError("None value cannot be converted to a file.")

        destination = None
        skip_upload = False

        if isinstance(python_val, FlyteFile):
            # If the object has a remote source, then we just convert it back.
            if python_val._remote_source is not None:
                return _blob_literal(python_val._remote_source, self)

            source_path = python_val.path
            requested = python_val.remote_path
            if requested is False:
                # An explicit False means "never upload".
                skip_upload = True
            else:
                # A falsy value such as "" falls back to a randomly generated destination later on.
                destination = requested or None
        elif isinstance(python_val, (os.PathLike, str)):
            source_path = python_val
        else:
            raise AssertionError(
                f"Expected FlyteFile or os.PathLike object, received {type(python_val)}"
            )

        # Remote sources (for example https://raw.github.com/demo_data.csv) and opted-out values are
        # referenced by their original path.
        # TODO: Add copying functionality so that FlyteFile(path="s3://a", remote_path="s3://b") will copy.
        if ctx.file_access.is_remote(source_path) or skip_upload:
            return _blob_literal(source_path, FlyteFilePathTransformer)

        # Local sources are uploaded (for local execution the remote store is just a subfolder).
        if destination is None:
            destination = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, destination, is_multipart=False)
        return _blob_literal(destination or source_path, FlyteFilePathTransformer)
Пример #3
0
    def to_literal(
        self, ctx: FlyteContext, python_val: os.PathLike, python_type: Type[os.PathLike], expected: LiteralType
    ) -> Literal:
        """
        Convert a path-like value into a blob ``Literal``.

        Remote values (per ``ctx.file_access.is_remote``) are referenced as-is: the returned literal's uri is
        ``python_val`` and nothing is uploaded. Local values are uploaded with
        ``put_data(..., is_multipart=False)`` to a randomly generated remote path, which becomes the uri.

        The blob metadata is built from ``expected.blob`` in both cases.
        """
        # TODO we could guess the mimetype and allow the format to be changed at runtime. thus a non existent format
        #      could be replaced with a guess format?

        # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
        # and just return a literal with a uri equal to the path given
        if ctx.file_access.is_remote(python_val):
            return Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(expected.blob), uri=python_val)))

        # For local files, we'll upload for the user. The random destination is only generated here, where it
        # is actually needed, rather than unconditionally at the top of the method.
        rpath = ctx.file_access.get_random_remote_path()
        ctx.file_access.put_data(python_val, rpath, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(expected.blob), uri=rpath)))
Пример #4
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: TensorFlow2ONNX,
        python_type: Type[TensorFlow2ONNX],
        expected: LiteralType,
    ) -> Literal:
        """
        Export ``python_val.model`` through ``to_onnx`` and return a blob ``Literal`` pointing at the upload.

        The conversion config is obtained from ``python_type`` via ``extract_config``; a copy of the config's
        ``__dict__`` is handed to ``to_onnx``. The resulting local file is uploaded to a random remote path.

        :raises TypeTransformerFailedError: if no (truthy) config could be extracted from ``python_type``.
        """
        python_type, config = extract_config(python_type)

        # Without a config there is nothing to drive the conversion, so bail out early.
        if not config:
            raise TypeTransformerFailedError(f"{python_type}'s config is None")

        destination = ctx.file_access.get_random_remote_path()
        exported_file = to_onnx(ctx, python_val.model, config.__dict__.copy())
        ctx.file_access.put_data(exported_file, destination, is_multipart=False)

        onnx_blob_type = BlobType(format=self.ONNX_FORMAT, dimensionality=BlobType.BlobDimensionality.SINGLE)
        onnx_blob = Blob(uri=destination, metadata=BlobMetadata(type=onnx_blob_type))
        return Literal(scalar=Scalar(blob=onnx_blob))
Пример #5
0
    def convert_to_blob(
        self,
        ctx: typing.Optional[click.Context],
        param: typing.Optional[click.Parameter],
        value: typing.Union[Directory, FileParam],
    ) -> Literal:
        """
        Build a blob ``Literal`` for a directory or file command-line value.

        For a ``Directory`` the uri comes from ``self.get_uri_for_dir``. For a file the uri is
        ``value.filepath``, unless ``self._remote`` and ``value.local`` are both truthy: then the file is
        hashed, an upload location is requested through ``self._create_upload_fn``, the file is pushed to the
        location's ``signed_url`` and the uri becomes its ``native_url``.

        ``ctx`` and ``param`` are not used by this method.
        """
        if isinstance(value, Directory):
            uri = self.get_uri_for_dir(value)
        else:
            uri = value.filepath
            needs_upload = self._remote and value.local
            if needs_upload:
                local_file = pathlib.Path(value.filepath)
                content_hash, _ = script_mode.hash_file(value.filepath)
                upload_location = self._create_upload_fn(
                    filename=local_file.name,
                    content_md5=content_hash,
                )
                self._flyte_ctx.file_access.put_data(local_file, upload_location.signed_url)
                uri = upload_location.native_url

        blob_meta = BlobMetadata(type=self._literal_type.blob)
        blob = Blob(metadata=blob_meta, uri=uri)
        return Literal(scalar=Scalar(blob=blob))
Пример #6
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: DatasetProfileView,
     python_type: Type[DatasetProfileView],
     expected: LiteralType,
 ) -> Literal:
     """
     Write ``python_val`` to a random local path, upload it, and return a blob ``Literal`` for the upload.

     The literal's uri is a random remote directory obtained from ``ctx.file_access``; its metadata type
     is ``self._TYPE_INFO``.
     """
     destination = ctx.file_access.get_random_remote_directory()
     staging_path = ctx.file_access.get_random_local_path()

     # Persist locally first, then push the local copy to the remote destination.
     python_val.write(staging_path)
     ctx.file_access.upload(staging_path, destination)

     blob = Blob(uri=destination, metadata=BlobMetadata(type=self._TYPE_INFO))
     return Literal(scalar=Scalar(blob=blob))
Пример #7
0
def test_file_format_getting_python_value():
    """Check that a "txt" blob literal converts to a ``FlyteFile`` whose ``extension()`` is "txt"."""
    transformer = TypeEngine.get_transformer(FlyteFile)
    ctx = FlyteContext.current_context()

    # This file probably won't exist, but it's okay. It won't be downloaded unless we try to read the thing returned
    blob_type = BlobType(format="txt", dimensionality=0)
    blob = Blob(metadata=BlobMetadata(type=blob_type), uri="file:///tmp/test")
    literal = Literal(scalar=Scalar(blob=blob))

    result = transformer.to_python_value(ctx, literal, expected_python_type=FlyteFile["txt"])

    assert isinstance(result, FlyteFile)
    assert result.extension() == "txt"
Пример #8
0
    def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
        """
        Pickle ``python_val`` with ``cloudpickle``, upload the file, and return a blob ``Literal`` for it.

        The value is dumped to a local file, uploaded with ``put_data(..., is_multipart=False)`` to a random
        remote path, and that remote path becomes the literal's uri. The blob type uses
        ``self.PYTHON_PICKLE_FORMAT`` with ``SINGLE`` dimensionality.
        """
        meta = BlobMetadata(
            type=_core_types.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE
            )
        )
        # Dump the task output into pickle
        local_dir = ctx.file_access.get_random_local_directory()
        os.makedirs(local_dir, exist_ok=True)
        local_path = ctx.file_access.get_random_local_path()
        uri = os.path.join(local_dir, local_path)

        # os.path.join drops ``local_dir`` entirely when ``local_path`` is absolute, so creating ``local_dir``
        # alone does not guarantee that the directory the file is written into exists. Make sure the parent of
        # the final path is present before opening it.
        parent_dir = os.path.dirname(uri)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(uri, "w+b") as outfile:
            cloudpickle.dump(python_val, outfile)

        remote_path = ctx.file_access.get_random_remote_path(uri)
        ctx.file_access.put_data(uri, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
Пример #9
0
    def to_literal(self, ctx: FlyteContext, python_val: np.ndarray,
                   python_type: Type[np.ndarray],
                   expected: LiteralType) -> Literal:
        """
        Save ``python_val`` as a ``.npy`` file, upload it, and return a blob ``Literal`` for the upload.

        The blob type uses ``self.NUMPY_ARRAY_FORMAT`` with ``SINGLE`` dimensionality; the literal's uri is
        the random remote path the file was uploaded to.
        """
        array_blob_type = _core_types.BlobType(
            format=self.NUMPY_ARRAY_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
        meta = BlobMetadata(type=array_blob_type)

        staged_file = ctx.file_access.get_random_local_path() + ".npy"
        pathlib.Path(staged_file).parent.mkdir(parents=True, exist_ok=True)

        # allow_pickle=False prevents numpy from trying to save object arrays (dtype=object) using pickle
        np.save(staged_file, python_val, allow_pickle=False)

        destination = ctx.file_access.get_random_remote_path(staged_file)
        ctx.file_access.put_data(staged_file, destination, is_multipart=False)

        array_blob = Blob(metadata=meta, uri=destination)
        return Literal(scalar=Scalar(blob=array_blob))
Пример #10
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: MyDataset,
     python_type: Type[MyDataset],
     expected: LiteralType,
 ) -> Literal:
     """
     Turn a ``MyDataset`` python object into its ``Literal`` representation.

     The contents of ``python_val.base_dir`` are uploaded to a random remote directory, and the returned
     literal is a blob whose uri is that directory and whose metadata type is ``self._TYPE_INFO``.
     """
     # First, push the dataset's directory to a remote location handed out by Flyte.
     destination = ctx.file_access.get_random_remote_directory()
     ctx.file_access.upload_directory(python_val.base_dir, destination)

     # Then, describe that remote location as a blob literal.
     dataset_blob = Blob(uri=destination, metadata=BlobMetadata(type=self._TYPE_INFO))
     return Literal(scalar=Scalar(blob=dataset_blob))
Пример #11
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: PyTorchCheckpoint,
        python_type: Type[PyTorchCheckpoint],
        expected: LiteralType,
    ) -> Literal:
        """
        Serialize a ``PyTorchCheckpoint`` with ``torch.save``, upload it, and return a blob ``Literal``.

        The saved dictionary is assembled from the truthy fields of ``python_val``:

        * ``module`` / ``optimizer`` contribute ``<name>_state_dict`` entries from their ``state_dict()``;
        * ``hyperparameters`` is merged in directly when it is a dict, via ``_asdict()`` when it is a tuple,
          and via ``asdict`` when it is a dataclass.

        :raises TypeTransformerFailedError: if nothing was collected to save.
        """
        checkpoint_blob_type = _core_types.BlobType(
            format=self.PYTORCH_CHECKPOINT_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
        meta = BlobMetadata(type=checkpoint_blob_type)

        staged_file = ctx.file_access.get_random_local_path() + ".pt"
        pathlib.Path(staged_file).parent.mkdir(parents=True, exist_ok=True)

        payload = {}
        for spec in fields(python_val):
            name = spec.name
            value = getattr(python_val, name)

            # Fields that are unset (falsy) contribute nothing.
            if not value:
                continue

            if name in ("module", "optimizer"):
                payload[f"{name}_state_dict"] = value.state_dict()
            elif name == "hyperparameters":
                if isinstance(value, dict):
                    payload.update(value)
                elif isinstance(value, tuple):
                    payload.update(value._asdict())
                elif is_dataclass(value):
                    payload.update(asdict(value))

        if not payload:
            raise TypeTransformerFailedError(f"Cannot save empty {python_val}")

        # Write the checkpoint locally, then push it to the remote store.
        torch.save(payload, staged_file)

        destination = ctx.file_access.get_random_remote_path(staged_file)
        ctx.file_access.put_data(staged_file, destination, is_multipart=False)

        checkpoint_blob = Blob(metadata=meta, uri=destination)
        return Literal(scalar=Scalar(blob=checkpoint_blob))
Пример #12
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: FlyteDirectory,
        python_type: typing.Type[FlyteDirectory],
        expected: LiteralType,
    ) -> Literal:
        """
        Convert a directory value into a blob ``Literal``.

        ``python_val`` is either a ``FlyteDirectory`` or a ``str`` / ``pathlib.Path``. A ``FlyteDirectory``
        with a ``_remote_source`` is converted straight back to a literal pointing at that source. Otherwise
        the directory is uploaded with ``put_data(..., is_multipart=True)`` unless the source is remote (per
        ``ctx.file_access.is_remote``) or ``remote_directory`` is ``False``; without an upload the literal's
        uri is the source path.

        :raises ValueError: if a local ``str`` / ``pathlib.Path`` value is not a directory.
        :raises AssertionError: if ``python_val`` is none of the accepted types.
        """
        meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))

        def _blob_literal(uri) -> Literal:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=uri)))

        destination = None

        if isinstance(python_val, FlyteDirectory):
            # If the object has a remote source, then we just convert it back.
            if python_val._remote_source is not None:
                return _blob_literal(python_val._remote_source)

            source_path = python_val.path
            # No upload when the user opted out with remote_directory=False, nor when the path is already
            # remote (uploading something like https://www.google.com to the blob store makes no sense).
            upload = not (python_val.remote_directory is False or ctx.file_access.is_remote(source_path))

            # A user-supplied destination takes precedence over the random one generated below.
            destination = python_val.remote_directory or None

        elif isinstance(python_val, (pathlib.Path, str)):
            source_path = str(python_val)
            upload = not ctx.file_access.is_remote(source_path)

            # Only local paths can be checked for being a directory.
            if upload and not Path(source_path).is_dir():
                raise ValueError(
                    f"Expected a directory. {source_path} is not a directory"
                )
        else:
            raise AssertionError(
                f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}"
            )

        # If not uploading, then we can only take the original source path as the uri.
        if not upload:
            return _blob_literal(source_path)

        # When uploading, the uri always points to the upload destination.
        if destination is None:
            destination = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(source_path, destination, is_multipart=True)
        return _blob_literal(destination)
Пример #13
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        """
        Convert a file value into a blob ``Literal``.

        ``python_type`` must be ``os.PathLike`` or a ``FlyteFile`` subclass. ``python_val`` may be a
        ``FlyteFile``, a ``pathlib.Path`` or a ``str``. The file is uploaded with
        ``put_data(..., is_multipart=False)`` and the literal points at the upload destination, except when:

        * a ``FlyteFile`` already carries a ``_remote_source`` (the literal points back at it),
        * a ``FlyteFile`` has ``remote_path`` set to ``False``,
        * the source path is remote per ``ctx.file_access.is_remote``, or
        * ``python_type`` is not a ``FlyteFile`` subclass (i.e. it is ``os.PathLike``).

        In the last three cases the literal's uri is the original source path.

        :raises TypeTransformerFailedError: if ``python_val`` is ``None``, is of an unsupported type, or is a
            local ``str`` that does not point to a file.
        :raises ValueError: if ``python_type`` is unsupported, or ``python_val`` is a local ``pathlib.Path``
            that is not a file.
        """
        if python_val is None:
            raise TypeTransformerFailedError("None value cannot be converted to a file.")

        plain_pathlike_requested = python_type is os.PathLike
        if not plain_pathlike_requested and not issubclass(python_type, FlyteFile):
            raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike")

        # information used by all cases
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))

        def _blob_literal(uri) -> Literal:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=uri)))

        destination = None

        if isinstance(python_val, FlyteFile):
            source_path = python_val.path

            # Round-tripping between a FlyteFile and a Blob that already has a remote source is a no-op.
            if python_val._remote_source is not None:
                return _blob_literal(python_val._remote_source)

            # No upload when the user opted out with remote_path=False, nor when the path is already remote
            # (uploading something like https://www.google.com to the blob store makes no sense).
            upload = not (python_val.remote_path is False or ctx.file_access.is_remote(source_path))

            # A simpler requested type also disables the upload, with a warning.
            if plain_pathlike_requested:
                logger.warning(
                    f"Converting from a FlyteFile Python instance to a Blob Flyte object, but only a {python_type} was"
                    f" specified. Since a simpler type was specified, we'll skip uploading!"
                )
                upload = False

            # A user-supplied destination takes precedence over the random one generated below.
            destination = python_val.remote_path or None

        elif isinstance(python_val, (pathlib.Path, str)):
            source_path = str(python_val)
            if not issubclass(python_type, FlyteFile):
                # python_type must be os.PathLike - see check at beginning of function
                upload = False
            elif ctx.file_access.is_remote(source_path):
                upload = False
            else:
                upload = True
                if isinstance(python_val, pathlib.Path) and not python_val.is_file():
                    raise ValueError(f"Error converting pathlib.Path {python_val} because it's not a file.")

                # A string pointing to a local destination must name an existing file as well.
                if isinstance(python_val, str) and not pathlib.Path(python_val).is_file():
                    raise TypeTransformerFailedError(f"Error converting {python_val} because it's not a file.")

        else:
            raise TypeTransformerFailedError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

        # If not uploading, then we can only take the original source path as the uri.
        if not upload:
            return _blob_literal(source_path)

        # When uploading, the uri always points to the upload destination.
        if destination is None:
            destination = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, destination, is_multipart=False)
        return _blob_literal(destination)