Example #1
 def to_literal(self, ctx: FlyteContext, python_val: T,
                python_type: Type[T], expected: LiteralType) -> Literal:
     t = self.get_sub_type(python_type)
     lit_list = [
         TypeEngine.to_literal(ctx, x, t, expected.collection_type)
         for x in python_val
     ]
     return Literal(collection=LiteralCollection(literals=lit_list))
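For orientation, here is a minimal round-trip sketch (not part of the original source) showing how this ListTransformer is reached through the generic TypeEngine entry points that several of the test examples later in this list also use; the concrete values are illustrative.

import typing

from flytekit.core.context_manager import FlyteContext
from flytekit.core.type_engine import TypeEngine

ctx = FlyteContext.current_context()
# TypeEngine picks the list transformer for typing.List[int] and builds a LiteralCollection.
lt = TypeEngine.to_literal_type(typing.List[int])
lit = TypeEngine.to_literal(ctx, [1, 2, 3], typing.List[int], lt)
assert [x.scalar.primitive.integer for x in lit.collection.literals] == [1, 2, 3]
assert TypeEngine.to_python_value(ctx, lit, typing.List[int]) == [1, 2, 3]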
Example #2
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        remote_path = None
        should_upload = True

        if python_val is None:
            raise AssertionError("None value cannot be converted to a file.")
        if isinstance(python_val, FlyteFile):
            # If the object has a remote source, then we just convert it back.
            if python_val._remote_source is not None:
                meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
                return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))

            source_path = python_val.path
            if python_val.remote_path is False:
                # If the user specified the remote_path to be False, that means no matter what, do not upload
                should_upload = False
            else:
                # Otherwise, if it's not an empty string, use the user-specified remote path instead of a random one
                remote_path = python_val.remote_path or None
        else:
            if not (isinstance(python_val, os.PathLike) or isinstance(python_val, str)):
                raise AssertionError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")
            source_path = python_val

        # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
        # and just return a literal with a uri equal to the path given
        if ctx.file_access.is_remote(source_path) or not should_upload:
            # TODO: Add copying functionality so that FlyteFile(path="s3://a", remote_path="s3://b") will copy.
            meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))

        # For local paths, we will upload to the Flyte store (note that for local execution, the remote store is just
        # a subfolder), unless remote_path=False was given
        else:
            if remote_path is None:
                remote_path = ctx.file_access.get_random_remote_path(source_path)
            ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
            meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path or source_path)))
Example #3
    def to_literal(self, ctx: FlyteContext, python_val: os.PathLike,
                   python_type: Type[os.PathLike],
                   expected: LiteralType) -> Literal:
        # TODO: we could guess the mimetype and allow the format to be changed at runtime, so a non-existent format
        #       could be replaced with a guessed format?

        rpath = ctx.file_access.get_random_remote_path()

        # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
        # and just return a literal with a uri equal to the path given
        if ctx.file_access.is_remote(python_val):
            return Literal(scalar=Scalar(blob=Blob(
                metadata=BlobMetadata(expected.blob), uri=python_val)))

        # For local files, we'll upload for the user.
        ctx.file_access.put_data(python_val, rpath, is_multipart=False)
        return Literal(scalar=Scalar(
            blob=Blob(metadata=BlobMetadata(expected.blob), uri=rpath)))
Example #4
def _recursive_hash_placement(literal: Literal) -> Literal:
    if literal.collection is not None:
        literals = [
            _recursive_hash_placement(literal)
            for literal in literal.collection.literals
        ]
        return Literal(collection=LiteralCollection(literals=literals))
    elif literal.map is not None:
        literal_map = {}
        for key, literal in literal.map.literals.items():
            literal_map[key] = _recursive_hash_placement(literal)
        return Literal(map=LiteralMap(literal_map))

    # Base case
    if literal.hash is not None:
        return Literal(hash=literal.hash)
    else:
        return literal
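A small illustrative check (not from the source, and assuming Literal accepts the hash keyword exactly as the function above uses it): an element carrying a hash is collapsed to a hash-only Literal, while elements without one pass through unchanged.

from flytekit.models.literals import Literal, LiteralCollection, Primitive, Scalar

hashed = Literal(scalar=Scalar(primitive=Primitive(integer=1)), hash="abc")  # hypothetical hash value
plain = Literal(scalar=Scalar(primitive=Primitive(integer=2)))
out = _recursive_hash_placement(Literal(collection=LiteralCollection(literals=[hashed, plain])))
assert out.collection.literals[0].hash == "abc"                  # hash kept, scalar dropped
assert out.collection.literals[1].scalar.primitive.integer == 2  # passed through unchanged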
Example #5
 def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
     if not dataclasses.is_dataclass(python_val):
         raise AssertionError(
             f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for "
             f"user defined datatypes in Flytekit"
         )
     if not issubclass(type(python_val), DataClassJsonMixin):
         raise AssertionError(
             f"Dataclass {python_type} should be decorated with @dataclass_json to be " f"serialized correctly"
         )
     return Literal(scalar=Scalar(generic=_json_format.Parse(python_val.to_json(), _struct.Struct())))
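A hypothetical dataclass that satisfies both checks above (it is a dataclass and subclasses DataClassJsonMixin), pushed through the generic TypeEngine entry points used by the tests in this list; the class and field names are invented for illustration, and behaviour may vary across flytekit versions.

from dataclasses import dataclass

from dataclasses_json import DataClassJsonMixin
from flytekit.core.context_manager import FlyteContext
from flytekit.core.type_engine import TypeEngine

@dataclass
class MyConfig(DataClassJsonMixin):  # inheriting the mixin also provides .to_json()
    name: str
    retries: int

ctx = FlyteContext.current_context()
lt = TypeEngine.to_literal_type(MyConfig)
lit = TypeEngine.to_literal(ctx, MyConfig(name="a", retries=3), MyConfig, lt)
assert lit.scalar.generic["name"] == "a"  # the value is stored as a protobuf Struct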
Example #6
def test_dolt_table_to_literal_error():
    s = Struct()
    s.update({"dummy": "data"})
    lv = Literal(Scalar(generic=s))

    with pytest.raises(ValueError):
        DoltTableNameTransformer.to_python_value(
            self=None,
            ctx=None,
            lv=lv,
            expected_python_type=DoltTable,
        )
Example #7
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: DatasetProfileView,
     python_type: Type[DatasetProfileView],
     expected: LiteralType,
 ) -> Literal:
     remote_path = ctx.file_access.get_random_remote_directory()
     local_dir = ctx.file_access.get_random_local_path()
     python_val.write(local_dir)
     ctx.file_access.upload(local_dir, remote_path)
     return Literal(scalar=Scalar(blob=Blob(uri=remote_path, metadata=BlobMetadata(type=self._TYPE_INFO))))
Example #8
    def to_literal(
        self, ctx: FlyteContext, python_val: typing.Any, python_type: Type[dict], expected: LiteralType
    ) -> Literal:
        if expected and expected.simple and expected.simple == SimpleType.STRUCT:
            return self.dict_to_generic_literal(python_val)

        lit_map = {}
        for k, v in python_val.items():
            if type(k) != str:
                raise ValueError("Flyte MapType expects all keys to be strings")
            k_type, v_type = self.get_dict_types(python_type)
            lit_map[k] = TypeEngine.to_literal(ctx, v, v_type, expected.map_value_type)
        return Literal(map=LiteralMap(literals=lit_map))
Example #9
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: pyspark.sql.DataFrame,
     python_type: Type[pyspark.sql.DataFrame],
     expected: LiteralType,
 ) -> Literal:
     remote_path = ctx.file_access.get_random_remote_directory()
     w = SparkDataFrameSchemaWriter(to_path=remote_path,
                                    cols=None,
                                    fmt=SchemaFormat.PARQUET)
     w.write(python_val)
     return Literal(scalar=Scalar(
         schema=Schema(remote_path, self._get_schema_type())))
Example #10
def test_file_format_getting_python_value():
    transformer = TypeEngine.get_transformer(FlyteFile)

    ctx = FlyteContext.current_context()

    # This file probably won't exist, but it's okay. It won't be downloaded unless we try to read the thing returned
    lv = Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(
        type=BlobType(format="txt", dimensionality=0)),
                                         uri="file:///tmp/test")))

    pv = transformer.to_python_value(ctx,
                                     lv,
                                     expected_python_type=FlyteFile["txt"])
    assert isinstance(pv, FlyteFile)
    assert pv.extension() == "txt"
Example #11
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: pandas.DataFrame,
     python_type: Type[pandas.DataFrame],
     expected: LiteralType,
 ) -> Literal:
     local_dir = ctx.file_access.get_random_local_directory()
     w = PandasSchemaWriter(local_dir=local_dir,
                            cols=None,
                            fmt=SchemaFormat.PARQUET)
     w.write(python_val)
     remote_path = ctx.file_access.get_random_remote_directory()
     ctx.file_access.put_data(local_dir, remote_path, is_multipart=True)
     return Literal(scalar=Scalar(
         schema=Schema(remote_path, self._get_schema_type())))
Example #12
def test_protos():
    ctx = FlyteContext.current_context()

    pb = errors_pb2.ContainerError(code="code", message="message")
    lt = TypeEngine.to_literal_type(errors_pb2.ContainerError)
    assert lt.simple == SimpleType.STRUCT
    assert lt.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    lit = TypeEngine.to_literal(ctx, pb, errors_pb2.ContainerError, lt)
    new_python_val = TypeEngine.to_python_value(ctx, lit, errors_pb2.ContainerError)
    assert new_python_val == pb

    # Test error
    l0 = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, l0, errors_pb2.ContainerError)
Example #13
    def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
        meta = BlobMetadata(
            type=_core_types.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE
            )
        )
        # Dump the task output into pickle
        local_dir = ctx.file_access.get_random_local_directory()
        os.makedirs(local_dir, exist_ok=True)
        local_path = ctx.file_access.get_random_local_path()
        uri = os.path.join(local_dir, local_path)
        with open(uri, "w+b") as outfile:
            cloudpickle.dump(python_val, outfile)

        remote_path = ctx.file_access.get_random_remote_path(uri)
        ctx.file_access.put_data(uri, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
Example #14
def test_dolt_table_to_literal(mocker):
    df = pandas.DataFrame()
    mocker.patch("dolt_integrations.core.load", return_value=None)
    mocker.patch("doltcli.Dolt", return_value=None)
    mocker.patch("pandas.read_csv", return_value=df)

    s = Struct()
    s.update({"config": {"db_path": "", "tablename": "t"}})
    lv = Literal(Scalar(generic=s))

    res = DoltTableNameTransformer.to_python_value(
        self=None,
        ctx=None,
        lv=lv,
        expected_python_type=DoltTable,
    )

    assert res.data.equals(df)
Example #15
    def to_literal(self, ctx: FlyteContext, python_val: np.ndarray,
                   python_type: Type[np.ndarray],
                   expected: LiteralType) -> Literal:
        meta = BlobMetadata(type=_core_types.BlobType(
            format=self.NUMPY_ARRAY_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE))

        local_path = ctx.file_access.get_random_local_path() + ".npy"
        pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)

        # save numpy array to a file
        # allow_pickle=False prevents numpy from trying to save object arrays (dtype=object) using pickle
        np.save(file=local_path, arr=python_val, allow_pickle=False)

        remote_path = ctx.file_access.get_random_remote_path(local_path)
        ctx.file_access.put_data(local_path, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(
            blob=Blob(metadata=meta, uri=remote_path)))
Example #16
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: MyDataset,
     python_type: Type[MyDataset],
     expected: LiteralType,
 ) -> Literal:
     """
     This method converts the given Python object of type ``MyDataset`` into its Literal representation
     """
     # Step 1: let's upload all the data into a remote place recommended by Flyte
     remote_dir = ctx.file_access.get_random_remote_directory()
     ctx.file_access.upload_directory(python_val.base_dir, remote_dir)
     # Step 2: let's return a pointer to this remote_dir in the form of a literal
     return Literal(
         scalar=Scalar(
             blob=Blob(uri=remote_dir, metadata=BlobMetadata(type=self._TYPE_INFO))
         )
     )
Example #17
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: Union[FlyteFile, FlyteSchema, str],
        python_type: Type[GreatExpectationsType],
        expected: LiteralType,
    ) -> Literal:
        datatype = GreatExpectationsTypeTransformer.get_config(python_type)[0]

        if issubclass(datatype, FlyteSchema):
            return FlyteSchemaTransformer().to_literal(ctx, python_val,
                                                       datatype, expected)
        elif issubclass(datatype, FlyteFile):
            return FlyteFilePathTransformer().to_literal(
                ctx, python_val, datatype, expected)
        elif issubclass(datatype, str):
            return Literal(scalar=Scalar(primitive=Primitive(
                string_value=python_val)))
        else:
            raise TypeError(f"{datatype} is not a supported type")
Example #18
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: pandas.DataFrame,
     python_type: Type[pandera.typing.DataFrame],
     expected: LiteralType,
 ) -> Literal:
     if isinstance(python_val, pandas.DataFrame):
         local_dir = ctx.file_access.get_random_local_directory()
         w = PandasSchemaWriter(local_dir=local_dir,
                                cols=self._get_col_dtypes(python_type),
                                fmt=SchemaFormat.PARQUET)
         w.write(python_val)
         remote_path = ctx.file_access.get_random_remote_directory()
         ctx.file_access.put_data(local_dir, remote_path, is_multipart=True)
         return Literal(scalar=Scalar(schema=Schema(
             remote_path, self._get_schema_type(python_type))))
     else:
         raise AssertionError(
             f"Only Pandas Dataframe object can be returned from a task, returned object type {type(python_val)}"
         )
Example #19
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: PyTorchCheckpoint,
        python_type: Type[PyTorchCheckpoint],
        expected: LiteralType,
    ) -> Literal:
        meta = BlobMetadata(
            type=_core_types.BlobType(
                format=self.PYTORCH_CHECKPOINT_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE
            )
        )

        local_path = ctx.file_access.get_random_local_path() + ".pt"
        pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)

        to_save = {}
        for field in fields(python_val):
            value = getattr(python_val, field.name)

            if value and field.name in ["module", "optimizer"]:
                to_save[field.name + "_state_dict"] = getattr(value, "state_dict")()
            elif value and field.name == "hyperparameters":
                if isinstance(value, dict):
                    to_save.update(value)
                elif isinstance(value, tuple):
                    to_save.update(value._asdict())
                elif is_dataclass(value):
                    to_save.update(asdict(value))

        if not to_save:
            raise TypeTransformerFailedError(f"Cannot save empty {python_val}")

        # save checkpoint to a file
        torch.save(to_save, local_path)

        remote_path = ctx.file_access.get_random_remote_path(local_path)
        ctx.file_access.put_data(local_path, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
Example #20
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: ScikitLearn2ONNX,
        python_type: Type[ScikitLearn2ONNX],
        expected: LiteralType,
    ) -> Literal:
        python_type, config = extract_config(python_type)

        if config:
            remote_path = ctx.file_access.get_random_remote_path()
            local_path = to_onnx(ctx, python_val.model, config.__dict__.copy())
            ctx.file_access.put_data(local_path,
                                     remote_path,
                                     is_multipart=False)
        else:
            raise TypeTransformerFailedError(f"{python_type}'s config is None")

        return Literal(scalar=Scalar(blob=Blob(
            uri=remote_path,
            metadata=BlobMetadata(type=BlobType(
                format=self.ONNX_FORMAT,
                dimensionality=BlobType.BlobDimensionality.SINGLE)),
        )))
Example #21
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: FlyteDirectory,
        python_type: typing.Type[FlyteDirectory],
        expected: LiteralType,
    ) -> Literal:

        remote_directory = None
        should_upload = True
        meta = BlobMetadata(type=self._blob_type(
            format=self.get_format(python_type)))

        # There are two kinds of literals we handle, either an actual FlyteDirectory, or a string path to a directory.
        # Handle the FlyteDirectory case
        if isinstance(python_val, FlyteDirectory):
            # If the object has a remote source, then we just convert it back.
            if python_val._remote_source is not None:
                return Literal(scalar=Scalar(
                    blob=Blob(metadata=meta, uri=python_val._remote_source)))

            source_path = python_val.path
            # If the user specified the remote_directory to be False, that means no matter what, do not upload. Also if the
            # path given is already a remote path, say https://www.google.com, the concept of uploading to the Flyte
            # blob store doesn't make sense.
            if python_val.remote_directory is False or ctx.file_access.is_remote(
                    source_path):
                should_upload = False

            # Set the remote destination if one was given instead of triggering a random one below
            remote_directory = python_val.remote_directory or None

        # Handle the string case
        elif isinstance(python_val, pathlib.Path) or isinstance(
                python_val, str):
            source_path = str(python_val)

            if ctx.file_access.is_remote(source_path):
                should_upload = False
            else:
                p = Path(source_path)
                if not p.is_dir():
                    raise ValueError(
                        f"Expected a directory. {source_path} is not a directory"
                    )
        else:
            raise AssertionError(
                f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}"
            )

        # If we're uploading something, that means that the uri should always point to the upload destination.
        if should_upload:
            if remote_directory is None:
                remote_directory = ctx.file_access.get_random_remote_directory()
            ctx.file_access.put_data(source_path,
                                     remote_directory,
                                     is_multipart=True)
            return Literal(scalar=Scalar(
                blob=Blob(metadata=meta, uri=remote_directory)))

        # If not uploading, then we can only take the original source path as the uri.
        else:
            return Literal(scalar=Scalar(
                blob=Blob(metadata=meta, uri=source_path)))
Example #22
from flytekit.core.context_manager import FlyteContextManager
from flytekit.core.type_engine import LiteralsResolver, TypeEngine
from flytekit.models import interface as interface_models
from flytekit.models.literals import Literal, LiteralCollection, LiteralMap, Primitive, Scalar
from flytekit.types.structured.structured_dataset import StructuredDataset


@pytest.mark.parametrize(
    "literal_value,python_type,expected_python_value",
    [
        (
            Literal(
                collection=LiteralCollection(
                    literals=[
                        Literal(scalar=Scalar(primitive=Primitive(integer=1))),
                        Literal(scalar=Scalar(primitive=Primitive(integer=2))),
                        Literal(scalar=Scalar(primitive=Primitive(integer=3))),
                    ]
                )
            ),
            typing.List[int],
            [1, 2, 3],
        ),
        (
            Literal(
                map=LiteralMap(
                    literals={
                        "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))),
                        "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))),
                    },
                )
Example #23
def test_interface():
    ctx = FlyteContextManager.current_context()
    lt = TypeEngine.to_literal_type(pd.DataFrame)
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    annotated_sd_type = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    assert df_literal_type.structured_dataset_type is not None
    assert len(df_literal_type.structured_dataset_type.columns) == 2
    assert df_literal_type.structured_dataset_type.columns[0].name == "name"
    assert df_literal_type.structured_dataset_type.columns[0].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.columns[1].name == "age"
    assert df_literal_type.structured_dataset_type.columns[1].literal_type.simple is not None

    sd = annotated_sd_type(df)
    sd_literal = TypeEngine.to_literal(ctx, sd, python_type=annotated_sd_type, expected=lt)

    lm = {
        "my_map": Literal(
            map=LiteralMap(
                literals={
                    "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))),
                    "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))),
                },
            )
        ),
        "my_list": Literal(
            collection=LiteralCollection(
                literals=[
                    Literal(scalar=Scalar(primitive=Primitive(integer=1))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=2))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=3))),
                ]
            )
        ),
        "val_a": Literal(scalar=Scalar(primitive=Primitive(integer=21828))),
        "my_df": sd_literal,
    }

    variable_map = {
        "my_map": interface_models.Variable(type=TypeEngine.to_literal_type(typing.Dict[str, str]), description=""),
        "my_list": interface_models.Variable(type=TypeEngine.to_literal_type(typing.List[int]), description=""),
        "val_a": interface_models.Variable(type=TypeEngine.to_literal_type(int), description=""),
        "my_df": interface_models.Variable(type=df_literal_type, description=""),
    }

    lr = LiteralsResolver(lm, variable_map=variable_map, ctx=ctx)
    assert lr._ctx is ctx

    with pytest.raises(ValueError):
        lr["not"]  # noqa

    with pytest.raises(ValueError):
        lr.get_literal("not")

    # Test that just using [] works, guessing from the Flyte type is invoked
    result = lr["my_list"]
    assert result == [1, 2, 3]

    # Test that using get works, guessing from the Flyte type is invoked
    result = lr.get("my_map")
    assert result == {
        "k1": "v1",
        "k2": "2",
    }

    # Getting the literal will return the Literal object itself
    assert lr.get_literal("my_df") is sd_literal

    guessed_df = lr["my_df"]
    # Based on guessing, so no column information
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 0
    guessed_df_2 = lr["my_df"]
    assert guessed_df is guessed_df_2

    # Update type hints with the annotated type
    lr.update_type_hints({"my_df": annotated_sd_type})
    del lr._native_values["my_df"]
    guessed_df = lr.get("my_df")
    # Using the user specified type, so number of columns is correct.
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 2
Example #24
def test_dict_transformer():
    d = DictTransformer()

    def assert_struct(lit: LiteralType):
        assert lit is not None
        assert lit.simple == SimpleType.STRUCT

    def recursive_assert(lit: LiteralType,
                         expected: LiteralType,
                         expected_depth: int = 1,
                         curr_depth: int = 0):
        assert curr_depth <= expected_depth
        assert lit is not None
        if lit.map_value_type is None:
            assert lit == expected
            return
        recursive_assert(lit.map_value_type, expected, expected_depth,
                         curr_depth + 1)

    # Type inference
    assert_struct(d.get_literal_type(dict))
    assert_struct(d.get_literal_type(typing.Dict[int, int]))
    recursive_assert(d.get_literal_type(typing.Dict[str, str]),
                     LiteralType(simple=SimpleType.STRING))
    recursive_assert(d.get_literal_type(typing.Dict[str, int]),
                     LiteralType(simple=SimpleType.INTEGER))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.datetime]),
                     LiteralType(simple=SimpleType.DATETIME))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.timedelta]),
                     LiteralType(simple=SimpleType.DURATION))
    recursive_assert(d.get_literal_type(typing.Dict[str, dict]),
                     LiteralType(simple=SimpleType.STRUCT))
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, str]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[int, str]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str,
                                                        typing.Dict[str,
                                                                    str]]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str,
                                                        typing.Dict[str,
                                                                    dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str,
                                                        typing.Dict[int,
                                                                    dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )

    ctx = FlyteContext.current_context()

    lit = d.to_literal(ctx, {}, typing.Dict, LiteralType(SimpleType.STRUCT))
    pv = d.to_python_value(ctx, lit, typing.Dict)
    assert pv == {}

    # Literal to python
    with pytest.raises(TypeError):
        d.to_python_value(
            ctx, Literal(scalar=Scalar(primitive=Primitive(integer=10))), dict)
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(), dict)
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})),
                          dict)
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})),
                          typing.Dict[int, str])

    d.to_python_value(
        ctx,
        Literal(map=LiteralMap(
            literals={
                "x": Literal(scalar=Scalar(primitive=Primitive(integer=1)))
            })),
        typing.Dict[str, int],
    )
Example #25
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: Union[StructuredDataset, typing.Any],
        python_type: Union[Type[StructuredDataset], Type],
        expected: LiteralType,
    ) -> Literal:
        # Make a copy in case we need to hand off to encoders, since we can't be sure of mutations.
        # Check first to see if it's even an SD type. For backwards compatibility, we may be getting a FlyteSchema
        python_type, *attrs = extract_cols_and_format(python_type)
        # In case it's a FlyteSchema
        sdt = StructuredDatasetType(
            format=self.DEFAULT_FORMATS.get(python_type, None))

        if expected and expected.structured_dataset_type:
            sdt = StructuredDatasetType(
                columns=expected.structured_dataset_type.columns,
                format=expected.structured_dataset_type.format,
                external_schema_type=expected.structured_dataset_type.external_schema_type,
                external_schema_bytes=expected.structured_dataset_type.external_schema_bytes,
            )

        # If the type signature has the StructuredDataset class, it will, or at least should, also be a
        # StructuredDataset instance.
        if issubclass(python_type, StructuredDataset) and isinstance(
                python_val, StructuredDataset):
            # There are three cases that we need to take care of here.

            # 1. A task returns a StructuredDataset that was just a passthrough input. If this happens
            # then return the original literals.StructuredDataset without invoking any encoder
            #
            # Ex.
            #   def t1(dataset: Annotated[StructuredDataset, my_cols]) -> Annotated[StructuredDataset, my_cols]:
            #       return dataset
            if python_val._literal_sd is not None:
                if python_val.dataframe is not None:
                    raise ValueError(
                        f"Shouldn't have specified both literal {python_val._literal_sd} and dataframe {python_val.dataframe}"
                    )
                return Literal(scalar=Scalar(
                    structured_dataset=python_val._literal_sd))

            # 2. A task returns a python StructuredDataset with a uri.
            # Note: this case is also what happens when we start a local execution of a task with a python StructuredDataset.
            #  It gets converted into a literal first, then back into a python StructuredDataset.
            #
            # Ex.
            #   def t2(uri: str) -> Annotated[StructuredDataset, my_cols]
            #       return StructuredDataset(uri=uri)
            if python_val.dataframe is None:
                if not python_val.uri:
                    raise ValueError(
                        f"If dataframe is not specified, then the uri should be specified. {python_val}"
                    )
                sd_model = literals.StructuredDataset(
                    uri=python_val.uri,
                    metadata=StructuredDatasetMetadata(
                        structured_dataset_type=sdt),
                )
                return Literal(scalar=Scalar(structured_dataset=sd_model))

            # 3. This is the third and probably most common case. The python StructuredDataset object wraps a dataframe
            # that we will need to invoke an encoder for. Figure out which encoder to call and invoke it.
            df_type = type(python_val.dataframe)
            if python_val.uri is None:
                protocol = self.DEFAULT_PROTOCOLS[df_type]
            else:
                protocol = protocol_prefix(python_val.uri)
            return self.encode(
                ctx,
                python_val,
                df_type,
                protocol,
                sdt.format or typing.cast(StructuredDataset, python_val).DEFAULT_FILE_FORMAT,
                sdt,
            )

        # Otherwise assume it's a dataframe instance. Wrap it with some defaults
        fmt = self.DEFAULT_FORMATS[python_type]
        protocol = self.DEFAULT_PROTOCOLS[python_type]
        meta = StructuredDatasetMetadata(
            structured_dataset_type=expected.structured_dataset_type if expected else None)

        sd = StructuredDataset(dataframe=python_val, metadata=meta)
        return self.encode(ctx, sd, python_type, protocol, fmt, sdt)
Example #26
def _register_default_type_transformers():
    TypeEngine.register(
        SimpleTransformer(
            "int",
            int,
            _primitives.Integer.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(integer=x))),
            lambda x: x.scalar.primitive.integer,
        ))

    TypeEngine.register(
        SimpleTransformer(
            "float",
            float,
            _primitives.Float.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(float_value=x))
                              ),
            _check_and_covert_float,
        ))

    TypeEngine.register(
        SimpleTransformer(
            "bool",
            bool,
            _primitives.Boolean.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(boolean=x))),
            lambda x: x.scalar.primitive.boolean,
        ))

    TypeEngine.register(
        SimpleTransformer(
            "str",
            str,
            _primitives.String.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(string_value=x)
                                            )),
            lambda x: x.scalar.primitive.string_value,
        ))

    TypeEngine.register(
        SimpleTransformer(
            "datetime",
            _datetime.datetime,
            _primitives.Datetime.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(datetime=x))),
            lambda x: x.scalar.primitive.datetime,
        ))

    TypeEngine.register(
        SimpleTransformer(
            "timedelta",
            _datetime.timedelta,
            _primitives.Timedelta.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(duration=x))),
            lambda x: x.scalar.primitive.duration,
        ))

    TypeEngine.register(
        SimpleTransformer(
            "none",
            None,
            _type_models.LiteralType(simple=_type_models.SimpleType.NONE),
            lambda x: None,
            lambda x: None,
        ))
    TypeEngine.register(ListTransformer())
    TypeEngine.register(DictTransformer())
    TypeEngine.register(TextIOTransformer())
    TypeEngine.register(PathLikeTransformer())
    TypeEngine.register(BinaryIOTransformer())

    # Also unsupported are typing's Tuples. Even though you can look inside them, Flyte's type system
    # doesn't support these currently.
    # Confusing note: typing.NamedTuple is in here even though task functions themselves can return them. We just mean
    # that the return signature of a task can be a NamedTuple that contains another NamedTuple inside it.
    # Also, it's not entirely true that Flyte IDL doesn't support tuples. We can always fake them as structs, but we'll
    # hold off on doing that for now, as we may amend the IDL formally to support tuples.
    TypeEngine.register(RestrictedType("non typed tuple", tuple))
    TypeEngine.register(RestrictedType("non typed tuple", typing.Tuple))
    TypeEngine.register(RestrictedType("named tuple", typing.NamedTuple))
Example #27
 def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
     return Literal(scalar=Scalar(primitive=Primitive(string_value=python_val.value)))
Example #28
 def dict_to_generic_literal(v: dict) -> Literal:
     return Literal(scalar=Scalar(
         generic=_json_format.Parse(_json.dumps(v), _struct.Struct())))
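This helper leans entirely on protobuf's Struct/JSON bridge; here is a self-contained sketch of that underlying pattern (google.protobuf is a real dependency of flytekit, the values are illustrative only).

import json

from google.protobuf import json_format, struct_pb2

s = json_format.Parse(json.dumps({"a": 1, "b": "x"}), struct_pb2.Struct())
# Struct stores all numbers as doubles, so the integer comes back as 1.0.
assert json_format.MessageToDict(s) == {"a": 1.0, "b": "x"}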
Example #29
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        remote_path = None
        should_upload = True

        if python_val is None:
            raise TypeTransformerFailedError("None value cannot be converted to a file.")

        if not (python_type is os.PathLike or issubclass(python_type, FlyteFile)):
            raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike")

        # information used by all cases
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))

        if isinstance(python_val, FlyteFile):
            source_path = python_val.path

            # If the object has a remote source, then we just convert it back. This means that if someone is just
            # going back and forth between a FlyteFile Python value and a Blob Flyte IDL value, we don't do anything.
            if python_val._remote_source is not None:
                return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))

            # If the user specified the remote_path to be False, that means no matter what, do not upload. Also if the
            # path given is already a remote path, say https://www.google.com, the concept of uploading to the Flyte
            # blob store doesn't make sense.
            if python_val.remote_path is False or ctx.file_access.is_remote(source_path):
                should_upload = False
            # If the type that's given is a simpler type, we also don't upload, and print a warning too.
            if python_type is os.PathLike:
                logger.warning(
                    f"Converting from a FlyteFile Python instance to a Blob Flyte object, but only a {python_type} was"
                    f" specified. Since a simpler type was specified, we'll skip uploading!"
                )
                should_upload = False

            # Set the remote destination if one was given instead of triggering a random one below
            remote_path = python_val.remote_path or None

        elif isinstance(python_val, pathlib.Path) or isinstance(python_val, str):
            source_path = str(python_val)
            if issubclass(python_type, FlyteFile):
                if ctx.file_access.is_remote(source_path):
                    should_upload = False
                else:
                    if isinstance(python_val, pathlib.Path) and not python_val.is_file():
                        raise ValueError(f"Error converting pathlib.Path {python_val} because it's not a file.")

                    # If it's a string pointing to a local destination, then make sure it's a file.
                    if isinstance(python_val, str):
                        p = pathlib.Path(python_val)
                        if not p.is_file():
                            raise TypeTransformerFailedError(f"Error converting {python_val} because it's not a file.")
            # python_type must be os.PathLike - see check at beginning of function
            else:
                should_upload = False

        else:
            raise TypeTransformerFailedError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

        # If we're uploading something, that means that the uri should always point to the upload destination.
        if should_upload:
            if remote_path is None:
                remote_path = ctx.file_access.get_random_remote_path(source_path)
            ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
        # If not uploading, then we can only take the original source path as the uri.
        else:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))
Example #30
 def to_literal(self, ctx: FlyteContext, python_val: T,
                python_type: Type[T], expected: LiteralType) -> Literal:
     struct = Struct()
     struct.update(_MessageToDict(python_val))
     return Literal(scalar=Scalar(generic=struct))
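As a standalone sketch of the same pattern (reusing the flyteidl message from Example #12; illustrative only), a protobuf message can be flattened into a Struct like this:

from flyteidl.core import errors_pb2
from google.protobuf.json_format import MessageToDict
from google.protobuf.struct_pb2 import Struct

pb = errors_pb2.ContainerError(code="code", message="message")
struct = Struct()
struct.update(MessageToDict(pb))  # Struct behaves like a dict here
assert struct["code"] == "code"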