Exemplo n.º 1
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: T,
     python_type: Type[T],
     expected: LiteralType,
 ) -> Literal:
     """Convert an iterable Python value into a collection ``Literal``.

     Each element is converted with ``TypeEngine.to_literal`` using the sub
     type resolved from ``python_type`` and ``expected.collection_type``.
     """
     element_type = self.get_sub_type(python_type)
     converted = []
     for item in python_val:
         converted.append(TypeEngine.to_literal(ctx, item, element_type, expected.collection_type))
     return Literal(collection=LiteralCollection(literals=converted))
Exemplo n.º 2
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        """Convert a ``FlyteFile``, path-like object or string into a blob ``Literal``.

        A ``FlyteFile`` that already carries a remote source is returned as a
        literal pointing at that source. Paths that ``ctx.file_access`` reports
        as remote, or files whose ``remote_path`` is ``False``, are referenced
        by their source path without uploading. Everything else is uploaded via
        ``ctx.file_access.put_data`` and the literal points at the destination.

        Raises:
            AssertionError: if ``python_val`` is ``None`` or is neither a
                ``FlyteFile``, an ``os.PathLike`` nor a ``str``.
        """
        if python_val is None:
            raise AssertionError("None value cannot be converted to a file.")

        upload_target = None
        upload_allowed = True

        if isinstance(python_val, FlyteFile):
            # If the object has a remote source, then we just convert it back.
            if python_val._remote_source is not None:
                blob_meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
                return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=python_val._remote_source)))

            source_path = python_val.path
            if python_val.remote_path is False:
                # remote_path=False means: never upload, whatever the path is.
                upload_allowed = False
            else:
                # A non-empty user-specified remote path replaces the random one.
                upload_target = python_val.remote_path or None
        elif isinstance(python_val, (os.PathLike, str)):
            source_path = python_val
        else:
            raise AssertionError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

        # For remote values, say https://raw.github.com/demo_data.csv, nothing is uploaded to Flyte's store
        # (S3/GCS); the literal simply carries the given path as its uri.
        if ctx.file_access.is_remote(source_path) or not upload_allowed:
            # TODO: Add copying functionality so that FlyteFile(path="s3://a", remote_path="s3://b") will copy.
            blob_meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=source_path)))

        # Local paths are uploaded to the Flyte store (for local execution the remote store is just a
        # subfolder).
        if upload_target is None:
            upload_target = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, upload_target, is_multipart=False)
        blob_meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
        return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=upload_target or source_path)))
Exemplo n.º 3
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: os.PathLike,
        python_type: Type[os.PathLike],
        expected: LiteralType,
    ) -> Literal:
        """Convert a path into a blob ``Literal`` whose metadata is built from ``expected.blob``.

        Paths that ``ctx.file_access`` reports as remote are referenced
        directly; any other path is uploaded to a random remote path first.
        """
        # TODO we could guess the mimetype and allow the format to be changed at runtime. thus a non existent format
        #      could be replaced with a guess format?

        upload_uri = ctx.file_access.get_random_remote_path()

        if ctx.file_access.is_remote(python_val):
            # Remote values (e.g. https://raw.github.com/demo_data.csv) are not uploaded to Flyte's store;
            # the literal's uri is the path that was given.
            blob_uri = python_val
        else:
            # For local files, we'll upload for the user.
            ctx.file_access.put_data(python_val, upload_uri, is_multipart=False)
            blob_uri = upload_uri

        blob = Blob(metadata=BlobMetadata(expected.blob), uri=blob_uri)
        return Literal(scalar=Scalar(blob=blob))
Exemplo n.º 4
0
def _recursive_hash_placement(literal: Literal) -> Literal:
    """Rebuild ``literal`` so that hashed leaves are replaced by hash-only literals.

    Collections and maps are rebuilt element by element. A leaf whose ``hash``
    is set is replaced by ``Literal(hash=...)``; any other leaf is returned
    unchanged.
    """
    if literal.collection is not None:
        children = []
        for item in literal.collection.literals:
            children.append(_recursive_hash_placement(item))
        return Literal(collection=LiteralCollection(literals=children))

    if literal.map is not None:
        rebuilt = {
            name: _recursive_hash_placement(entry)
            for name, entry in literal.map.literals.items()
        }
        return Literal(map=LiteralMap(rebuilt))

    # Base case
    return literal if literal.hash is None else Literal(hash=literal.hash)
Exemplo n.º 5
0
 def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
     """Serialize a ``dataclass_json`` dataclass into a generic-struct ``Literal``.

     The value is dumped with ``to_json()`` and parsed into a protobuf ``Struct``.

     Raises:
         AssertionError: if ``python_val`` is not a dataclass, or its type does
             not derive from ``DataClassJsonMixin``.
     """
     value_cls = type(python_val)
     if not dataclasses.is_dataclass(python_val):
         message = (
             f"{value_cls} is not of type @dataclass, only Dataclasses are supported for "
             f"user defined datatypes in Flytekit"
         )
         raise AssertionError(message)
     if not issubclass(value_cls, DataClassJsonMixin):
         message = f"Dataclass {python_type} should be decorated with @dataclass_json to be serialized correctly"
         raise AssertionError(message)

     serialized = python_val.to_json()
     generic = _json_format.Parse(serialized, _struct.Struct())
     return Literal(scalar=Scalar(generic=generic))
Exemplo n.º 6
0
def test_dolt_table_to_literal_error():
    """Converting a literal whose struct only holds ``{"dummy": "data"}`` must raise ``ValueError``."""
    payload = Struct()
    payload.update({"dummy": "data"})
    literal = Literal(Scalar(generic=payload))
    call_kwargs = dict(self=None, ctx=None, lv=literal, expected_python_type=DoltTable)

    with pytest.raises(ValueError):
        DoltTableNameTransformer.to_python_value(**call_kwargs)
Exemplo n.º 7
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: DatasetProfileView,
     python_type: Type[DatasetProfileView],
     expected: LiteralType,
 ) -> Literal:
     """Write the profile view to a random local path, upload it, and return a blob ``Literal``.

     The literal's uri is the random remote directory the data was uploaded
     to; its metadata type is ``self._TYPE_INFO``.
     """
     upload_dir = ctx.file_access.get_random_remote_directory()
     scratch_path = ctx.file_access.get_random_local_path()
     python_val.write(scratch_path)
     ctx.file_access.upload(scratch_path, upload_dir)

     blob = Blob(uri=upload_dir, metadata=BlobMetadata(type=self._TYPE_INFO))
     return Literal(scalar=Scalar(blob=blob))
Exemplo n.º 8
0
    def to_literal(
        self, ctx: FlyteContext, python_val: typing.Any, python_type: Type[dict], expected: LiteralType
    ) -> Literal:
        """Convert a Python dict into a Flyte ``Literal``.

        When ``expected`` is a ``SimpleType.STRUCT`` literal type the whole
        dict is delegated to ``self.dict_to_generic_literal``. Otherwise each
        value is converted with ``TypeEngine.to_literal`` and the results are
        collected into a ``LiteralMap``.

        Args:
            ctx: Context forwarded to ``TypeEngine.to_literal``.
            python_val: The dict to convert.
            python_type: The declared dict type; its value type drives the
                conversion of every entry.
            expected: The expected literal type.

        Returns:
            A generic-struct literal or a map literal, as described above.

        Raises:
            ValueError: if any key's type is not exactly ``str``.
        """
        if expected and expected.simple and expected.simple == SimpleType.STRUCT:
            return self.dict_to_generic_literal(python_val)

        # The value type depends only on ``python_type``, so resolve it once
        # instead of once per entry. The key type is not needed here.
        _, v_type = self.get_dict_types(python_type)

        lit_map = {}
        for k, v in python_val.items():
            # Exact type check on purpose: str subclasses are rejected, as before.
            if type(k) is not str:
                raise ValueError("Flyte MapType expects all keys to be strings")
            lit_map[k] = TypeEngine.to_literal(ctx, v, v_type, expected.map_value_type)
        return Literal(map=LiteralMap(literals=lit_map))
Exemplo n.º 9
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: pyspark.sql.DataFrame,
     python_type: Type[pyspark.sql.DataFrame],
     expected: LiteralType,
 ) -> Literal:
     """Write the Spark DataFrame as parquet to a random remote directory and return a schema ``Literal``."""
     target_dir = ctx.file_access.get_random_remote_directory()
     writer = SparkDataFrameSchemaWriter(to_path=target_dir, cols=None, fmt=SchemaFormat.PARQUET)
     writer.write(python_val)

     schema = Schema(target_dir, self._get_schema_type())
     return Literal(scalar=Scalar(schema=schema))
Exemplo n.º 10
0
def test_file_format_getting_python_value():
    """A "txt" blob literal converted as ``FlyteFile["txt"]`` yields a ``FlyteFile`` with extension "txt"."""
    transformer = TypeEngine.get_transformer(FlyteFile)
    ctx = FlyteContext.current_context()

    # This file probably won't exist, but it's okay. It won't be downloaded unless we try to read the thing returned
    blob_type = BlobType(format="txt", dimensionality=0)
    blob = Blob(metadata=BlobMetadata(type=blob_type), uri="file:///tmp/test")
    literal = Literal(scalar=Scalar(blob=blob))

    python_value = transformer.to_python_value(ctx, literal, expected_python_type=FlyteFile["txt"])

    assert isinstance(python_value, FlyteFile)
    assert python_value.extension() == "txt"
Exemplo n.º 11
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: pandas.DataFrame,
     python_type: Type[pandas.DataFrame],
     expected: LiteralType,
 ) -> Literal:
     """Write the DataFrame as parquet to a local directory, upload it, and return a schema ``Literal``."""
     staging_dir = ctx.file_access.get_random_local_directory()
     writer = PandasSchemaWriter(local_dir=staging_dir, cols=None, fmt=SchemaFormat.PARQUET)
     writer.write(python_val)

     upload_dir = ctx.file_access.get_random_remote_directory()
     ctx.file_access.put_data(staging_dir, upload_dir, is_multipart=True)

     schema = Schema(upload_dir, self._get_schema_type())
     return Literal(scalar=Scalar(schema=schema))
Exemplo n.º 12
0
def test_protos():
    """Round-trip a ``ContainerError`` proto through the type engine and reject a mismatched literal."""
    ctx = FlyteContext.current_context()
    proto_cls = errors_pb2.ContainerError

    original = proto_cls(code="code", message="message")
    literal_type = TypeEngine.to_literal_type(proto_cls)
    assert literal_type.simple == SimpleType.STRUCT
    assert literal_type.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    literal = TypeEngine.to_literal(ctx, original, proto_cls, literal_type)
    restored = TypeEngine.to_python_value(ctx, literal, proto_cls)
    assert restored == original

    # Test error
    int_literal = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, int_literal, proto_cls)
Exemplo n.º 13
0
    def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
        """Pickle ``python_val`` with cloudpickle, upload the file, and return a single-blob ``Literal``.

        The blob format is ``self.PYTHON_PICKLE_FORMAT`` and the literal's uri
        is the remote path the pickle file was uploaded to.
        """
        blob_type = _core_types.BlobType(
            format=self.PYTHON_PICKLE_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
        meta = BlobMetadata(type=blob_type)

        # Dump the task output into pickle
        scratch_dir = ctx.file_access.get_random_local_directory()
        os.makedirs(scratch_dir, exist_ok=True)
        pickle_path = os.path.join(scratch_dir, ctx.file_access.get_random_local_path())
        with open(pickle_path, "w+b") as sink:
            cloudpickle.dump(python_val, sink)

        upload_uri = ctx.file_access.get_random_remote_path(pickle_path)
        ctx.file_access.put_data(pickle_path, upload_uri, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=upload_uri)))
Exemplo n.º 14
0
def test_dolt_table_to_literal(mocker):
    """With Dolt and ``pandas.read_csv`` patched, ``to_python_value`` returns a table whose data is the patched frame."""
    empty_frame = pandas.DataFrame()
    patches = (
        ("dolt_integrations.core.load", None),
        ("doltcli.Dolt", None),
        ("pandas.read_csv", empty_frame),
    )
    for target, replacement in patches:
        mocker.patch(target, return_value=replacement)

    config_struct = Struct()
    config_struct.update({"config": {"db_path": "", "tablename": "t"}})
    literal = Literal(Scalar(generic=config_struct))

    result = DoltTableNameTransformer.to_python_value(
        self=None,
        ctx=None,
        lv=literal,
        expected_python_type=DoltTable,
    )

    assert result.data.equals(empty_frame)
Exemplo n.º 15
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: np.ndarray,
        python_type: Type[np.ndarray],
        expected: LiteralType,
    ) -> Literal:
        """Save the array to a local ``.npy`` file, upload it, and return a single-blob ``Literal``.

        The blob format is ``self.NUMPY_ARRAY_FORMAT`` and the literal's uri is
        the remote path the file was uploaded to.
        """
        blob_type = _core_types.BlobType(
            format=self.NUMPY_ARRAY_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
        meta = BlobMetadata(type=blob_type)

        array_path = ctx.file_access.get_random_local_path() + ".npy"
        pathlib.Path(array_path).parent.mkdir(parents=True, exist_ok=True)

        # allow_pickle=False prevents numpy from trying to save object arrays (dtype=object) using pickle
        np.save(file=array_path, arr=python_val, allow_pickle=False)

        upload_uri = ctx.file_access.get_random_remote_path(array_path)
        ctx.file_access.put_data(array_path, upload_uri, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=upload_uri)))
Exemplo n.º 16
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: MyDataset,
     python_type: Type[MyDataset],
     expected: LiteralType,
 ) -> Literal:
     """
     Convert a ``MyDataset`` object into its Literal representation: the dataset's ``base_dir`` is uploaded
     to a random remote directory and the returned blob literal points at that directory.
     """
     # Step 1: upload all the data to a remote location chosen through the file access layer
     upload_dir = ctx.file_access.get_random_remote_directory()
     ctx.file_access.upload_directory(python_val.base_dir, upload_dir)

     # Step 2: return a pointer to that remote directory in the form of a literal
     blob = Blob(uri=upload_dir, metadata=BlobMetadata(type=self._TYPE_INFO))
     return Literal(scalar=Scalar(blob=blob))
Exemplo n.º 17
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: Union[FlyteFile, FlyteSchema, str],
        python_type: Type[GreatExpectationsType],
        expected: LiteralType,
    ) -> Literal:
        """Convert ``python_val`` according to the datatype configured on ``python_type``.

        ``FlyteSchema`` and ``FlyteFile`` datatypes are delegated to their own
        transformers; ``str`` datatypes become a string primitive literal.

        Raises:
            TypeError: if the configured datatype is none of the above.
        """
        datatype = GreatExpectationsTypeTransformer.get_config(python_type)[0]

        if issubclass(datatype, FlyteSchema):
            schema_transformer = FlyteSchemaTransformer()
            return schema_transformer.to_literal(ctx, python_val, datatype, expected)

        if issubclass(datatype, FlyteFile):
            file_transformer = FlyteFilePathTransformer()
            return file_transformer.to_literal(ctx, python_val, datatype, expected)

        if issubclass(datatype, str):
            primitive = Primitive(string_value=python_val)
            return Literal(scalar=Scalar(primitive=primitive))

        raise TypeError(f"{datatype} is not a supported type")
Exemplo n.º 18
0
 def to_literal(
     self,
     ctx: FlyteContext,
     python_val: pandas.DataFrame,
     python_type: Type[pandera.typing.DataFrame],
     expected: LiteralType,
 ) -> Literal:
     """Write a pandas DataFrame as parquet, upload it, and return a schema ``Literal``.

     Column dtypes and the schema type are both derived from ``python_type``.

     Raises:
         AssertionError: if ``python_val`` is not a ``pandas.DataFrame``.
     """
     if not isinstance(python_val, pandas.DataFrame):
         raise AssertionError(
             f"Only Pandas Dataframe object can be returned from a task, returned object type {type(python_val)}"
         )

     staging_dir = ctx.file_access.get_random_local_directory()
     writer = PandasSchemaWriter(
         local_dir=staging_dir,
         cols=self._get_col_dtypes(python_type),
         fmt=SchemaFormat.PARQUET,
     )
     writer.write(python_val)

     upload_dir = ctx.file_access.get_random_remote_directory()
     ctx.file_access.put_data(staging_dir, upload_dir, is_multipart=True)

     schema = Schema(upload_dir, self._get_schema_type(python_type))
     return Literal(scalar=Scalar(schema=schema))
Exemplo n.º 19
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: PyTorchCheckpoint,
        python_type: Type[PyTorchCheckpoint],
        expected: LiteralType,
    ) -> Literal:
        """Save a checkpoint with ``torch.save``, upload the ``.pt`` file, and return a single-blob ``Literal``.

        For truthy ``module`` / ``optimizer`` fields the result of
        ``state_dict()`` is stored under ``<field>_state_dict``. A truthy
        ``hyperparameters`` field is merged into the saved dict when it is a
        dict, a tuple (via ``_asdict()``) or a dataclass.

        Raises:
            TypeTransformerFailedError: if nothing was collected to save.
        """
        blob_type = _core_types.BlobType(
            format=self.PYTORCH_CHECKPOINT_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
        meta = BlobMetadata(type=blob_type)

        checkpoint_path = ctx.file_access.get_random_local_path() + ".pt"
        pathlib.Path(checkpoint_path).parent.mkdir(parents=True, exist_ok=True)

        payload = {}
        for spec in fields(python_val):
            name = spec.name
            attr = getattr(python_val, name)
            if not attr:
                continue

            if name in ("module", "optimizer"):
                payload[name + "_state_dict"] = attr.state_dict()
            elif name == "hyperparameters":
                if isinstance(attr, dict):
                    payload.update(attr)
                elif isinstance(attr, tuple):
                    payload.update(attr._asdict())
                elif is_dataclass(attr):
                    payload.update(asdict(attr))

        if not payload:
            raise TypeTransformerFailedError(f"Cannot save empty {python_val}")

        # save checkpoint to a file
        torch.save(payload, checkpoint_path)

        upload_uri = ctx.file_access.get_random_remote_path(checkpoint_path)
        ctx.file_access.put_data(checkpoint_path, upload_uri, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=upload_uri)))
Exemplo n.º 20
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: ScikitLearn2ONNX,
        python_type: Type[ScikitLearn2ONNX],
        expected: LiteralType,
    ) -> Literal:
        """Convert the wrapped model with ``to_onnx``, upload the result, and return a single-blob ``Literal``.

        The conversion config is extracted from ``python_type``; a copy of its
        ``__dict__`` is handed to ``to_onnx``.

        Raises:
            TypeTransformerFailedError: if no config could be extracted.
        """
        base_type, config = extract_config(python_type)
        if not config:
            raise TypeTransformerFailedError(f"{base_type}'s config is None")

        upload_uri = ctx.file_access.get_random_remote_path()
        onnx_path = to_onnx(ctx, python_val.model, config.__dict__.copy())
        ctx.file_access.put_data(onnx_path, upload_uri, is_multipart=False)

        blob_type = BlobType(
            format=self.ONNX_FORMAT,
            dimensionality=BlobType.BlobDimensionality.SINGLE,
        )
        blob = Blob(uri=upload_uri, metadata=BlobMetadata(type=blob_type))
        return Literal(scalar=Scalar(blob=blob))
Exemplo n.º 21
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: FlyteDirectory,
        python_type: typing.Type[FlyteDirectory],
        expected: LiteralType,
    ) -> Literal:
        """Convert a ``FlyteDirectory``, ``pathlib.Path`` or string path into a blob ``Literal``.

        A ``FlyteDirectory`` with a remote source is converted straight back
        to a literal pointing at that source. Otherwise the directory is
        uploaded with ``ctx.file_access.put_data`` unless the path is remote
        or ``remote_directory`` is ``False``; in those cases the literal
        carries the original source path.

        Raises:
            ValueError: if a local ``Path``/``str`` value is not a directory.
            AssertionError: if ``python_val`` is of an unsupported type.
        """
        meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
        upload_needed = True
        destination = None

        # Two kinds of values are handled: an actual FlyteDirectory, or a path (Path/str) to a directory.
        if isinstance(python_val, FlyteDirectory):
            # If the object has a remote source, then we just convert it back.
            if python_val._remote_source is not None:
                return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))

            source_path = python_val.path
            # remote_directory=False means never upload. A path that is already remote
            # (say https://www.google.com) is not uploaded to the Flyte blob store either.
            upload_needed = not (python_val.remote_directory is False or ctx.file_access.is_remote(source_path))

            # Use the given remote destination, if any, instead of a random one chosen below.
            destination = python_val.remote_directory or None
        elif isinstance(python_val, (pathlib.Path, str)):
            source_path = str(python_val)

            if ctx.file_access.is_remote(source_path):
                upload_needed = False
            elif not Path(source_path).is_dir():
                raise ValueError(f"Expected a directory. {source_path} is not a directory")
        else:
            raise AssertionError(f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}")

        # If not uploading, then we can only take the original source path as the uri.
        if not upload_needed:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))

        # When uploading, the uri always points to the upload destination.
        if destination is None:
            destination = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(source_path, destination, is_multipart=True)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=destination)))
Exemplo n.º 22
0
from flytekit.core.context_manager import FlyteContextManager
from flytekit.core.type_engine import LiteralsResolver, TypeEngine
from flytekit.models import interface as interface_models
from flytekit.models.literals import Literal, LiteralCollection, LiteralMap, Primitive, Scalar
from flytekit.types.structured.structured_dataset import StructuredDataset


@pytest.mark.parametrize(
    "literal_value,python_type,expected_python_value",
    [
        (
            Literal(
                collection=LiteralCollection(
                    literals=[
                        Literal(scalar=Scalar(primitive=Primitive(integer=1))),
                        Literal(scalar=Scalar(primitive=Primitive(integer=2))),
                        Literal(scalar=Scalar(primitive=Primitive(integer=3))),
                    ]
                )
            ),
            typing.List[int],
            [1, 2, 3],
        ),
        (
            Literal(
                map=LiteralMap(
                    literals={
                        "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))),
                        "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))),
                    },
                )
Exemplo n.º 23
0
def test_interface():
    """Exercise ``LiteralsResolver`` lookups over a map, a list, an int and a structured dataset literal.

    Checks that unknown keys raise ``ValueError``, that ``[]`` and ``get``
    return native Python values, that ``get_literal`` returns the stored
    literal object itself, that repeated lookups return the same object, and
    that updating the type hints changes the column information of the
    resolved dataset.
    """
    ctx = FlyteContextManager.current_context()
    lt = TypeEngine.to_literal_type(pd.DataFrame)
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    # The annotated type carries two columns, which must show up in the literal type.
    annotated_sd_type = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    assert df_literal_type.structured_dataset_type is not None
    assert len(df_literal_type.structured_dataset_type.columns) == 2
    assert df_literal_type.structured_dataset_type.columns[0].name == "name"
    assert df_literal_type.structured_dataset_type.columns[0].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.columns[1].name == "age"
    assert df_literal_type.structured_dataset_type.columns[1].literal_type.simple is not None

    # Note: the literal is produced with the plain DataFrame literal type (lt) as the expected type.
    sd = annotated_sd_type(df)
    sd_literal = TypeEngine.to_literal(ctx, sd, python_type=annotated_sd_type, expected=lt)

    # Literals handed to the resolver, keyed by variable name.
    lm = {
        "my_map": Literal(
            map=LiteralMap(
                literals={
                    "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))),
                    "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))),
                },
            )
        ),
        "my_list": Literal(
            collection=LiteralCollection(
                literals=[
                    Literal(scalar=Scalar(primitive=Primitive(integer=1))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=2))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=3))),
                ]
            )
        ),
        "val_a": Literal(scalar=Scalar(primitive=Primitive(integer=21828))),
        "my_df": sd_literal,
    }

    # Flyte-side variable declarations matching the literals above.
    variable_map = {
        "my_map": interface_models.Variable(type=TypeEngine.to_literal_type(typing.Dict[str, str]), description=""),
        "my_list": interface_models.Variable(type=TypeEngine.to_literal_type(typing.List[int]), description=""),
        "val_a": interface_models.Variable(type=TypeEngine.to_literal_type(int), description=""),
        "my_df": interface_models.Variable(type=df_literal_type, description=""),
    }

    lr = LiteralsResolver(lm, variable_map=variable_map, ctx=ctx)
    assert lr._ctx is ctx

    # Unknown keys are rejected by both access paths.
    with pytest.raises(ValueError):
        lr["not"]  # noqa

    with pytest.raises(ValueError):
        lr.get_literal("not")

    # Test that just using [] works, guessing from the Flyte type is invoked
    result = lr["my_list"]
    assert result == [1, 2, 3]

    # Test that using get works, guessing from the Flyte type is invoked
    result = lr.get("my_map")
    assert result == {
        "k1": "v1",
        "k2": "2",
    }

    # Getting the literal will return the Literal object itself
    assert lr.get_literal("my_df") is sd_literal

    guessed_df = lr["my_df"]
    # Based on guessing, so no column information
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 0
    # A second lookup must hand back the very same object.
    guessed_df_2 = lr["my_df"]
    assert guessed_df is guessed_df_2

    # Update type hints with the annotated type
    lr.update_type_hints({"my_df": annotated_sd_type})
    # Drop the stored native value so the next lookup converts again with the new hint.
    del lr._native_values["my_df"]
    guessed_df = lr.get("my_df")
    # Using the user specified type, so number of columns is correct.
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 2
Exemplo n.º 24
0
def test_dict_transformer():
    """Check ``DictTransformer`` literal-type inference, an empty-dict round trip, and invalid conversions."""
    transformer = DictTransformer()

    def assert_struct(lit: LiteralType):
        assert lit is not None
        assert lit.simple == SimpleType.STRUCT

    def recursive_assert(lit: LiteralType,
                         expected: LiteralType,
                         expected_depth: int = 1,
                         curr_depth: int = 0):
        # Walk down the chain of map value types until the innermost type is reached.
        while True:
            assert curr_depth <= expected_depth
            assert lit is not None
            if lit.map_value_type is None:
                assert lit == expected
                return
            lit = lit.map_value_type
            curr_depth += 1

    # Type inference: these fall back to a generic struct.
    for struct_type in (dict, typing.Dict[int, int]):
        assert_struct(transformer.get_literal_type(struct_type))

    # Type inference: (python type, innermost simple type, maximum map nesting depth).
    nested_cases = [
        (typing.Dict[str, str], SimpleType.STRING, 1),
        (typing.Dict[str, int], SimpleType.INTEGER, 1),
        (typing.Dict[str, datetime.datetime], SimpleType.DATETIME, 1),
        (typing.Dict[str, datetime.timedelta], SimpleType.DURATION, 1),
        (typing.Dict[str, dict], SimpleType.STRUCT, 1),
        (typing.Dict[str, typing.Dict[str, str]], SimpleType.STRING, 2),
        (typing.Dict[str, typing.Dict[int, str]], SimpleType.STRUCT, 2),
        (typing.Dict[str, typing.Dict[str, typing.Dict[str, str]]], SimpleType.STRING, 3),
        (typing.Dict[str, typing.Dict[str, typing.Dict[str, dict]]], SimpleType.STRUCT, 3),
        (typing.Dict[str, typing.Dict[str, typing.Dict[int, dict]]], SimpleType.STRUCT, 2),
    ]
    for python_type, simple, depth in nested_cases:
        recursive_assert(
            transformer.get_literal_type(python_type),
            LiteralType(simple=simple),
            expected_depth=depth,
        )

    ctx = FlyteContext.current_context()

    empty_literal = transformer.to_literal(ctx, {}, typing.Dict, LiteralType(SimpleType.STRUCT))
    assert transformer.to_python_value(ctx, empty_literal, typing.Dict) == {}

    # Literal to python: each of these calls must fail with TypeError. The literal is
    # built inside the callable so that construction also happens under pytest.raises.
    invalid_conversions = (
        lambda: transformer.to_python_value(
            ctx, Literal(scalar=Scalar(primitive=Primitive(integer=10))), dict),
        lambda: transformer.to_python_value(ctx, Literal(), dict),
        lambda: transformer.to_python_value(
            ctx, Literal(map=LiteralMap(literals={"x": None})), dict),
        lambda: transformer.to_python_value(
            ctx, Literal(map=LiteralMap(literals={"x": None})), typing.Dict[int, str]),
    )
    for convert in invalid_conversions:
        with pytest.raises(TypeError):
            convert()

    # A well-formed map literal converts without raising.
    valid_map = LiteralMap(literals={"x": Literal(scalar=Scalar(primitive=Primitive(integer=1)))})
    transformer.to_python_value(ctx, Literal(map=valid_map), typing.Dict[str, int])
Exemplo n.º 25
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: Union[StructuredDataset, typing.Any],
        python_type: Union[Type[StructuredDataset], Type],
        expected: LiteralType,
    ) -> Literal:
        """Convert a ``StructuredDataset`` or a raw dataframe object into a ``Literal``.

        A ``StructuredDataset`` value is handled in one of three ways: its
        original literal is passed through, a literal is built from its uri,
        or its dataframe is handed to ``self.encode``. Any other value is
        wrapped in a ``StructuredDataset`` and encoded with the default format
        and protocol registered for ``python_type``.

        Raises:
            ValueError: if a ``StructuredDataset`` carries both an original
                literal and a dataframe, or neither a dataframe nor a uri.
        """
        # Make a copy in case we need to hand off to encoders, since we can't be sure of mutations.
        # Check first to see if it's even an SD type. For backwards compatibility, we may be getting a FlyteSchema
        # NOTE(review): ``attrs`` is not used in the rest of this method.
        python_type, *attrs = extract_cols_and_format(python_type)
        # In case it's a FlyteSchema
        sdt = StructuredDatasetType(
            format=self.DEFAULT_FORMATS.get(python_type, None))

        # When the expected literal type describes a structured dataset, its fields replace the default above.
        if expected and expected.structured_dataset_type:
            sdt = StructuredDatasetType(
                columns=expected.structured_dataset_type.columns,
                format=expected.structured_dataset_type.format,
                external_schema_type=expected.structured_dataset_type.
                external_schema_type,
                external_schema_bytes=expected.structured_dataset_type.
                external_schema_bytes,
            )

        # If the type signature has the StructuredDataset class, it will, or at least should, also be a
        # StructuredDataset instance.
        if issubclass(python_type, StructuredDataset) and isinstance(
                python_val, StructuredDataset):
            # There are three cases that we need to take care of here.

            # 1. A task returns a StructuredDataset that was just a passthrough input. If this happens
            # then return the original literals.StructuredDataset without invoking any encoder
            #
            # Ex.
            #   def t1(dataset: Annotated[StructuredDataset, my_cols]) -> Annotated[StructuredDataset, my_cols]:
            #       return dataset
            if python_val._literal_sd is not None:
                if python_val.dataframe is not None:
                    raise ValueError(
                        f"Shouldn't have specified both literal {python_val._literal_sd} and dataframe {python_val.dataframe}"
                    )
                return Literal(scalar=Scalar(
                    structured_dataset=python_val._literal_sd))

            # 2. A task returns a python StructuredDataset with a uri.
            # Note: this case is also what happens when we start a local execution of a task with a python
            #  StructuredDataset. It gets converted into a literal first, then back into a python StructuredDataset.
            #
            # Ex.
            #   def t2(uri: str) -> Annotated[StructuredDataset, my_cols]
            #       return StructuredDataset(uri=uri)
            if python_val.dataframe is None:
                if not python_val.uri:
                    raise ValueError(
                        f"If dataframe is not specified, then the uri should be specified. {python_val}"
                    )
                sd_model = literals.StructuredDataset(
                    uri=python_val.uri,
                    metadata=StructuredDatasetMetadata(
                        structured_dataset_type=sdt),
                )
                return Literal(scalar=Scalar(structured_dataset=sd_model))

            # 3. This is the third and probably most common case. The python StructuredDataset object wraps a dataframe
            # that we will need to invoke an encoder for. Figure out which encoder to call and invoke it.
            df_type = type(python_val.dataframe)
            # Without a uri the protocol is looked up by dataframe type; otherwise it is derived from the uri.
            if python_val.uri is None:
                protocol = self.DEFAULT_PROTOCOLS[df_type]
            else:
                protocol = protocol_prefix(python_val.uri)
            return self.encode(
                ctx,
                python_val,
                df_type,
                protocol,
                sdt.format or typing.cast(StructuredDataset,
                                          python_val).DEFAULT_FILE_FORMAT,
                sdt,
            )

        # Otherwise assume it's a dataframe instance. Wrap it with some defaults
        fmt = self.DEFAULT_FORMATS[python_type]
        protocol = self.DEFAULT_PROTOCOLS[python_type]
        meta = StructuredDatasetMetadata(
            structured_dataset_type=expected.
            structured_dataset_type if expected else None)

        sd = StructuredDataset(dataframe=python_val, metadata=meta)
        return self.encode(ctx, sd, python_type, protocol, fmt, sdt)
Exemplo n.º 26
0
def _register_default_type_transformers():
    """Register the built-in transformers with ``TypeEngine``.

    Registration happens in this order: the simple scalar transformers
    (int, float, bool, str, datetime, timedelta, none), then the list, dict,
    text IO, path-like and binary IO transformers, and finally the restricted
    tuple types.
    """
    register = TypeEngine.register

    def primitive_literal(**field):
        # Wrap a single Primitive field in the Scalar/Literal envelope.
        return Literal(scalar=Scalar(primitive=Primitive(**field)))

    def primitive_of(lit):
        # Reach through the Literal envelope to the Primitive it carries.
        return lit.scalar.primitive

    register(
        SimpleTransformer(
            "int",
            int,
            _primitives.Integer.to_flyte_literal_type(),
            lambda value: primitive_literal(integer=value),
            lambda lit: primitive_of(lit).integer,
        )
    )

    # Floats use a dedicated converter on the way back to Python.
    register(
        SimpleTransformer(
            "float",
            float,
            _primitives.Float.to_flyte_literal_type(),
            lambda value: primitive_literal(float_value=value),
            _check_and_covert_float,
        )
    )

    register(
        SimpleTransformer(
            "bool",
            bool,
            _primitives.Boolean.to_flyte_literal_type(),
            lambda value: primitive_literal(boolean=value),
            lambda lit: primitive_of(lit).boolean,
        )
    )

    register(
        SimpleTransformer(
            "str",
            str,
            _primitives.String.to_flyte_literal_type(),
            lambda value: primitive_literal(string_value=value),
            lambda lit: primitive_of(lit).string_value,
        )
    )

    register(
        SimpleTransformer(
            "datetime",
            _datetime.datetime,
            _primitives.Datetime.to_flyte_literal_type(),
            lambda value: primitive_literal(datetime=value),
            lambda lit: primitive_of(lit).datetime,
        )
    )

    register(
        SimpleTransformer(
            "timedelta",
            _datetime.timedelta,
            _primitives.Timedelta.to_flyte_literal_type(),
            lambda value: primitive_literal(duration=value),
            lambda lit: primitive_of(lit).duration,
        )
    )

    # The "none" transformer maps to None in both directions.
    register(
        SimpleTransformer(
            "none",
            None,
            _type_models.LiteralType(simple=_type_models.SimpleType.NONE),
            lambda _: None,
            lambda _: None,
        )
    )

    # Container and IO transformers, each built and registered in turn.
    for transformer_cls in (
        ListTransformer,
        DictTransformer,
        TextIOTransformer,
        PathLikeTransformer,
        BinaryIOTransformer,
    ):
        register(transformer_cls())

    # Tuple-like types are registered as restricted types. typing's Tuples are unsupported: even though you can look
    # inside them, Flyte's type system doesn't support these currently.
    # Confusing note: typing.NamedTuple is in here even though task functions themselves can return them. We just mean
    # that the return signature of a task can be a NamedTuple that contains another NamedTuple inside it.
    # Also, it's not entirely true that Flyte IDL doesn't support tuples. We can always fake them as structs, but we'll
    # hold off on doing that for now, as we may amend the IDL formally to support tuples.
    for label, restricted_type in (
        ("non typed tuple", tuple),
        ("non typed tuple", typing.Tuple),
        ("named tuple", typing.NamedTuple),
    ):
        register(RestrictedType(label, restricted_type))
Exemplo n.º 27
0
 def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
     """Wrap ``python_val.value`` in a string-primitive ``Literal``.

     ``ctx``, ``python_type`` and ``expected`` are accepted for interface
     compatibility and are not read here.
     """
     string_primitive = Primitive(string_value=python_val.value)
     wrapped_scalar = Scalar(primitive=string_primitive)
     return Literal(scalar=wrapped_scalar)
Exemplo n.º 28
0
 def dict_to_generic_literal(v: dict) -> Literal:
     """Convert a dict into a ``Literal`` carrying a generic struct.

     The dict is serialised with ``_json.dumps`` and the resulting text is
     parsed into a fresh ``_struct.Struct`` via ``_json_format.Parse``; that
     struct becomes the ``generic`` field of the scalar.
     """
     json_text = _json.dumps(v)
     generic_struct = _json_format.Parse(json_text, _struct.Struct())
     return Literal(scalar=Scalar(generic=generic_struct))
Exemplo n.º 29
0
    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: typing.Union[FlyteFile, os.PathLike, str],
        python_type: typing.Type[FlyteFile],
        expected: LiteralType,
    ) -> Literal:
        """Convert a file-like Python value into a blob ``Literal``.

        :param ctx: Context whose ``file_access`` is used to detect remote paths and to upload data.
        :param python_val: A ``FlyteFile``, a ``pathlib.Path`` or a ``str`` path.
        :param python_type: Either ``os.PathLike`` or a ``FlyteFile`` (sub)class.
        :param expected: Not read by this method.
        :return: A literal whose blob uri is the upload destination when the file was uploaded, otherwise the
            original source path (or the ``FlyteFile``'s remote source when it has one).
        :raises TypeTransformerFailedError: If ``python_val`` is ``None``, is of an unsupported kind, or is a
            local ``str`` path that is not a file.
        :raises ValueError: If ``python_type`` is neither ``os.PathLike`` nor a ``FlyteFile`` subclass, or if a
            local ``pathlib.Path`` is not a file.
        """
        if python_val is None:
            raise TypeTransformerFailedError("None value cannot be converted to a file.")

        if python_type is not os.PathLike and not issubclass(python_type, FlyteFile):
            raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike")

        # The blob metadata is the same for every literal produced below.
        blob_format = FlyteFilePathTransformer.get_format(python_type)
        blob_meta = BlobMetadata(type=self._blob_type(format=blob_format))

        def blob_literal(uri) -> Literal:
            # Build the final literal around the shared metadata.
            return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=uri)))

        upload_destination = None
        do_upload = True

        if isinstance(python_val, FlyteFile):
            origin = python_val.path

            # A FlyteFile that already carries a remote source is converted straight back: going back and forth
            # between a FlyteFile Python value and a Blob Flyte IDL value should not do any work.
            if python_val._remote_source is not None:
                return blob_literal(python_val._remote_source)

            # remote_path=False means "never upload". Uploading also makes no sense when the path is already
            # remote, say https://www.google.com.
            if python_val.remote_path is False or ctx.file_access.is_remote(origin):
                do_upload = False

            # A simpler declared type also skips the upload, with a warning.
            if python_type is os.PathLike:
                logger.warning(
                    f"Converting from a FlyteFile Python instance to a Blob Flyte object, but only a {python_type} was"
                    f" specified. Since a simpler type was specified, we'll skip uploading!"
                )
                do_upload = False

            # Prefer a user-supplied destination over the random one generated further down.
            upload_destination = python_val.remote_path or None

        elif isinstance(python_val, (pathlib.Path, str)):
            origin = str(python_val)
            if not issubclass(python_type, FlyteFile):
                # python_type must be os.PathLike here - see the type check at the top of the function.
                do_upload = False
            elif ctx.file_access.is_remote(origin):
                do_upload = False
            else:
                # Local paths must point at an actual file before they can be uploaded.
                if isinstance(python_val, pathlib.Path) and not python_val.is_file():
                    raise ValueError(f"Error converting pathlib.Path {python_val} because it's not a file.")

                if isinstance(python_val, str) and not pathlib.Path(python_val).is_file():
                    raise TypeTransformerFailedError(f"Error converting {python_val} because it's not a file.")

        else:
            raise TypeTransformerFailedError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

        # Without an upload, the original source path is the only uri available.
        if not do_upload:
            return blob_literal(origin)

        # When uploading, the uri always points at the upload destination.
        if upload_destination is None:
            upload_destination = ctx.file_access.get_random_remote_path(origin)
        ctx.file_access.put_data(origin, upload_destination, is_multipart=False)
        return blob_literal(upload_destination)
Exemplo n.º 30
0
 def to_literal(self, ctx: FlyteContext, python_val: T,
                 python_type: Type[T], expected: LiteralType) -> Literal:
     """Convert ``python_val`` into a ``Literal`` carrying a generic struct.

     The value is turned into a dict with ``_MessageToDict`` and loaded into a
     fresh ``Struct``, which becomes the ``generic`` field of the scalar.
     ``ctx``, ``python_type`` and ``expected`` are not read here.
     """
     generic_payload = Struct()
     message_fields = _MessageToDict(python_val)
     generic_payload.update(message_fields)
     wrapped_scalar = Scalar(generic=generic_payload)
     return Literal(scalar=wrapped_scalar)