def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
    """Convert a Python list into a Flyte LiteralCollection.

    Each element is converted through the TypeEngine using the list's
    declared sub-type and the expected collection element type.
    """
    sub_type = self.get_sub_type(python_type)
    converted = []
    for item in python_val:
        converted.append(TypeEngine.to_literal(ctx, item, sub_type, expected.collection_type))
    return Literal(collection=LiteralCollection(literals=converted))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: typing.Union[FlyteFile, os.PathLike, str],
    python_type: typing.Type[FlyteFile],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteFile or path-like value into a Blob literal.

    Remote sources and remote paths pass through untouched; local paths are
    uploaded to the Flyte store unless ``remote_path=False`` was set on the
    FlyteFile, in which case the local path itself becomes the uri.

    :raises AssertionError: if ``python_val`` is None or not a FlyteFile/path-like.
    """
    remote_path = None
    should_upload = True

    if python_val is None:
        raise AssertionError("None value cannot be converted to a file.")

    if isinstance(python_val, FlyteFile):
        # If the object has a remote source, then we just convert it back.
        if python_val._remote_source is not None:
            meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))
        source_path = python_val.path
        if python_val.remote_path is False:
            # If the user specified the remote_path to be False, that means no matter what, do not upload
            should_upload = False
        else:
            # Otherwise, if not an "" use the user-specified remote path instead of the random one
            remote_path = python_val.remote_path or None
    else:
        if not (isinstance(python_val, os.PathLike) or isinstance(python_val, str)):
            raise AssertionError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")
        source_path = python_val

    # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
    # and just return a literal with a uri equal to the path given
    if ctx.file_access.is_remote(source_path) or not should_upload:
        # TODO: Add copying functionality so that FlyteFile(path="s3://a", remote_path="s3://b") will copy.
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))
    # For local paths, we will upload to the Flyte store (note that for local execution, the remote store is just
    # a subfolder), unless remote_path=False was given
    else:
        if remote_path is None:
            remote_path = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path or source_path)))
def to_literal(self, ctx: FlyteContext, python_val: os.PathLike, python_type: Type[os.PathLike], expected: LiteralType) -> Literal:
    """Convert a path-like value into a Blob literal.

    Remote paths are returned as-is; local files are uploaded to a random
    remote location first.

    Fix: the random remote path was previously generated unconditionally,
    even on the remote early-return where it was never used — it is now
    generated only when an upload actually happens.
    """
    # TODO we could guess the mimetype and allow the format to be changed at runtime. thus a non existent format
    # could be replaced with a guess format?

    # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
    # and just return a literal with a uri equal to the path given
    if ctx.file_access.is_remote(python_val):
        return Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(expected.blob), uri=python_val)))

    # For local files, we'll upload for the user.
    rpath = ctx.file_access.get_random_remote_path()
    ctx.file_access.put_data(python_val, rpath, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(expected.blob), uri=rpath)))
def _recursive_hash_placement(literal: Literal) -> Literal:
    """Return a copy of ``literal`` where hashed values are replaced by hash-only Literals.

    Collections and maps are rebuilt by recursing into their elements; a leaf
    literal carrying a hash is replaced by ``Literal(hash=...)``, otherwise it
    is returned unchanged.

    Fix: the original shadowed the ``literal`` parameter with the loop /
    comprehension variable, which is confusing and error-prone; the inner
    variable is now named ``sub_literal``.
    """
    if literal.collection is not None:
        sub_literals = [_recursive_hash_placement(sub_literal) for sub_literal in literal.collection.literals]
        return Literal(collection=LiteralCollection(literals=sub_literals))
    elif literal.map is not None:
        literal_map = {}
        for key, sub_literal in literal.map.literals.items():
            literal_map[key] = _recursive_hash_placement(sub_literal)
        return Literal(map=LiteralMap(literal_map))
    # Base case: a leaf literal. Prefer the precomputed hash when present.
    if literal.hash is not None:
        return Literal(hash=literal.hash)
    else:
        return literal
def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
    """Serialize a @dataclass_json-decorated dataclass into a generic (Struct) literal.

    :raises AssertionError: if the value is not a dataclass, or is not
        decorated with @dataclass_json.
    """
    if not dataclasses.is_dataclass(python_val):
        raise AssertionError(
            f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for "
            f"user defined datatypes in Flytekit"
        )
    if not issubclass(type(python_val), DataClassJsonMixin):
        raise AssertionError(
            f"Dataclass {python_type} should be decorated with @dataclass_json to be " f"serialized correctly"
        )
    # Round-trip through JSON to get a protobuf Struct representation.
    json_str = python_val.to_json()
    struct_val = _json_format.Parse(json_str, _struct.Struct())
    return Literal(scalar=Scalar(generic=struct_val))
def test_dolt_table_to_literal_error():
    """A generic struct without the expected dolt config raises ValueError."""
    struct = Struct()
    struct.update({"dummy": "data"})
    literal_value = Literal(Scalar(generic=struct))
    with pytest.raises(ValueError):
        DoltTableNameTransformer.to_python_value(
            self=None,
            ctx=None,
            lv=literal_value,
            expected_python_type=DoltTable,
        )
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: DatasetProfileView,
    python_type: Type[DatasetProfileView],
    expected: LiteralType,
) -> Literal:
    """Persist a whylogs DatasetProfileView and return a Blob literal pointing at it.

    The profile is written to a local scratch path, uploaded to a random
    remote directory, and the remote location becomes the blob uri.
    """
    remote_path = ctx.file_access.get_random_remote_directory()
    # NOTE(review): get_random_local_path returns a file path but is bound to a
    # name suggesting a directory and handed to DatasetProfileView.write —
    # presumably write() accepts a plain path; confirm against the whylogs API.
    local_dir = ctx.file_access.get_random_local_path()
    python_val.write(local_dir)
    ctx.file_access.upload(local_dir, remote_path)
    return Literal(scalar=Scalar(blob=Blob(uri=remote_path, metadata=BlobMetadata(type=self._TYPE_INFO))))
def to_literal(
    self, ctx: FlyteContext, python_val: typing.Any, python_type: Type[dict], expected: LiteralType
) -> Literal:
    """Convert a Python dict into either a generic Struct literal or a LiteralMap.

    If the expected type is a STRUCT (untyped dict), the value is serialized
    as a generic literal. Otherwise every value is converted with the map's
    declared value type; keys must be strings.

    Fixes: the key check now uses ``isinstance`` (accepts str subclasses)
    instead of ``type(k) != str``, and ``get_dict_types`` — a property of the
    dict type, not of any entry — is resolved once instead of per item.

    :raises ValueError: if any key is not a string.
    """
    if expected and expected.simple and expected.simple == SimpleType.STRUCT:
        return self.dict_to_generic_literal(python_val)

    k_type, v_type = self.get_dict_types(python_type)
    lit_map = {}
    for k, v in python_val.items():
        if not isinstance(k, str):
            raise ValueError("Flyte MapType expects all keys to be strings")
        lit_map[k] = TypeEngine.to_literal(ctx, v, v_type, expected.map_value_type)
    return Literal(map=LiteralMap(literals=lit_map))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: pyspark.sql.DataFrame,
    python_type: Type[pyspark.sql.DataFrame],
    expected: LiteralType,
) -> Literal:
    """Write the Spark DataFrame as parquet to a random remote directory and return a Schema literal."""
    target_dir = ctx.file_access.get_random_remote_directory()
    writer = SparkDataFrameSchemaWriter(to_path=target_dir, cols=None, fmt=SchemaFormat.PARQUET)
    writer.write(python_val)
    schema = Schema(target_dir, self._get_schema_type())
    return Literal(scalar=Scalar(schema=schema))
def test_file_format_getting_python_value():
    """A Blob literal with a txt format converts back into a FlyteFile['txt']."""
    transformer = TypeEngine.get_transformer(FlyteFile)
    ctx = FlyteContext.current_context()

    # This file probably won't exist, but it's okay. It won't be downloaded unless we try to read the thing returned
    blob_type = BlobType(format="txt", dimensionality=0)
    blob = Blob(metadata=BlobMetadata(type=blob_type), uri="file:///tmp/test")
    lv = Literal(scalar=Scalar(blob=blob))

    pv = transformer.to_python_value(ctx, lv, expected_python_type=FlyteFile["txt"])
    assert isinstance(pv, FlyteFile)
    assert pv.extension() == "txt"
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: pandas.DataFrame,
    python_type: Type[pandas.DataFrame],
    expected: LiteralType,
) -> Literal:
    """Stage the pandas DataFrame locally as parquet, upload it, and return a Schema literal."""
    staging_dir = ctx.file_access.get_random_local_directory()
    writer = PandasSchemaWriter(local_dir=staging_dir, cols=None, fmt=SchemaFormat.PARQUET)
    writer.write(python_val)
    remote_dir = ctx.file_access.get_random_remote_directory()
    ctx.file_access.put_data(staging_dir, remote_dir, is_multipart=True)
    return Literal(scalar=Scalar(schema=Schema(remote_dir, self._get_schema_type())))
def test_protos():
    """Round-trip a protobuf message through the TypeEngine and check the error path."""
    ctx = FlyteContext.current_context()

    pb = errors_pb2.ContainerError(code="code", message="message")
    lt = TypeEngine.to_literal_type(errors_pb2.ContainerError)
    assert lt.simple == SimpleType.STRUCT
    assert lt.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    lit = TypeEngine.to_literal(ctx, pb, errors_pb2.ContainerError, lt)
    round_tripped = TypeEngine.to_python_value(ctx, lit, errors_pb2.ContainerError)
    assert round_tripped == pb

    # Test error: a plain integer literal cannot be turned into a protobuf message
    bad_literal = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, bad_literal, errors_pb2.ContainerError)
def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal: meta = BlobMetadata( type=_core_types.BlobType( format=self.PYTHON_PICKLE_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE ) ) # Dump the task output into pickle local_dir = ctx.file_access.get_random_local_directory() os.makedirs(local_dir, exist_ok=True) local_path = ctx.file_access.get_random_local_path() uri = os.path.join(local_dir, local_path) with open(uri, "w+b") as outfile: cloudpickle.dump(python_val, outfile) remote_path = ctx.file_access.get_random_remote_path(uri) ctx.file_access.put_data(uri, remote_path, is_multipart=False) return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
def test_dolt_table_to_literal(mocker):
    """A generic struct carrying a dolt config converts into a DoltTable wrapping the loaded frame."""
    frame = pandas.DataFrame()
    mocker.patch("dolt_integrations.core.load", return_value=None)
    mocker.patch("doltcli.Dolt", return_value=None)
    mocker.patch("pandas.read_csv", return_value=frame)

    struct = Struct()
    struct.update({"config": {"db_path": "", "tablename": "t"}})
    literal_value = Literal(Scalar(generic=struct))

    result = DoltTableNameTransformer.to_python_value(
        self=None,
        ctx=None,
        lv=literal_value,
        expected_python_type=DoltTable,
    )
    assert result.data.equals(frame)
def to_literal(self, ctx: FlyteContext, python_val: np.ndarray, python_type: Type[np.ndarray], expected: LiteralType) -> Literal:
    """Save a numpy array to a local .npy file, upload it, and return a Blob literal."""
    blob_type = _core_types.BlobType(
        format=self.NUMPY_ARRAY_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE
    )
    meta = BlobMetadata(type=blob_type)

    local_path = ctx.file_access.get_random_local_path() + ".npy"
    pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)

    # save numpy array to a file
    # allow_pickle=False prevents numpy from trying to save object arrays (dtype=object) using pickle
    np.save(file=local_path, arr=python_val, allow_pickle=False)

    remote_path = ctx.file_access.get_random_remote_path(local_path)
    ctx.file_access.put_data(local_path, remote_path, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: MyDataset,
    python_type: Type[MyDataset],
    expected: LiteralType,
) -> Literal:
    """
    This method is used to convert from given python type object ``MyDataset`` to the Literal representation
    """
    # Step 1: upload all the data into a remote place recommended by Flyte
    destination = ctx.file_access.get_random_remote_directory()
    ctx.file_access.upload_directory(python_val.base_dir, destination)
    # Step 2: return a pointer to that remote directory in the form of a literal
    blob = Blob(uri=destination, metadata=BlobMetadata(type=self._TYPE_INFO))
    return Literal(scalar=Scalar(blob=blob))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: Union[FlyteFile, FlyteSchema, str],
    python_type: Type[GreatExpectationsType],
    expected: LiteralType,
) -> Literal:
    """Delegate literal conversion to the transformer matching the configured datatype.

    :raises TypeError: if the configured datatype is not FlyteSchema, FlyteFile, or str.
    """
    datatype = GreatExpectationsTypeTransformer.get_config(python_type)[0]

    if issubclass(datatype, FlyteSchema):
        return FlyteSchemaTransformer().to_literal(ctx, python_val, datatype, expected)
    if issubclass(datatype, FlyteFile):
        return FlyteFilePathTransformer().to_literal(ctx, python_val, datatype, expected)
    if issubclass(datatype, str):
        primitive = Primitive(string_value=python_val)
        return Literal(scalar=Scalar(primitive=primitive))
    raise TypeError(f"{datatype} is not a supported type")
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: pandas.DataFrame,
    python_type: Type[pandera.typing.DataFrame],
    expected: LiteralType,
) -> Literal:
    """Write a pandas DataFrame with its pandera column dtypes as parquet and return a Schema literal.

    :raises AssertionError: if the value is not a pandas DataFrame.
    """
    # Guard clause: only pandas DataFrames are supported.
    if not isinstance(python_val, pandas.DataFrame):
        raise AssertionError(
            f"Only Pandas Dataframe object can be returned from a task, returned object type {type(python_val)}"
        )
    staging_dir = ctx.file_access.get_random_local_directory()
    writer = PandasSchemaWriter(local_dir=staging_dir, cols=self._get_col_dtypes(python_type), fmt=SchemaFormat.PARQUET)
    writer.write(python_val)
    remote_dir = ctx.file_access.get_random_remote_directory()
    ctx.file_access.put_data(staging_dir, remote_dir, is_multipart=True)
    return Literal(scalar=Scalar(schema=Schema(remote_dir, self._get_schema_type(python_type))))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: PyTorchCheckpoint,
    python_type: Type[PyTorchCheckpoint],
    expected: LiteralType,
) -> Literal:
    """Serialize a PyTorchCheckpoint into a single Blob literal.

    Collects the module/optimizer state dicts plus any hyperparameters into
    one dict, saves it with torch.save, uploads the file, and returns a Blob
    literal in the PyTorch checkpoint format.

    :raises TypeTransformerFailedError: if nothing was collected to save.
    """
    meta = BlobMetadata(
        type=_core_types.BlobType(
            format=self.PYTORCH_CHECKPOINT_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE
        )
    )

    local_path = ctx.file_access.get_random_local_path() + ".pt"
    pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)

    to_save = {}
    for field in fields(python_val):
        value = getattr(python_val, field.name)
        # Falsy field values (None, empty containers) are skipped entirely.
        if value and field.name in ["module", "optimizer"]:
            # Stored under e.g. "module_state_dict" / "optimizer_state_dict".
            to_save[field.name + "_state_dict"] = getattr(value, "state_dict")()
        elif value and field.name == "hyperparameters":
            if isinstance(value, dict):
                to_save.update(value)
            elif isinstance(value, tuple):
                # NOTE(review): assumes a NamedTuple — a plain tuple has no
                # _asdict and would raise AttributeError here; confirm the
                # hyperparameters contract.
                to_save.update(value._asdict())
            elif is_dataclass(value):
                to_save.update(asdict(value))

    if not to_save:
        raise TypeTransformerFailedError(f"Cannot save empty {python_val}")

    # save checkpoint to a file
    torch.save(to_save, local_path)

    remote_path = ctx.file_access.get_random_remote_path(local_path)
    ctx.file_access.put_data(local_path, remote_path, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: ScikitLearn2ONNX,
    python_type: Type[ScikitLearn2ONNX],
    expected: LiteralType,
) -> Literal:
    """Convert the wrapped scikit-learn model to ONNX, upload it, and return a Blob literal.

    :raises TypeTransformerFailedError: if the type carries no converter config.
    """
    python_type, config = extract_config(python_type)
    # Guard clause: conversion is impossible without a config.
    if not config:
        raise TypeTransformerFailedError(f"{python_type}'s config is None")

    remote_path = ctx.file_access.get_random_remote_path()
    local_path = to_onnx(ctx, python_val.model, config.__dict__.copy())
    ctx.file_access.put_data(local_path, remote_path, is_multipart=False)

    blob_type = BlobType(format=self.ONNX_FORMAT, dimensionality=BlobType.BlobDimensionality.SINGLE)
    return Literal(scalar=Scalar(blob=Blob(uri=remote_path, metadata=BlobMetadata(type=blob_type))))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: FlyteDirectory,
    python_type: typing.Type[FlyteDirectory],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteDirectory or a path to a directory into a Blob literal.

    Remote sources/paths pass through untouched; local directories are
    uploaded (multipart) to the Flyte store unless ``remote_directory=False``
    was set on the FlyteDirectory.

    :raises ValueError: if a string/Path input is local but not a directory.
    :raises AssertionError: if the input is neither a FlyteDirectory nor path-like.
    """
    remote_directory = None
    should_upload = True
    meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))

    # There are two kinds of literals we handle, either an actual FlyteDirectory, or a string path to a directory.
    # Handle the FlyteDirectory case
    if isinstance(python_val, FlyteDirectory):
        # If the object has a remote source, then we just convert it back.
        if python_val._remote_source is not None:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))

        source_path = python_val.path
        # If the user specified the remote_directory to be False, that means no matter what, do not upload. Also if the
        # path given is already a remote path, say https://www.google.com, the concept of uploading to the Flyte
        # blob store doesn't make sense.
        if python_val.remote_directory is False or ctx.file_access.is_remote(source_path):
            should_upload = False

        # Set the remote destination if one was given instead of triggering a random one below
        remote_directory = python_val.remote_directory or None

    # Handle the string case
    elif isinstance(python_val, pathlib.Path) or isinstance(python_val, str):
        source_path = str(python_val)
        if ctx.file_access.is_remote(source_path):
            should_upload = False
        else:
            p = Path(source_path)
            if not p.is_dir():
                raise ValueError(f"Expected a directory. {source_path} is not a directory")
    else:
        raise AssertionError(f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}")

    # If we're uploading something, that means that the uri should always point to the upload destination.
    if should_upload:
        if remote_directory is None:
            remote_directory = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(source_path, remote_directory, is_multipart=True)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_directory)))
    # If not uploading, then we can only take the original source path as the uri.
    else:
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))
from flytekit.core.context_manager import FlyteContextManager from flytekit.core.type_engine import LiteralsResolver, TypeEngine from flytekit.models import interface as interface_models from flytekit.models.literals import Literal, LiteralCollection, LiteralMap, Primitive, Scalar from flytekit.types.structured.structured_dataset import StructuredDataset @pytest.mark.parametrize( "literal_value,python_type,expected_python_value", [ ( Literal( collection=LiteralCollection( literals=[ Literal(scalar=Scalar(primitive=Primitive(integer=1))), Literal(scalar=Scalar(primitive=Primitive(integer=2))), Literal(scalar=Scalar(primitive=Primitive(integer=3))), ] ) ), typing.List[int], [1, 2, 3], ), ( Literal( map=LiteralMap( literals={ "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))), "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))), }, )
def test_interface():
    """End-to-end LiteralsResolver test over a map, a list, an int, and a StructuredDataset."""
    ctx = FlyteContextManager.current_context()
    lt = TypeEngine.to_literal_type(pd.DataFrame)
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    # An Annotated StructuredDataset carries explicit column information.
    annotated_sd_type = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    assert df_literal_type.structured_dataset_type is not None
    assert len(df_literal_type.structured_dataset_type.columns) == 2
    assert df_literal_type.structured_dataset_type.columns[0].name == "name"
    assert df_literal_type.structured_dataset_type.columns[0].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.columns[1].name == "age"
    assert df_literal_type.structured_dataset_type.columns[1].literal_type.simple is not None

    sd = annotated_sd_type(df)
    sd_literal = TypeEngine.to_literal(ctx, sd, python_type=annotated_sd_type, expected=lt)

    # Literal map fed to the resolver.
    lm = {
        "my_map": Literal(
            map=LiteralMap(
                literals={
                    "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))),
                    "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))),
                },
            )
        ),
        "my_list": Literal(
            collection=LiteralCollection(
                literals=[
                    Literal(scalar=Scalar(primitive=Primitive(integer=1))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=2))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=3))),
                ]
            )
        ),
        "val_a": Literal(scalar=Scalar(primitive=Primitive(integer=21828))),
        "my_df": sd_literal,
    }

    variable_map = {
        "my_map": interface_models.Variable(type=TypeEngine.to_literal_type(typing.Dict[str, str]), description=""),
        "my_list": interface_models.Variable(type=TypeEngine.to_literal_type(typing.List[int]), description=""),
        "val_a": interface_models.Variable(type=TypeEngine.to_literal_type(int), description=""),
        "my_df": interface_models.Variable(type=df_literal_type, description=""),
    }

    lr = LiteralsResolver(lm, variable_map=variable_map, ctx=ctx)
    assert lr._ctx is ctx

    # Unknown keys raise for both access styles.
    with pytest.raises(ValueError):
        lr["not"]  # noqa
    with pytest.raises(ValueError):
        lr.get_literal("not")

    # Test that just using [] works, guessing from the Flyte type is invoked
    result = lr["my_list"]
    assert result == [1, 2, 3]

    # Test that using get works, guessing from the Flyte type is invoked
    result = lr.get("my_map")
    assert result == {
        "k1": "v1",
        "k2": "2",
    }

    # Getting the literal will return the Literal object itself
    assert lr.get_literal("my_df") is sd_literal
    guessed_df = lr["my_df"]
    # Based on guessing, so no column information
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 0
    # Resolved values are cached — same object on repeated access.
    guessed_df_2 = lr["my_df"]
    assert guessed_df is guessed_df_2

    # Update type hints with the annotated type
    lr.update_type_hints({"my_df": annotated_sd_type})
    del lr._native_values["my_df"]
    guessed_df = lr.get("my_df")
    # Using the user specified type, so number of columns is correct.
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 2
def test_dict_transformer():
    """Exercise DictTransformer literal-type inference and dict <-> literal conversion."""
    d = DictTransformer()

    def assert_struct(lit: LiteralType):
        # Untyped dicts and dicts with non-string keys map to a generic STRUCT.
        assert lit is not None
        assert lit.simple == SimpleType.STRUCT

    def recursive_assert(lit: LiteralType, expected: LiteralType, expected_depth: int = 1, curr_depth: int = 0):
        # Walk nested map_value_type links down to the innermost type and compare it.
        assert curr_depth <= expected_depth
        assert lit is not None
        if lit.map_value_type is None:
            assert lit == expected
            return
        recursive_assert(lit.map_value_type, expected, expected_depth, curr_depth + 1)

    # Type inference
    assert_struct(d.get_literal_type(dict))
    assert_struct(d.get_literal_type(typing.Dict[int, int]))
    recursive_assert(d.get_literal_type(typing.Dict[str, str]), LiteralType(simple=SimpleType.STRING))
    recursive_assert(d.get_literal_type(typing.Dict[str, int]), LiteralType(simple=SimpleType.INTEGER))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.datetime]), LiteralType(simple=SimpleType.DATETIME))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.timedelta]), LiteralType(simple=SimpleType.DURATION))
    recursive_assert(d.get_literal_type(typing.Dict[str, dict]), LiteralType(simple=SimpleType.STRUCT))
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, str]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[int, str]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[str, str]]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[str, dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[int, dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )

    ctx = FlyteContext.current_context()

    # An empty untyped dict round-trips through a STRUCT literal.
    lit = d.to_literal(ctx, {}, typing.Dict, LiteralType(SimpleType.STRUCT))
    pv = d.to_python_value(ctx, lit, typing.Dict)
    assert pv == {}

    # Literal to python
    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(scalar=Scalar(primitive=Primitive(integer=10))), dict)

    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(), dict)

    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})), dict)

    with pytest.raises(TypeError):
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})), typing.Dict[int, str])

    d.to_python_value(
        ctx,
        Literal(map=LiteralMap(literals={"x": Literal(scalar=Scalar(primitive=Primitive(integer=1)))})),
        typing.Dict[str, int],
    )
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: Union[StructuredDataset, typing.Any],
    python_type: Union[Type[StructuredDataset], Type],
    expected: LiteralType,
) -> Literal:
    """Convert a StructuredDataset (or a raw dataframe) into a structured-dataset literal.

    Handles passthrough of already-converted literals, uri-only datasets, and
    dataframes that must be run through a registered encoder.

    :raises ValueError: if both a literal and a dataframe are set, or a
        dataframe-less dataset has no uri.
    """
    # Make a copy in case we need to hand off to encoders, since we can't be sure of mutations.
    # Check first to see if it's even an SD type. For backwards compatibility, we may be getting a FlyteSchema
    python_type, *attrs = extract_cols_and_format(python_type)
    # In case it's a FlyteSchema
    sdt = StructuredDatasetType(format=self.DEFAULT_FORMATS.get(python_type, None))

    # The expected literal type, when present, wins over the defaults above.
    if expected and expected.structured_dataset_type:
        sdt = StructuredDatasetType(
            columns=expected.structured_dataset_type.columns,
            format=expected.structured_dataset_type.format,
            external_schema_type=expected.structured_dataset_type.external_schema_type,
            external_schema_bytes=expected.structured_dataset_type.external_schema_bytes,
        )

    # If the type signature has the StructuredDataset class, it will, or at least should, also be a
    # StructuredDataset instance.
    if issubclass(python_type, StructuredDataset) and isinstance(python_val, StructuredDataset):
        # There are three cases that we need to take care of here.

        # 1. A task returns a StructuredDataset that was just a passthrough input. If this happens
        # then return the original literals.StructuredDataset without invoking any encoder
        #
        # Ex.
        #   def t1(dataset: Annotated[StructuredDataset, my_cols]) -> Annotated[StructuredDataset, my_cols]:
        #       return dataset
        if python_val._literal_sd is not None:
            if python_val.dataframe is not None:
                raise ValueError(
                    f"Shouldn't have specified both literal {python_val._literal_sd} and dataframe {python_val.dataframe}"
                )
            return Literal(scalar=Scalar(structured_dataset=python_val._literal_sd))

        # 2. A task returns a python StructuredDataset with a uri.
        # Note: this case is also what happens we start a local execution of a task with a python StructuredDataset.
        # It gets converted into a literal first, then back into a python StructuredDataset.
        #
        # Ex.
        #   def t2(uri: str) -> Annotated[StructuredDataset, my_cols]
        #       return StructuredDataset(uri=uri)
        if python_val.dataframe is None:
            if not python_val.uri:
                raise ValueError(f"If dataframe is not specified, then the uri should be specified. {python_val}")
            sd_model = literals.StructuredDataset(
                uri=python_val.uri,
                metadata=StructuredDatasetMetadata(structured_dataset_type=sdt),
            )
            return Literal(scalar=Scalar(structured_dataset=sd_model))

        # 3. This is the third and probably most common case. The python StructuredDataset object wraps a dataframe
        # that we will need to invoke an encoder for. Figure out which encoder to call and invoke it.
        df_type = type(python_val.dataframe)
        if python_val.uri is None:
            protocol = self.DEFAULT_PROTOCOLS[df_type]
        else:
            protocol = protocol_prefix(python_val.uri)
        return self.encode(
            ctx,
            python_val,
            df_type,
            protocol,
            sdt.format or typing.cast(StructuredDataset, python_val).DEFAULT_FILE_FORMAT,
            sdt,
        )

    # Otherwise assume it's a dataframe instance. Wrap it with some defaults
    fmt = self.DEFAULT_FORMATS[python_type]
    protocol = self.DEFAULT_PROTOCOLS[python_type]
    meta = StructuredDatasetMetadata(structured_dataset_type=expected.structured_dataset_type if expected else None)

    sd = StructuredDataset(dataframe=python_val, metadata=meta)
    return self.encode(ctx, sd, python_type, protocol, fmt, sdt)
def _register_default_type_transformers():
    """Register the built-in transformers with the TypeEngine.

    Covers the Python primitives (int, float, bool, str, datetime, timedelta,
    None), the container transformers (list, dict), IO/path transformers, and
    restricted types that Flyte's type system deliberately refuses.
    """
    TypeEngine.register(
        SimpleTransformer(
            "int",
            int,
            _primitives.Integer.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(integer=x))),
            lambda x: x.scalar.primitive.integer,
        )
    )
    TypeEngine.register(
        SimpleTransformer(
            "float",
            float,
            _primitives.Float.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(float_value=x))),
            _check_and_covert_float,
        )
    )
    TypeEngine.register(
        SimpleTransformer(
            "bool",
            bool,
            _primitives.Boolean.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(boolean=x))),
            lambda x: x.scalar.primitive.boolean,
        )
    )
    TypeEngine.register(
        SimpleTransformer(
            "str",
            str,
            _primitives.String.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(string_value=x))),
            lambda x: x.scalar.primitive.string_value,
        )
    )
    TypeEngine.register(
        SimpleTransformer(
            "datetime",
            _datetime.datetime,
            _primitives.Datetime.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(datetime=x))),
            lambda x: x.scalar.primitive.datetime,
        )
    )
    TypeEngine.register(
        SimpleTransformer(
            "timedelta",
            _datetime.timedelta,
            _primitives.Timedelta.to_flyte_literal_type(),
            lambda x: Literal(scalar=Scalar(primitive=Primitive(duration=x))),
            lambda x: x.scalar.primitive.duration,
        )
    )
    # None has no payload at all — both conversion directions are no-ops.
    TypeEngine.register(
        SimpleTransformer(
            "none",
            None,
            _type_models.LiteralType(simple=_type_models.SimpleType.NONE),
            lambda x: None,
            lambda x: None,
        )
    )
    TypeEngine.register(ListTransformer())
    TypeEngine.register(DictTransformer())
    TypeEngine.register(TextIOTransformer())
    TypeEngine.register(PathLikeTransformer())
    TypeEngine.register(BinaryIOTransformer())

    # Untyped tuples are unsupported because there is no way to know what the
    # inner type is. Also unsupported are typing's Tuples. Even though you can look inside them, Flyte's type system
    # doesn't support these currently.
    # Confusing note: typing.NamedTuple is in here even though task functions themselves can return them. We just mean
    # that the return signature of a task can be a NamedTuple that contains another NamedTuple inside it.
    # Also, it's not entirely true that Flyte IDL doesn't support tuples. We can always fake them as structs, but we'll
    # hold off on doing that for now, as we may amend the IDL formally to support tuples.
    TypeEngine.register(RestrictedType("non typed tuple", tuple))
    TypeEngine.register(RestrictedType("non typed tuple", typing.Tuple))
    TypeEngine.register(RestrictedType("named tuple", typing.NamedTuple))
def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
    """Store the value's ``.value`` attribute as a string primitive literal."""
    primitive = Primitive(string_value=python_val.value)
    return Literal(scalar=Scalar(primitive=primitive))
def dict_to_generic_literal(v: dict) -> Literal:
    """Serialize an arbitrary dict into a generic (protobuf Struct) literal via its JSON form."""
    json_text = _json.dumps(v)
    struct_val = _json_format.Parse(json_text, _struct.Struct())
    return Literal(scalar=Scalar(generic=struct_val))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: typing.Union[FlyteFile, os.PathLike, str],
    python_type: typing.Type[FlyteFile],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteFile or path-like value into a Blob literal.

    Remote sources/paths pass through untouched; local files are uploaded to
    the Flyte store unless ``remote_path=False`` was set or only an
    os.PathLike type was requested.

    :raises TypeTransformerFailedError: for None input, non-file paths, or
        unsupported value types.
    :raises ValueError: for an unsupported ``python_type`` or a pathlib.Path
        that is not a file.
    """
    remote_path = None
    should_upload = True

    if python_val is None:
        raise TypeTransformerFailedError("None value cannot be converted to a file.")

    if not (python_type is os.PathLike or issubclass(python_type, FlyteFile)):
        raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike")

    # information used by all cases
    meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))

    if isinstance(python_val, FlyteFile):
        source_path = python_val.path

        # If the object has a remote source, then we just convert it back. This means that if someone is just
        # going back and forth between a FlyteFile Python value and a Blob Flyte IDL value, we don't do anything.
        if python_val._remote_source is not None:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))

        # If the user specified the remote_path to be False, that means no matter what, do not upload. Also if the
        # path given is already a remote path, say https://www.google.com, the concept of uploading to the Flyte
        # blob store doesn't make sense.
        if python_val.remote_path is False or ctx.file_access.is_remote(source_path):
            should_upload = False
        # If the type that's given is a simpler type, we also don't upload, and print a warning too.
        if python_type is os.PathLike:
            logger.warning(
                f"Converting from a FlyteFile Python instance to a Blob Flyte object, but only a {python_type} was"
                f" specified. Since a simpler type was specified, we'll skip uploading!"
            )
            should_upload = False

        # Set the remote destination if one was given instead of triggering a random one below
        remote_path = python_val.remote_path or None

    elif isinstance(python_val, pathlib.Path) or isinstance(python_val, str):
        source_path = str(python_val)
        if issubclass(python_type, FlyteFile):
            if ctx.file_access.is_remote(source_path):
                should_upload = False
            else:
                if isinstance(python_val, pathlib.Path) and not python_val.is_file():
                    raise ValueError(f"Error converting pathlib.Path {python_val} because it's not a file.")
                # If it's a string pointing to a local destination, then make sure it's a file.
                if isinstance(python_val, str):
                    p = pathlib.Path(python_val)
                    if not p.is_file():
                        raise TypeTransformerFailedError(f"Error converting {python_val} because it's not a file.")
        # python_type must be os.PathLike - see check at beginning of function
        else:
            should_upload = False

    else:
        raise TypeTransformerFailedError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

    # If we're uploading something, that means that the uri should always point to the upload destination.
    if should_upload:
        if remote_path is None:
            remote_path = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
    # If not uploading, then we can only take the original source path as the uri.
    else:
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))
def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
    """Convert a protobuf message into a generic (Struct) literal."""
    as_struct = Struct()
    as_struct.update(_MessageToDict(python_val))
    return Literal(scalar=Scalar(generic=as_struct))