def test_dont_convert_remotes():
    """Verify that a FlyteFile that already points at a remote URI (s3://...)
    is passed through to a sub-task's input binding unchanged, rather than
    being converted/re-uploaded, when dispatched through a dynamic task."""

    @task
    def t1(in1: FlyteFile):
        print(in1)

    @dynamic
    def dyn(in1: FlyteFile):
        t1(in1=in1)

    # Already-remote file; its URI must survive compilation untouched.
    fd = FlyteFile("s3://anything")

    with context_manager.FlyteContext.current_context().new_serialization_settings(
        serialization_settings=context_manager.SerializationSettings(
            project="test_proj",
            domain="test_domain",
            version="abc",
            image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
            env={},
        )
    ) as ctx:
        # TASK_EXECUTION mode makes dispatch_execute compile the dynamic workflow.
        with ctx.new_execution_context(mode=ExecutionState.Mode.TASK_EXECUTION) as ctx:
            lit = TypeEngine.to_literal(
                ctx, fd, FlyteFile, BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
            )
            lm = LiteralMap(literals={"in1": lit})
            wf = dyn.dispatch_execute(ctx, lm)
            # The remote URI appears verbatim in the compiled node's input binding.
            assert wf.nodes[0].inputs[0].binding.scalar.blob.uri == "s3://anything"
def _calculate_cache_key(task_name: str, cache_version: str, input_literal_map: LiteralMap) -> str:
    """Build a deterministic cache key for a task invocation.

    :param task_name: fully-qualified task name.
    :param cache_version: user-declared cache version of the task.
    :param input_literal_map: the task's input literals.
    :return: a string key of the form ``<task>-<version>-<b64(inputs)>``.
    """
    # Replace any literal carrying a hash with a hash-only literal, so the key
    # depends on the declared hash rather than the (potentially large) value.
    literal_map_overridden = {
        key: _recursive_hash_placement(literal) for key, literal in input_literal_map.literals.items()
    }

    # Pickle the overridden literal map and base64-encode it to get a stable
    # textual representation of the inputs.
    b64_encoded = base64.b64encode(cloudpickle.dumps(LiteralMap(literal_map_overridden)))
    # b64encode returns *bytes*; decode so the key does not embed a "b'...'"
    # repr artifact (the original f-string interpolated the bytes object).
    return f"{task_name}-{cache_version}-{b64_encoded.decode('ascii')}"
def to_literal(
    self, ctx: FlyteContext, python_val: typing.Any, python_type: Type[dict], expected: LiteralType
) -> Literal:
    """Convert a Python dict into a Flyte Literal.

    If the expected Flyte type is a generic STRUCT (e.g. for an untyped
    ``dict``), serialize to a generic literal; otherwise build a typed
    LiteralMap, which requires all keys to be strings.

    :raises ValueError: if a typed dict has a non-string key.
    """
    if expected and expected.simple and expected.simple == SimpleType.STRUCT:
        return self.dict_to_generic_literal(python_val)

    # The declared key/value types are loop-invariant: resolve them once
    # instead of once per entry.
    k_type, v_type = self.get_dict_types(python_type)
    lit_map = {}
    for k, v in python_val.items():
        # isinstance (rather than type(k) != str) also accepts str subclasses.
        if not isinstance(k, str):
            raise ValueError("Flyte MapType expects all keys to be strings")
        lit_map[k] = TypeEngine.to_literal(ctx, v, v_type, expected.map_value_type)
    return Literal(map=LiteralMap(literals=lit_map))
def extract_outputs(nb: str) -> LiteralMap:
    """
    Parse Outputs from Notebook.
    This looks for a cell, with the tag "outputs" to be present.
    """
    with open(nb) as json_file:
        data = json.load(json_file)
        for p in data["cells"]:
            meta = p["metadata"]
            # Untagged cells have no "tags" entry; skip them instead of
            # raising KeyError.
            if "outputs" in meta.get("tags", []):
                # Log messages may also appear in the outputs list, so scan
                # for the first record that actually carries data instead of
                # blindly reading index 0.
                for record in p["outputs"]:
                    if "data" in record:
                        outputs = " ".join(record["data"]["text/plain"])
                        m = _pb2_LiteralMap()
                        _text_format.Parse(outputs, m)
                        return LiteralMap.from_flyte_idl(m)
        return None
def _recursive_hash_placement(literal: Literal) -> Literal:
    """Return a copy of ``literal`` in which every literal that carries a hash
    is replaced by a hash-only literal; collections and maps are rebuilt
    recursively.
    """
    # Check the hash FIRST: a collection/map literal that itself carries a
    # hash must be represented by that hash, not recursed into (the original
    # ordering would ignore a hash set on a composite literal).
    if literal.hash is not None:
        return Literal(hash=literal.hash)
    if literal.collection is not None:
        # Use a distinct loop name to avoid shadowing the parameter.
        rebuilt = [_recursive_hash_placement(lit) for lit in literal.collection.literals]
        return Literal(collection=LiteralCollection(literals=rebuilt))
    if literal.map is not None:
        rebuilt_map = {key: _recursive_hash_placement(lit) for key, lit in literal.map.literals.items()}
        return Literal(map=LiteralMap(rebuilt_map))
    # Base case: a plain literal with no hash is kept as-is.
    return literal
def record_outputs(**kwargs) -> str:
    """
    Use this method to record outputs from a notebook.
    It will convert all outputs to a Flyte understandable format. For Files, Directories, please use FlyteFile or
    FlyteDirectory, or wrap up your paths in these decorators.
    """
    # NOTE: ``**kwargs`` is always a dict (possibly empty), never None, so the
    # former ``if kwargs is None: return ""`` guard was dead code and has been
    # removed. An empty call simply serializes an empty LiteralMap.
    ctx = FlyteContext.current_context()
    m = {}
    for k, v in kwargs.items():
        # Infer the Flyte type from the runtime Python type of each value.
        expected = TypeEngine.to_literal_type(type(v))
        m[k] = TypeEngine.to_literal(ctx, python_type=type(v), python_val=v, expected=expected)
    return LiteralMap(literals=m).to_flyte_idl()
def extract_outputs(nb: str) -> LiteralMap:
    """
    Parse Outputs from Notebook.
    This looks for a cell, with the tag "outputs" to be present.
    """
    with open(nb) as json_file:
        data = json.load(json_file)
        for p in data["cells"]:
            meta = p["metadata"]
            # Untagged cells carry no "tags" key in their metadata; use .get
            # so such cells are skipped instead of raising KeyError.
            if "outputs" in meta.get("tags", []):
                # Sometimes log messages will be in the list of outputs, so iterate to find where
                # the data is.
                for record in p["outputs"]:
                    if "data" in record:
                        outputs = " ".join(record["data"]["text/plain"])
                        m = _pb2_LiteralMap()
                        _text_format.Parse(outputs, m)
                        return LiteralMap.from_flyte_idl(m)
        return None
def test_dont_convert_remotes():
    """Verify that a FlyteFile already pointing at a remote URI (s3://...) is
    passed through unchanged to a sub-task's input binding when dispatched via
    a dynamic task, and that an int is rejected for a FlyteFile input."""

    @task
    def t1(in1: FlyteFile):
        print(in1)

    @dynamic
    def dyn(in1: FlyteFile):
        t1(in1=in1)

    # Already-remote file; its URI must survive compilation untouched.
    fd = FlyteFile("s3://anything")

    with context_manager.FlyteContextManager.with_context(
        context_manager.FlyteContextManager.current_context().with_serialization_settings(
            flytekit.configuration.SerializationSettings(
                project="test_proj",
                domain="test_domain",
                version="abc",
                image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
                env={},
            )
        )
    ):
        ctx = context_manager.FlyteContextManager.current_context()
        # TASK_EXECUTION mode makes dispatch_execute compile the dynamic workflow.
        with context_manager.FlyteContextManager.with_context(
            ctx.with_execution_state(ctx.new_execution_state().with_params(mode=ExecutionState.Mode.TASK_EXECUTION))
        ) as ctx:
            lit = TypeEngine.to_literal(
                ctx, fd, FlyteFile, BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
            )
            lm = LiteralMap(literals={"in1": lit})
            wf = dyn.dispatch_execute(ctx, lm)
            # The remote URI appears verbatim in the compiled node's input binding.
            assert wf.nodes[0].inputs[0].binding.scalar.blob.uri == "s3://anything"

            # A plain int has no registered conversion to FlyteFile and must fail.
            with pytest.raises(TypeError, match="No automatic conversion found from type <class 'int'>"):
                TypeEngine.to_literal(
                    ctx, 3, FlyteFile, BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
                )
def test_dict_transformer():
    """Exercise DictTransformer: literal-type inference for (nested) dicts,
    dict -> Literal conversion, and Literal -> Python conversion including the
    failure modes (non-map literals, non-string keys)."""
    d = DictTransformer()

    def assert_struct(lit: LiteralType):
        # A dict that cannot be expressed as a typed map becomes a generic STRUCT.
        assert lit is not None
        assert lit.simple == SimpleType.STRUCT

    def recursive_assert(lit: LiteralType, expected: LiteralType, expected_depth: int = 1, curr_depth: int = 0):
        # Walk nested map_value_type links, asserting the innermost value type
        # equals `expected` and the nesting depth never exceeds `expected_depth`.
        assert curr_depth <= expected_depth
        assert lit is not None
        if lit.map_value_type is None:
            assert lit == expected
            return
        recursive_assert(lit.map_value_type, expected, expected_depth, curr_depth + 1)

    # Type inference
    assert_struct(d.get_literal_type(dict))
    # Non-string keys cannot form a typed Flyte map -> STRUCT.
    assert_struct(d.get_literal_type(typing.Dict[int, int]))
    recursive_assert(d.get_literal_type(typing.Dict[str, str]), LiteralType(simple=SimpleType.STRING))
    recursive_assert(d.get_literal_type(typing.Dict[str, int]), LiteralType(simple=SimpleType.INTEGER))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.datetime]), LiteralType(simple=SimpleType.DATETIME))
    recursive_assert(d.get_literal_type(typing.Dict[str, datetime.timedelta]), LiteralType(simple=SimpleType.DURATION))
    recursive_assert(d.get_literal_type(typing.Dict[str, dict]), LiteralType(simple=SimpleType.STRUCT))
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, str]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[int, str]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[str, str]]]),
        LiteralType(simple=SimpleType.STRING),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[str, dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=3,
    )
    recursive_assert(
        d.get_literal_type(typing.Dict[str, typing.Dict[str, typing.Dict[int, dict]]]),
        LiteralType(simple=SimpleType.STRUCT),
        expected_depth=2,
    )

    ctx = FlyteContext.current_context()

    # Round-trip an empty dict through a generic STRUCT literal.
    lit = d.to_literal(ctx, {}, typing.Dict, LiteralType(SimpleType.STRUCT))
    pv = d.to_python_value(ctx, lit, typing.Dict)
    assert pv == {}

    # Literal to python
    with pytest.raises(TypeError):
        # A scalar literal is not a map.
        d.to_python_value(ctx, Literal(scalar=Scalar(primitive=Primitive(integer=10))), dict)
    with pytest.raises(TypeError):
        # An empty literal is not a map either.
        d.to_python_value(ctx, Literal(), dict)
    with pytest.raises(TypeError):
        # A None entry in an untyped dict cannot be converted.
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})), dict)
    with pytest.raises(TypeError):
        # Typed dicts require string keys.
        d.to_python_value(ctx, Literal(map=LiteralMap(literals={"x": None})), typing.Dict[int, str])
    # A well-formed str->int map converts cleanly.
    d.to_python_value(
        ctx,
        Literal(map=LiteralMap(literals={"x": Literal(scalar=Scalar(primitive=Primitive(integer=1)))})),
        typing.Dict[str, int],
    )
def test_interface():
    """Exercise LiteralsResolver: type-guessed retrieval via [] and get(),
    literal pass-through via get_literal(), caching of converted values, and
    re-conversion after update_type_hints()."""
    ctx = FlyteContextManager.current_context()
    lt = TypeEngine.to_literal_type(pd.DataFrame)
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    # An Annotated StructuredDataset carries column information in its literal type.
    annotated_sd_type = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    assert df_literal_type.structured_dataset_type is not None
    assert len(df_literal_type.structured_dataset_type.columns) == 2
    assert df_literal_type.structured_dataset_type.columns[0].name == "name"
    assert df_literal_type.structured_dataset_type.columns[0].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.columns[1].name == "age"
    assert df_literal_type.structured_dataset_type.columns[1].literal_type.simple is not None

    sd = annotated_sd_type(df)
    sd_literal = TypeEngine.to_literal(ctx, sd, python_type=annotated_sd_type, expected=lt)

    lm = {
        "my_map": Literal(
            map=LiteralMap(
                literals={
                    "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))),
                    "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))),
                },
            )
        ),
        "my_list": Literal(
            collection=LiteralCollection(
                literals=[
                    Literal(scalar=Scalar(primitive=Primitive(integer=1))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=2))),
                    Literal(scalar=Scalar(primitive=Primitive(integer=3))),
                ]
            )
        ),
        "val_a": Literal(scalar=Scalar(primitive=Primitive(integer=21828))),
        "my_df": sd_literal,
    }

    variable_map = {
        "my_map": interface_models.Variable(type=TypeEngine.to_literal_type(typing.Dict[str, str]), description=""),
        "my_list": interface_models.Variable(type=TypeEngine.to_literal_type(typing.List[int]), description=""),
        "val_a": interface_models.Variable(type=TypeEngine.to_literal_type(int), description=""),
        "my_df": interface_models.Variable(type=df_literal_type, description=""),
    }

    lr = LiteralsResolver(lm, variable_map=variable_map, ctx=ctx)
    assert lr._ctx is ctx

    # Unknown keys raise for both access styles.
    with pytest.raises(ValueError):
        lr["not"]  # noqa
    with pytest.raises(ValueError):
        lr.get_literal("not")

    # Test that just using [] works, guessing from the Flyte type is invoked
    result = lr["my_list"]
    assert result == [1, 2, 3]

    # Test that using get works, guessing from the Flyte type is invoked
    result = lr.get("my_map")
    assert result == {
        "k1": "v1",
        "k2": "2",
    }

    # Getting the literal will return the Literal object itself
    assert lr.get_literal("my_df") is sd_literal
    guessed_df = lr["my_df"]
    # Based on guessing, so no column information
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 0
    # Converted values are cached: same object on repeated access.
    guessed_df_2 = lr["my_df"]
    assert guessed_df is guessed_df_2

    # Update type hints with the annotated type
    lr.update_type_hints({"my_df": annotated_sd_type})
    # Drop the cached conversion so get() re-converts with the new hint.
    del lr._native_values["my_df"]
    guessed_df = lr.get("my_df")
    # Using the user specified type, so number of columns is correct.
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 2
collection=LiteralCollection( literals=[ Literal(scalar=Scalar(primitive=Primitive(integer=1))), Literal(scalar=Scalar(primitive=Primitive(integer=2))), Literal(scalar=Scalar(primitive=Primitive(integer=3))), ] ) ), typing.List[int], [1, 2, 3], ), ( Literal( map=LiteralMap( literals={ "k1": Literal(scalar=Scalar(primitive=Primitive(string_value="v1"))), "k2": Literal(scalar=Scalar(primitive=Primitive(string_value="2"))), }, ) ), typing.Dict[str, str], {"k1": "v1", "k2": "2"}, ), ], ) def test_literals_resolver(literal_value, python_type, expected_python_value): lit_dict = {"a": literal_value} lr = LiteralsResolver(lit_dict) out = lr.get("a", python_type) assert out == expected_python_value