def extract_value(
    ctx: FlyteContext, input_val: Any, val_type: type, flyte_literal_type: _type_models.LiteralType
) -> _literal_models.Literal:
    """Recursively convert a Python value into a Flyte ``Literal``.

    Lists and dicts are converted element-wise; a ``Promise`` is unwrapped to
    its stored literal; anything else is delegated to the ``TypeEngine``.

    :param ctx: context handed to the TypeEngine for conversion
    :param input_val: the Python-native value (or Promise) to convert
    :param val_type: the declared Python type of ``input_val``
    :param flyte_literal_type: the expected Flyte literal type
    :raises Exception: when the literal type does not match the shape of the value
    :raises AssertionError: for VoidPromise inputs and tuples, which cannot be bound
    """
    if isinstance(input_val, list):
        if flyte_literal_type.collection_type is None:
            raise Exception(f"Not a collection type {flyte_literal_type} but got a list {input_val}")
        try:
            sub_type = ListTransformer.get_sub_type(val_type)
        except ValueError:
            # The declared type did not reveal an element type; fall back to the
            # runtime type of the first element. An empty list gives us nothing
            # to inspect, so re-raise.
            if len(input_val) == 0:
                raise
            sub_type = type(input_val[0])
        literals = [extract_value(ctx, v, sub_type, flyte_literal_type.collection_type) for v in input_val]
        return _literal_models.Literal(collection=_literal_models.LiteralCollection(literals=literals))
    elif isinstance(input_val, dict):
        if flyte_literal_type.map_value_type is None and flyte_literal_type.simple != _type_models.SimpleType.STRUCT:
            raise Exception(f"Not a map type {flyte_literal_type} but got a map {input_val}")
        k_type, sub_type = DictTransformer.get_dict_types(val_type)
        if flyte_literal_type.simple == _type_models.SimpleType.STRUCT:
            # Generic structs are converted wholesale by the TypeEngine.
            return TypeEngine.to_literal(ctx, input_val, type(input_val), flyte_literal_type)
        else:
            literals = {
                k: extract_value(ctx, v, sub_type, flyte_literal_type.map_value_type) for k, v in input_val.items()
            }
            return _literal_models.Literal(map=_literal_models.LiteralMap(literals=literals))
    elif isinstance(input_val, Promise):
        # In the example above, this handles the "in2=a" type of argument
        return input_val.val
    elif isinstance(input_val, VoidPromise):
        raise AssertionError(
            f"Outputs of a non-output producing task {input_val.task_name} cannot be passed to another task."
        )
    elif isinstance(input_val, tuple):
        # Fixed: the original message pieces concatenated without separating
        # spaces, producing "de-reference theactual" and "thenreturn v.x".
        raise AssertionError(
            "Tuples are not a supported type for individual values in Flyte - got a tuple -"
            f" {input_val}. If using named tuple in an inner task, please, de-reference the"
            " actual attribute that you want to use. For example, in NamedTuple('OP', x=int) then"
            " return v.x, instead of v, even if this has a single element"
        )
    else:
        # This handles native values, the 5 example
        return TypeEngine.to_literal(ctx, input_val, val_type, flyte_literal_type)
def binding_data_from_python_std(
    ctx: _flyte_context.FlyteContext,
    expected_literal_type: _type_models.LiteralType,
    t_value: typing.Any,
    t_value_type: type,
) -> _literals_models.BindingData:
    """Build a ``BindingData`` for one input value of a node.

    Promises that are not yet materialized become promise bindings; lists and
    dicts are converted recursively into binding collections/maps; everything
    else is converted to a scalar literal via the ``TypeEngine``.

    :param ctx: context handed to the TypeEngine for literal conversion
    :param expected_literal_type: the Flyte type the binding must conform to
    :param t_value: the Python value, Promise, list, or dict being bound
    :param t_value_type: the declared Python type of ``t_value``
    :raises AssertionError: on VoidPromise inputs or shape mismatches
    """
    # This handles the case where the given value is the output of another task
    if isinstance(t_value, Promise):
        if not t_value.is_ready:
            return _literals_models.BindingData(promise=t_value.ref)
        # NOTE(review): a *ready* Promise falls through to the scalar case at the
        # bottom, which passes the Promise itself to TypeEngine.to_literal —
        # presumably the engine unwraps it; confirm this is intentional.
    elif isinstance(t_value, VoidPromise):
        raise AssertionError(
            f"Cannot pass output from task {t_value.task_name} that produces no outputs to a downstream task"
        )
    elif isinstance(t_value, list):
        if expected_literal_type.collection_type is None:
            raise AssertionError(
                f"this should be a list and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        sub_type = ListTransformer.get_sub_type(t_value_type)
        # Each element is bound recursively against the collection's element type.
        collection = _literals_models.BindingDataCollection(bindings=[
            binding_data_from_python_std(
                ctx, expected_literal_type.collection_type, t, sub_type) for t in t_value
        ])
        return _literals_models.BindingData(collection=collection)
    elif isinstance(t_value, dict):
        # A dict may bind either to a map type or to a generic STRUCT.
        if (expected_literal_type.map_value_type is None
                and expected_literal_type.simple != _type_models.SimpleType.STRUCT):
            raise AssertionError(
                f"this should be a Dictionary type and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        k_type, v_type = DictTransformer.get_dict_types(t_value_type)
        if expected_literal_type.simple == _type_models.SimpleType.STRUCT:
            # STRUCTs are converted wholesale and bound as a scalar.
            lit = TypeEngine.to_literal(ctx, t_value, type(t_value), expected_literal_type)
            return _literals_models.BindingData(scalar=lit.scalar)
        else:
            m = _literals_models.BindingDataMap(
                bindings={
                    k: binding_data_from_python_std(
                        ctx, expected_literal_type.map_value_type, v, v_type)
                    for k, v in t_value.items()
                })
            return _literals_models.BindingData(map=m)

    # This is the scalar case - e.g. my_task(in1=5)
    scalar = TypeEngine.to_literal(ctx, t_value, t_value_type, expected_literal_type).scalar
    return _literals_models.BindingData(scalar=scalar)
def test_assert_type():
    """A bare int is rejected when a FlyteSchema literal is expected."""
    base_ctx = context_manager.FlyteContextManager.current_context()
    exec_state = base_ctx.new_execution_state().with_params(mode=ExecutionState.Mode.TASK_EXECUTION)
    with context_manager.FlyteContextManager.with_context(base_ctx.with_execution_state(exec_state)) as ctx:
        schema_type = FlyteSchema[kwtypes(x=int, y=float)]
        transformer = FlyteSchemaTransformer()
        literal_type = transformer.get_literal_type(schema_type)
        with pytest.raises(ValueError, match="DataFrames of type <class 'int'> are not supported currently"):
            TypeEngine.to_literal(ctx, 3, schema_type, literal_type)
def extract_value(
    ctx: FlyteContext, input_val: Any, val_type: type, flyte_literal_type: _type_models.LiteralType
) -> _literal_models.Literal:
    """Recursively turn a Python value (or Promise) into a Flyte ``Literal``."""
    if isinstance(input_val, list):
        if flyte_literal_type.collection_type is None:
            raise Exception(f"Not a collection type {flyte_literal_type} but got a list {input_val}")
        try:
            element_type = ListTransformer.get_sub_type(val_type)
        except ValueError:
            # The declaration did not reveal an element type; inspect the first
            # element instead. An empty list leaves nothing to inspect.
            if len(input_val) == 0:
                raise
            element_type = type(input_val[0])
        converted = [extract_value(ctx, item, element_type, flyte_literal_type.collection_type) for item in input_val]
        return _literal_models.Literal(collection=_literal_models.LiteralCollection(literals=converted))

    if isinstance(input_val, dict):
        if flyte_literal_type.map_value_type is None and flyte_literal_type.simple != _type_models.SimpleType.STRUCT:
            raise Exception(f"Not a map type {flyte_literal_type} but got a map {input_val}")
        _key_type, value_type = DictTransformer.get_dict_types(val_type)
        if flyte_literal_type.simple == _type_models.SimpleType.STRUCT:
            # Generic structs are converted in one shot by the TypeEngine.
            return TypeEngine.to_literal(ctx, input_val, type(input_val), flyte_literal_type)
        entries = {
            key: extract_value(ctx, item, value_type, flyte_literal_type.map_value_type)
            for key, item in input_val.items()
        }
        return _literal_models.Literal(map=_literal_models.LiteralMap(literals=entries))

    if isinstance(input_val, Promise):
        # In the example above, this handles the "in2=a" type of argument
        return input_val.val

    if isinstance(input_val, VoidPromise):
        raise AssertionError(
            f"Outputs of a non-output producing task {input_val.task_name} cannot be passed to another task."
        )

    # This handles native values, the 5 example
    return TypeEngine.to_literal(ctx, input_val, val_type, flyte_literal_type)
def test_format_correct():
    """A format annotation on StructuredDataset is honored end to end.

    Registers an "avro" encoder mid-test; conversion fails before registration
    and succeeds (carrying the format) after.
    """
    class TempEncoder(StructuredDatasetEncoder):
        def __init__(self):
            # Handles pd.DataFrame -> S3 in "avro" format.
            super().__init__(pd.DataFrame, S3, "avro")

        def encode(
            self,
            ctx: FlyteContext,
            structured_dataset: StructuredDataset,
            structured_dataset_type: StructuredDatasetType,
        ) -> literals.StructuredDataset:
            # Stub encoder: returns a fixed URI, no actual data written here.
            return literals.StructuredDataset(
                uri="/tmp/avro", metadata=StructuredDatasetMetadata(structured_dataset_type))

    ctx = FlyteContextManager.current_context()
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    annotated_sd_type = Annotated[StructuredDataset, "avro", kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    # The annotation's column kwtypes become typed columns on the literal type.
    assert df_literal_type.structured_dataset_type is not None
    assert len(df_literal_type.structured_dataset_type.columns) == 2
    assert df_literal_type.structured_dataset_type.columns[0].name == "name"
    assert df_literal_type.structured_dataset_type.columns[
        0].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.columns[1].name == "age"
    assert df_literal_type.structured_dataset_type.columns[
        1].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.format == "avro"

    # No "avro" encoder registered yet, so conversion must fail.
    sd = annotated_sd_type(df)
    with pytest.raises(ValueError):
        TypeEngine.to_literal(ctx, sd, python_type=annotated_sd_type, expected=df_literal_type)

    StructuredDatasetTransformerEngine.register(TempEncoder(), default_for_type=False)
    # After registration the same conversion succeeds and keeps the format.
    sd2 = annotated_sd_type(df)
    sd_literal = TypeEngine.to_literal(ctx, sd2, python_type=annotated_sd_type, expected=df_literal_type)
    assert sd_literal.scalar.structured_dataset.metadata.structured_dataset_type.format == "avro"

    @task
    def t1() -> Annotated[StructuredDataset, "avro"]:
        return StructuredDataset(dataframe=df)

    # The task's annotated return type propagates to the file_format.
    assert t1().file_format == "avro"
def test_create_native_named_tuple():
    """create_native_named_tuple handles None, single, list, and named-tuple promise inputs."""
    ctx = FlyteContextManager.current_context()

    def int_promise(name, value):
        # Every promise in this test is an INTEGER literal; build them uniformly.
        return Promise(var=name, val=TypeEngine.to_literal(ctx, value, int, LiteralType(simple=SimpleType.INTEGER)))

    # Nothing to build from None or an empty promise list.
    assert create_native_named_tuple(ctx, promises=None, entity_interface=Interface()) is None
    assert create_native_named_tuple(ctx, promises=[], entity_interface=Interface()) is None

    x_promise = int_promise("x", 1)
    y_promise = int_promise("y", 2)

    single = create_native_named_tuple(ctx, promises=x_promise, entity_interface=Interface(outputs={"x": int}))
    assert single
    assert single == 1

    pair = create_native_named_tuple(
        ctx, promises=[x_promise, y_promise], entity_interface=Interface(outputs={"x": int, "y": int})
    )
    assert pair
    assert pair == (1, 2)

    named = create_native_named_tuple(
        ctx,
        promises=[x_promise, y_promise],
        entity_interface=Interface(outputs={"x": int, "y": int}, output_tuple_name="Tup"),
    )
    assert named
    assert named == (1, 2)
    assert named.__class__.__name__ == "Tup"

    # Interface declares fewer outputs than promises supplied -> KeyError.
    with pytest.raises(KeyError):
        create_native_named_tuple(
            ctx,
            promises=[x_promise, y_promise],
            entity_interface=Interface(outputs={"x": int}, output_tuple_name="Tup"),
        )
def test_transformer_to_literal_local():
    """FlyteDirectory -> multipart-blob literal conversion against a local sandbox.

    Covers an empty directory, a directory with one file, non-string inputs,
    and a path that is a file rather than a directory.
    """
    random_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory()
    fs = FileAccessProvider(local_sandbox_dir=random_dir, raw_output_prefix=os.path.join(random_dir, "raw"))
    ctx = context_manager.FlyteContext.current_context()
    with context_manager.FlyteContextManager.with_context(ctx.with_file_access(fs)) as ctx:
        # Use a separate directory that we know won't be the same as anything generated by flytekit itself, lest we
        # accidentally try to cp -R /some/folder /some/folder/sub which causes exceptions obviously.
        p = "/tmp/flyte/test_fd_transformer"
        # Create an empty directory and call to literal on it
        if os.path.exists(p):
            shutil.rmtree(p)
        pathlib.Path(p).mkdir(parents=True)

        tf = FlyteDirToMultipartBlobTransformer()
        lt = tf.get_literal_type(FlyteDirectory)
        literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)
        # The uploaded blob lands inside the sandbox acting as "remote" storage.
        assert literal.scalar.blob.uri.startswith(random_dir)

        # Create a director with one file in it
        if os.path.exists(p):
            shutil.rmtree(p)
        pathlib.Path(p).mkdir(parents=True)
        with open(os.path.join(p, "xyz"), "w") as fh:
            fh.write("Hello world\n")
        literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)
        # The single file should have been copied to the "remote" location.
        mock_remote_files = os.listdir(literal.scalar.blob.uri)
        assert mock_remote_files == ["xyz"]

        # The only primitives allowed are strings
        with pytest.raises(AssertionError):
            tf.to_literal(ctx, 3, FlyteDirectory, lt)
        with pytest.raises(TypeError, match="No automatic conversion from <class 'int'>"):
            TypeEngine.to_literal(ctx, 3, FlyteDirectory, lt)

        # Can't use if it's not a directory
        with pytest.raises(FlyteAssertion):
            p = "/tmp/flyte/xyz"
            path = pathlib.Path(p)
            try:
                # Best-effort cleanup of a leftover file from a previous run.
                path.unlink()
            except OSError:
                ...
            with open(p, "w") as fh:
                fh.write("hello world\n")
            tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)
def test_comparison_lits():
    """Comparison and boolean combinators on Promise-wrapped integer literals."""
    lhs = Promise("x", TypeEngine.to_literal(None, 5, int, None))
    rhs = Promise("y", TypeEngine.to_literal(None, 8, int, None))

    def check(expr, expected: bool):
        # Echo the expression for easier debugging, then verify its value.
        print(f"{expr} evals to {expr.eval()}")
        assert expected == expr.eval()

    check(lhs == rhs, False)
    check(lhs < rhs, True)
    check((lhs == rhs) & (lhs < rhs), False)
    check(((lhs == rhs) & (lhs < rhs)) | (lhs > rhs), False)
    check(lhs < 5, False)
    check(lhs >= 5, True)
    check(rhs >= 5, True)
def transform_inputs_to_parameters(
        ctx: context_manager.FlyteContext, interface: Interface) -> _interface_models.ParameterMap:
    """
    Transforms the given interface (with inputs) to a Parameter Map with defaults set
    :param interface: the interface object
    """
    if interface is None or interface.inputs_with_defaults is None:
        return _interface_models.ParameterMap({})

    variables = transform_variable_map(interface.inputs)
    defaults = interface.inputs_with_defaults
    parameters = {}
    for name, variable in variables.items():
        _, default_value = defaults[name]
        if default_value is None:
            # No default provided -> the parameter is required.
            parameters[name] = _interface_models.Parameter(var=variable, default=None, required=True)
        else:
            default_literal = TypeEngine.to_literal(
                ctx, default_value, python_type=interface.inputs[name], expected=variable.type
            )
            parameters[name] = _interface_models.Parameter(var=variable, default=default_literal, required=False)
    return _interface_models.ParameterMap(parameters)
def test_enum_type():
    """Enum types round-trip through the TypeEngine and reject invalid values."""
    literal_type = TypeEngine.to_literal_type(Color)
    assert literal_type is not None
    assert literal_type.enum_type is not None
    assert literal_type.enum_type.values
    assert literal_type.enum_type.values == [c.value for c in Color]

    ctx = FlyteContextManager.current_context()
    red_literal = TypeEngine.to_literal(ctx, Color.RED, Color, TypeEngine.to_literal_type(Color))
    assert red_literal
    assert red_literal.scalar
    assert red_literal.scalar.primitive.string_value == "red"

    as_enum = TypeEngine.to_python_value(ctx, red_literal, Color)
    assert as_enum
    assert as_enum == Color.RED

    as_str = TypeEngine.to_python_value(ctx, red_literal, str)
    assert as_str
    assert as_str == "red"

    def string_literal(text):
        # Wrap a raw string the way the engine would see an enum literal.
        return Literal(scalar=Scalar(primitive=Primitive(string_value=text)))

    # str(Color.RED) is "Color.RED", not the member value "red" -> rejected.
    with pytest.raises(ValueError):
        TypeEngine.to_python_value(ctx, string_literal(str(Color.RED)), Color)

    # A value outside the enum's members is also rejected.
    with pytest.raises(ValueError):
        TypeEngine.to_python_value(ctx, string_literal("bad"), Color)

    with pytest.raises(AssertionError):
        TypeEngine.to_literal_type(UnsupportedEnumValues)
def test_engine_file_output():
    """A local path converts to a single-blob literal and back, preserving contents."""
    basic_blob_type = _core_types.BlobType(
        format="",
        dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
    )

    fs = FileAccessProvider(local_sandbox_dir="/tmp/flytetesting")
    with context_manager.FlyteContext.current_context().new_file_access_context(file_access_provider=fs) as ctx:
        # Write some text to a file not in that directory above
        test_file_location = "/tmp/sample.txt"
        with open(test_file_location, "w") as fh:
            fh.write("Hello World\n")

        lit = TypeEngine.to_literal(ctx, test_file_location, os.PathLike, LiteralType(blob=basic_blob_type))

        # Since we're using local as remote, we should be able to just read the file from the 'remote' location.
        with open(lit.scalar.blob.uri, "r") as fh:
            assert fh.readline() == "Hello World\n"

        # We should also be able to turn the thing back into regular python native thing.
        redownloaded_local_file_location = TypeEngine.to_python_value(ctx, lit, os.PathLike)
        with open(redownloaded_local_file_location, "r") as fh:
            assert fh.readline() == "Hello World\n"
def test_dont_convert_remotes():
    """A remote (s3://) FlyteFile URI must pass through a dynamic task unmodified."""
    @task
    def t1(in1: FlyteFile):
        print(in1)

    @dynamic
    def dyn(in1: FlyteFile):
        t1(in1=in1)

    fd = FlyteFile("s3://anything")
    with context_manager.FlyteContext.current_context().new_serialization_settings(
        serialization_settings=context_manager.SerializationSettings(
            project="test_proj",
            domain="test_domain",
            version="abc",
            image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
            env={},
        )
    ) as ctx:
        with ctx.new_execution_context(mode=ExecutionState.Mode.TASK_EXECUTION) as ctx:
            lit = TypeEngine.to_literal(
                ctx, fd, FlyteFile,
                BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE))
            lm = LiteralMap(literals={"in1": lit})
            # Compiling the dynamic task must keep the remote URI as-is,
            # not download/re-upload it.
            wf = dyn.dispatch_execute(ctx, lm)
            assert wf.nodes[0].inputs[
                0].binding.scalar.blob.uri == "s3://anything"
def _workflow_fn_outputs_to_promise(
    ctx: FlyteContext,
    native_outputs: typing.Dict[str, type],  # Actually an orderedDict
    typed_outputs: Dict[str, _interface_models.Variable],
    outputs: Union[Any, Tuple[Any]],
) -> List[Promise]:
    """Repackage the raw return value(s) of a workflow function as Promises.

    Existing Promises are re-named to the workflow's output names; plain Python
    values are converted to literals via the TypeEngine.

    :param ctx: context used for literal conversion
    :param native_outputs: ordered mapping of output name -> Python type
    :param typed_outputs: mapping of output name -> typed interface Variable
    :param outputs: whatever the workflow function returned (value or tuple)
    :raises AssertionError: when the number of returned values does not match
        the declared interface
    """
    if len(native_outputs) == 1:
        if isinstance(outputs, tuple):
            if len(outputs) != 1:
                raise AssertionError(
                    f"The Workflow specification indicates only one return value, received {len(outputs)}"
                )
        else:
            # Normalize the single bare value into a 1-tuple so the zip below works.
            outputs = (outputs, )

    if len(native_outputs) > 1:
        if not isinstance(outputs, tuple) or len(native_outputs) != len(outputs):
            # Length check. Describe what was received without calling len() on a
            # value that may not support it (the original f-string raised
            # TypeError for unsized returns, masking the intended AssertionError).
            received = len(outputs) if isinstance(outputs, tuple) else f"a single {type(outputs).__name__}"
            raise AssertionError(
                f"The workflow specification indicates {len(native_outputs)} return vals, but received {received}"
            )

    # This recasts the Promises provided by the outputs of the workflow's tasks into the correct output names
    # of the workflow itself
    return_vals = []
    for (k, t), v in zip(native_outputs.items(), outputs):
        if isinstance(v, Promise):
            return_vals.append(v.with_var(k))
        else:
            # Found a return type that is not a promise, so we need to transform it
            var = typed_outputs[k]
            return_vals.append(
                Promise(var=k, val=TypeEngine.to_literal(ctx, v, t, var.type)))
    return return_vals
def _local_execute(self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """Execute this workflow locally and repackage its outputs as Promises.

    :param ctx: the current FlyteContext
    :param kwargs: workflow inputs; Promises are passed through, native values
        are first converted to literal-backed Promises
    :raises FlyteValueException: when the return value's shape disagrees with
        the declared interface (void vs non-void)
    """
    # This is done to support the invariant that Workflow local executions always work with Promise objects
    # holding Flyte literal values. Even in a wf, a user can call a sub-workflow with a Python native value.
    # (Replacing values while iterating is safe here: no keys are added or removed.)
    for k, v in kwargs.items():
        if not isinstance(v, Promise):
            t = self.python_interface.inputs[k]
            kwargs[k] = Promise(var=k, val=TypeEngine.to_literal(ctx, v, t, self.interface.inputs[k].type))

    # The output of this will always be a combination of Python native values and Promises containing Flyte
    # Literals.
    function_outputs = self.execute(**kwargs)

    # First handle the empty return case.
    # A workflow function may return a task that doesn't return anything
    #   def wf():
    #       return t1()
    # or it may not return at all
    #   def wf():
    #       t1()
    # In the former case we get the task's VoidPromise, in the latter we get None
    if isinstance(function_outputs, VoidPromise) or function_outputs is None:
        if len(self.python_interface.outputs) != 0:
            raise FlyteValueException(
                function_outputs,
                f"{function_outputs} received but interface has {len(self.python_interface.outputs)} outputs.",
            )
        return VoidPromise(self.name)

    # Because we should've already returned in the above check, we just raise an error here.
    if len(self.python_interface.outputs) == 0:
        raise FlyteValueException(
            function_outputs,
            f"{function_outputs} received but should've been VoidPromise or None."
        )

    expected_output_names = list(self.python_interface.outputs.keys())
    if len(expected_output_names) == 1:
        # Here we have to handle the fact that the wf could've been declared with a typing.NamedTuple of
        # length one. That convention is used for naming outputs - and single-length-NamedTuples are
        # particularly troublesome but elegant handling of them is not a high priority
        # Again, we're using the output_tuple_name as a proxy.
        if self.python_interface.output_tuple_name and isinstance(function_outputs, tuple):
            wf_outputs_as_map = {expected_output_names[0]: function_outputs[0]}
        else:
            wf_outputs_as_map = {expected_output_names[0]: function_outputs}
    else:
        wf_outputs_as_map = {expected_output_names[i]: function_outputs[i] for i, _ in enumerate(function_outputs)}

    # Basically we need to repackage the promises coming from the tasks into Promises that match the workflow's
    # interface. We do that by extracting out the literals, and creating new Promises
    wf_outputs_as_literal_dict = translate_inputs_to_literals(
        ctx,
        wf_outputs_as_map,
        flyte_interface_types=self.interface.outputs,
        native_types=self.python_interface.outputs,
    )
    # Recreate new promises that use the workflow's output names.
    new_promises = [Promise(var, wf_outputs_as_literal_dict[var]) for var in expected_output_names]

    return create_task_output(new_promises, self.python_interface)
def convert_to_literal(self, ctx: typing.Optional[click.Context], param: typing.Optional[click.Parameter], value: typing.Any) -> Literal:
    """Convert a CLI-supplied value to a Flyte ``Literal`` based on ``self._literal_type``.

    Dispatches, in priority order, on: structured dataset, blob, collection/map,
    union, simple/enum, then schema (deprecated).

    :param ctx: the click context, if any
    :param param: the click parameter being parsed, if any
    :param value: the raw CLI value (string or already-parsed object)
    :raises click.BadParameter: when JSON input does not match the expected shape
    :raises DeprecationWarning: for legacy schema types
    :raises NotImplementedError: for types with no CLI parsing support
    """
    if self._literal_type.structured_dataset_type:
        return self.convert_to_structured_dataset(ctx, param, value)

    if self._literal_type.blob:
        return self.convert_to_blob(ctx, param, value)

    if self._literal_type.collection_type or self._literal_type.map_value_type:
        # TODO Does not support nested flytefile, flyteschema types
        # Strings are treated as JSON; anything else is assumed pre-parsed.
        v = json.loads(value) if isinstance(value, str) else value
        if self._literal_type.collection_type and not isinstance(v, list):
            raise click.BadParameter(
                f"Expected json list '[...]', parsed value is {type(v)}")
        if self._literal_type.map_value_type and not isinstance(v, dict):
            raise click.BadParameter(
                "Expected json map '{}', parsed value is {%s}" % type(v))
        return TypeEngine.to_literal(self._flyte_ctx, v, self._python_type, self._literal_type)

    if self._literal_type.union_type:
        return self.convert_to_union(ctx, param, value)

    if self._literal_type.simple or self._literal_type.enum_type:
        if self._literal_type.simple and self._literal_type.simple == SimpleType.STRUCT:
            # Structs: parse JSON into the dataclass unless it's already the right type.
            if type(value) != self._python_type:
                o = cast(DataClassJsonMixin, self._python_type).from_json(value)
            else:
                o = value
            return TypeEngine.to_literal(self._flyte_ctx, o, self._python_type, self._literal_type)
        return Literal(
            scalar=self._converter.convert(value, self._python_type))

    if self._literal_type.schema:
        raise DeprecationWarning(
            "Schema Types are not supported in pyflyte run. Use StructuredDataset instead."
        )

    raise NotImplementedError(
        f"CLI parsing is not available for Python Type:`{self._python_type}`, LiteralType:`{self._literal_type}`."
    )
def test_dataclass_complex_transform(two_sample_inputs):
    """A MyInput dataclass survives repeated literal round-trips, alone and in a list."""
    sample_a, sample_b = two_sample_inputs[0], two_sample_inputs[1]
    ctx = FlyteContextManager.current_context()
    literal_type = TypeEngine.to_literal_type(MyInput)

    # Round-trip twice: python -> literal -> python -> literal -> python.
    lit_one = TypeEngine.to_literal(ctx, sample_a, MyInput, literal_type)
    assert lit_one.scalar.generic["apriori_config"] is not None
    back_one = TypeEngine.to_python_value(ctx, lit_one, MyInput)
    assert back_one.apriori_config is not None

    lit_two = TypeEngine.to_literal(ctx, back_one, MyInput, literal_type)
    assert lit_two.scalar.generic["apriori_config"] is not None
    back_two = TypeEngine.to_python_value(ctx, lit_two, MyInput)
    assert back_two.apriori_config is not None

    # A list of inputs keeps the nested config on each element.
    list_type = TypeEngine.to_literal_type(List[MyInput])
    list_literal = TypeEngine.to_literal(ctx, [sample_a, sample_b], List[MyInput], list_type)
    assert list_literal.collection.literals[0].scalar.generic["apriori_config"] is not None
    assert list_literal.collection.literals[1].scalar.generic["apriori_config"] is not None
def test_dont_convert_remotes():
    """A remote (s3://) FlyteFile URI must pass through a dynamic task unmodified,
    while an unconvertible native value still raises."""
    @task
    def t1(in1: FlyteFile):
        print(in1)

    @dynamic
    def dyn(in1: FlyteFile):
        t1(in1=in1)

    fd = FlyteFile("s3://anything")
    with context_manager.FlyteContextManager.with_context(
        context_manager.FlyteContextManager.current_context().with_serialization_settings(
            flytekit.configuration.SerializationSettings(
                project="test_proj",
                domain="test_domain",
                version="abc",
                image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
                env={},
            )
        )
    ):
        ctx = context_manager.FlyteContextManager.current_context()
        with context_manager.FlyteContextManager.with_context(
            ctx.with_execution_state(ctx.new_execution_state().with_params(mode=ExecutionState.Mode.TASK_EXECUTION))
        ) as ctx:
            lit = TypeEngine.to_literal(
                ctx, fd, FlyteFile, BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
            )
            lm = LiteralMap(literals={"in1": lit})
            # Compiling the dynamic task must keep the remote URI as-is,
            # not download/re-upload it.
            wf = dyn.dispatch_execute(ctx, lm)
            assert wf.nodes[0].inputs[0].binding.scalar.blob.uri == "s3://anything"

            # An int has no conversion path to FlyteFile.
            with pytest.raises(TypeError, match="No automatic conversion found from type <class 'int'>"):
                TypeEngine.to_literal(
                    ctx, 3, FlyteFile, BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
                )
def unwrap_literal_map_and_execute(
    self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
) -> Union[VoidPromise, _literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
    """Translate literal inputs to Python, run ``execute``, and re-literalize outputs.

    Please see the implementation of the dispatch_execute function in the real task.

    :param ctx: context used for literal <-> Python conversion
    :param input_literal_map: the task's inputs as a LiteralMap
    :raises AssertionError: when an individual output is an unexpected tuple
    """
    # Invoked before the task is executed
    # Translate the input literals to Python native
    native_inputs = TypeEngine.literal_map_to_kwargs(ctx, input_literal_map, self.python_interface.inputs)

    logger.info(f"Invoking {self.name} with inputs: {native_inputs}")
    try:
        native_outputs = self.execute(**native_inputs)
    except Exception as e:
        logger.exception(f"Exception when executing {e}")
        # Bare raise preserves the original traceback (was `raise e`).
        raise
    logger.info(f"Task executed successfully in user level, outputs: {native_outputs}")

    # Normalize the raw return value into a name -> value map.
    expected_output_names = list(self.python_interface.outputs.keys())
    if len(expected_output_names) == 1:
        native_outputs_as_map = {expected_output_names[0]: native_outputs}
    elif len(expected_output_names) == 0:
        native_outputs_as_map = {}
    else:
        native_outputs_as_map = {
            expected_output_names[i]: native_outputs[i] for i, _ in enumerate(native_outputs)
        }

    # We manually construct a LiteralMap here because task inputs and outputs actually violate the assumption
    # built into the IDL that all the values of a literal map are of the same type.
    literals = {}
    for k, v in native_outputs_as_map.items():
        literal_type = self.interface.outputs[k].type
        py_type = self.python_interface.outputs[k]

        if isinstance(v, tuple):
            # Message fix: added the missing space between "task" and the name.
            raise AssertionError(f"Output({k}) in task {self.name} received a tuple {v}, instead of {py_type}")
        literals[k] = TypeEngine.to_literal(ctx, v, py_type, literal_type)

    outputs_literal_map = _literal_models.LiteralMap(literals=literals)
    # After the execute has been successfully completed
    return outputs_literal_map
def test_protos():
    """Protobuf messages round-trip through the TypeEngine as STRUCT literals."""
    ctx = FlyteContext.current_context()
    error_proto = errors_pb2.ContainerError(code="code", message="message")

    proto_lt = TypeEngine.to_literal_type(errors_pb2.ContainerError)
    assert proto_lt.simple == SimpleType.STRUCT
    assert proto_lt.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    encoded = TypeEngine.to_literal(ctx, error_proto, errors_pb2.ContainerError, proto_lt)
    decoded = TypeEngine.to_python_value(ctx, encoded, errors_pb2.ContainerError)
    assert decoded == error_proto

    # Test error: an integer literal cannot be decoded as a protobuf message.
    bad_literal = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, bad_literal, errors_pb2.ContainerError)
def test_str_input(folders_and_files_setup):
    """Converting a MyInput built from string paths still yields a generic scalar."""
    proxy_config = MyProxyConfiguration(splat_data_dir="/tmp/proxy_splat", apriori_file="/opt/config/a_file")
    proxy_params = MyProxyParameters(id="pp_id", job_i_step=1)
    apriori = MyAprioriConfiguration(
        static_data_dir=FlyteDirectory("gs://my-bucket/one"),
        external_data_dir=FlyteDirectory("gs://my-bucket/two"),
    )
    # Intentionally passing in the wrong type
    sample = MyInput(
        main_product=folders_and_files_setup[0],  # noqa
        apriori_config=apriori,
        proxy_config=proxy_config,
        proxy_params=proxy_params,
    )

    ctx = FlyteContextManager.current_context()
    literal_type = TypeEngine.to_literal_type(MyInput)
    first_literal = TypeEngine.to_literal(ctx, sample, MyInput, literal_type)
    assert first_literal.scalar.generic is not None
def _local_execute(
        self, ctx: FlyteContext, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromise]:
    """
    Performs local execution of a workflow. kwargs are expected to be Promises for the most part (unless,
    someone has hardcoded in my_wf(input_1=5) or something).
    :param ctx: The FlyteContext
    :param kwargs: parameters for the workflow itself
    """
    # NOTE(review): this log message lacks a space/separator between "ctx" and
    # the mode value — cosmetic only, left untouched here.
    logger.info(
        f"Executing Workflow {self._name}, ctx{ctx.execution_state.Mode}")

    # This is done to support the invariant that Workflow local executions always work with Promise objects
    # holding Flyte literal values. Even in a wf, a user can call a sub-workflow with a Python native value.
    # (Replacing values while iterating is safe: no keys are added or removed.)
    for k, v in kwargs.items():
        if not isinstance(v, Promise):
            t = self._native_interface.inputs[k]
            kwargs[k] = Promise(var=k, val=TypeEngine.to_literal(
                ctx, v, t, self.interface.inputs[k].type))

    function_outputs = self.execute(**kwargs)
    if (isinstance(function_outputs, VoidPromise) or function_outputs is None
            or len(self.python_interface.outputs) == 0):
        # The reason this is here is because a workflow function may return a task that doesn't return anything
        #   def wf():
        #       return t1()
        # or it may not return at all
        #   def wf():
        #       t1()
        # In the former case we get the task's VoidPromise, in the latter we get None
        return VoidPromise(self.name)

    # TODO: Can we refactor the task code to be similar to what's in this function?
    promises = _workflow_fn_outputs_to_promise(
        ctx, self._native_interface.outputs, self.interface.outputs,
        function_outputs)
    # TODO: With the native interface, create_task_output should be able to derive the typed interface, and it
    # should be able to do the conversion of the output of the execute() call directly.
    return create_task_output(promises, self._native_interface)
def transform_inputs_to_parameters(
        ctx: context_manager.FlyteContext, interface: Interface) -> _interface_models.ParameterMap:
    """
    Transforms the given interface (with inputs) to a Parameter Map with defaults set
    :param interface: the interface object
    """
    if interface is None or interface.inputs_with_defaults is None:
        return _interface_models.ParameterMap({})
    if interface.docstring is None:
        inputs_vars = transform_variable_map(interface.inputs)
    else:
        # Docstring descriptions are attached to the variables when available.
        inputs_vars = transform_variable_map(
            interface.inputs, interface.docstring.input_descriptions)

    # Hoisted out of the loop below: the import is loop-invariant (it used to be
    # re-executed on every Optional input).
    from flytekit import Literal, Scalar

    params = {}
    inputs_with_def = interface.inputs_with_defaults
    for k, v in inputs_vars.items():
        val, _default = inputs_with_def[k]
        if _default is None and get_origin(val) is typing.Union and type(None) in get_args(val):
            # An Optional input with no explicit default gets an implicit None
            # default and is marked not-required.
            literal = Literal(scalar=Scalar(none_type=Void()))
            params[k] = _interface_models.Parameter(var=v, default=literal, required=False)
        else:
            required = _default is None
            default_lv = None
            if _default is not None:
                default_lv = TypeEngine.to_literal(
                    ctx, _default, python_type=interface.inputs[k], expected=v.type)
            params[k] = _interface_models.Parameter(var=v, default=default_lv, required=required)
    return _interface_models.ParameterMap(params)
def dispatch_execute(
    self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
) -> Union[_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
    """
    This method translates Flyte's Type system based input values and invokes the actual call to the executor
    This method is also invoked during runtime.

    * ``VoidPromise`` is returned in the case when the task itself declares no outputs.
    * ``Literal Map`` is returned when the task returns either one more outputs in the declaration. Individual outputs
      may be none
    * ``DynamicJobSpec`` is returned when a dynamic workflow is executed
    """
    # Invoked before the task is executed
    new_user_params = self.pre_execute(ctx.user_space_params)

    # Create another execution context with the new user params, but let's keep the same working dir
    with ctx.new_execution_context(
        mode=ctx.execution_state.mode,
        execution_params=new_user_params,
        working_dir=ctx.execution_state.working_dir,
    ) as exec_ctx:
        # TODO We could support default values here too - but not part of the plan right now
        # Translate the input literals to Python native
        native_inputs = TypeEngine.literal_map_to_kwargs(exec_ctx, input_literal_map, self.python_interface.inputs)

        # TODO: Logger should auto inject the current context information to indicate if the task is running within
        #   a workflow or a subworkflow etc
        logger.info(f"Invoking {self.name} with inputs: {native_inputs}")
        try:
            native_outputs = self.execute(**native_inputs)
        except Exception as e:
            logger.exception(f"Exception when executing {e}")
            # Bare raise preserves the original traceback (was `raise e`).
            raise
        logger.info(f"Task executed successfully in user level, outputs: {native_outputs}")

        # Lets run the post_execute method. This may result in a IgnoreOutputs Exception, which is
        # bubbled up to be handled at the callee layer.
        native_outputs = self.post_execute(new_user_params, native_outputs)

        # Short circuit the translation to literal map because what's returned may be a dj spec (or an
        # already-constructed LiteralMap if the dynamic task was a no-op), not python native values
        if isinstance(native_outputs, (_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec)):
            return native_outputs

        expected_output_names = list(self._outputs_interface.keys())
        if len(expected_output_names) == 1:
            # Here we have to handle the fact that the task could've been declared with a typing.NamedTuple of
            # length one. That convention is used for naming outputs - and single-length-NamedTuples are
            # particularly troublesome but elegant handling of them is not a high priority
            # Again, we're using the output_tuple_name as a proxy.
            if self.python_interface.output_tuple_name and isinstance(native_outputs, tuple):
                native_outputs_as_map = {expected_output_names[0]: native_outputs[0]}
            else:
                native_outputs_as_map = {expected_output_names[0]: native_outputs}
        elif len(expected_output_names) == 0:
            native_outputs_as_map = {}
        else:
            native_outputs_as_map = {
                expected_output_names[i]: native_outputs[i] for i, _ in enumerate(native_outputs)
            }

        # We manually construct a LiteralMap here because task inputs and outputs actually violate the assumption
        # built into the IDL that all the values of a literal map are of the same type.
        literals = {}
        for k, v in native_outputs_as_map.items():
            literal_type = self._outputs_interface[k].type
            py_type = self.get_type_for_output_var(k, v)

            if isinstance(v, tuple):
                # Message fix: added the missing space between "task" and the name.
                raise AssertionError(f"Output({k}) in task {self.name} received a tuple {v}, instead of {py_type}")
            try:
                literals[k] = TypeEngine.to_literal(exec_ctx, v, py_type, literal_type)
            except Exception as e:
                raise AssertionError(f"failed to convert return value for var {k}") from e

        outputs_literal_map = _literal_models.LiteralMap(literals=literals)
        # After the execute has been successfully completed
        return outputs_literal_map
def test_interface():
    """Exercise LiteralsResolver end to end: literal lookup, Python-type guessing,
    result caching, and re-resolution after updating type hints."""
    ctx = FlyteContextManager.current_context()
    lt = TypeEngine.to_literal_type(pd.DataFrame)
    frame = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    annotated_sd_type = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    assert df_literal_type.structured_dataset_type is not None
    cols = df_literal_type.structured_dataset_type.columns
    assert len(cols) == 2
    assert cols[0].name == "name"
    assert cols[0].literal_type.simple is not None
    assert cols[1].name == "age"
    assert cols[1].literal_type.simple is not None

    sd = annotated_sd_type(frame)
    sd_literal = TypeEngine.to_literal(ctx, sd, python_type=annotated_sd_type, expected=lt)

    # Small builders keep the literal-map construction below readable.
    def str_lit(s):
        return Literal(scalar=Scalar(primitive=Primitive(string_value=s)))

    def int_lit(i):
        return Literal(scalar=Scalar(primitive=Primitive(integer=i)))

    lm = {
        "my_map": Literal(
            map=LiteralMap(literals={"k1": str_lit("v1"), "k2": str_lit("2")})
        ),
        "my_list": Literal(
            collection=LiteralCollection(literals=[int_lit(1), int_lit(2), int_lit(3)])
        ),
        "val_a": int_lit(21828),
        "my_df": sd_literal,
    }

    variable_map = {
        "my_map": interface_models.Variable(type=TypeEngine.to_literal_type(typing.Dict[str, str]), description=""),
        "my_list": interface_models.Variable(type=TypeEngine.to_literal_type(typing.List[int]), description=""),
        "val_a": interface_models.Variable(type=TypeEngine.to_literal_type(int), description=""),
        "my_df": interface_models.Variable(type=df_literal_type, description=""),
    }

    lr = LiteralsResolver(lm, variable_map=variable_map, ctx=ctx)
    assert lr._ctx is ctx

    # Unknown keys raise for both indexing and get_literal.
    with pytest.raises(ValueError):
        lr["not"]  # noqa
    with pytest.raises(ValueError):
        lr.get_literal("not")

    # Plain indexing works; the Python type is guessed from the Flyte type.
    assert lr["my_list"] == [1, 2, 3]

    # .get() goes through the same guessing path.
    assert lr.get("my_map") == {"k1": "v1", "k2": "2"}

    # get_literal hands back the stored Literal object itself.
    assert lr.get_literal("my_df") is sd_literal

    guessed_df = lr["my_df"]
    # The guessed type carries no column information.
    assert len(guessed_df.metadata.structured_dataset_type.columns) == 0
    # Resolved values are cached, so indexing again returns the same object.
    assert lr["my_df"] is guessed_df

    # After supplying the annotated hint (and evicting the cached value),
    # re-resolution uses the user-specified type, so columns appear.
    lr.update_type_hints({"my_df": annotated_sd_type})
    del lr._native_values["my_df"]
    refreshed_df = lr.get("my_df")
    assert len(refreshed_df.metadata.structured_dataset_type.columns) == 2
def binding_data_from_python_std(
    ctx: _flyte_context.FlyteContext,
    expected_literal_type: _type_models.LiteralType,
    t_value: typing.Any,
    t_value_type: type,
) -> _literals_models.BindingData:
    """Convert a Python value (or a Promise from an upstream node) into a BindingData model.

    :param ctx: Flyte context used for literal conversion.
    :param expected_literal_type: the Flyte LiteralType this value must bind to.
    :param t_value: the value to bind - a Promise, list, dict, or plain scalar.
    :param t_value_type: the declared Python type of ``t_value``.
    :raises AssertionError: for VoidPromise inputs, tuples, or container/type mismatches.
    """
    # This handles the case where the given value is the output of another task
    if isinstance(t_value, Promise):
        if not t_value.is_ready:
            return _literals_models.BindingData(promise=t_value.ref)
    elif isinstance(t_value, VoidPromise):
        raise AssertionError(
            f"Cannot pass output from task {t_value.task_name} that produces no outputs to a downstream task"
        )
    elif isinstance(t_value, list):
        if expected_literal_type.collection_type is None:
            raise AssertionError(
                f"this should be a list and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        sub_type = ListTransformer.get_sub_type(t_value_type)
        # Recursively bind each element against the collection's element type.
        collection = _literals_models.BindingDataCollection(bindings=[
            binding_data_from_python_std(
                ctx, expected_literal_type.collection_type, t, sub_type)
            for t in t_value
        ])
        return _literals_models.BindingData(collection=collection)
    elif isinstance(t_value, dict):
        if (expected_literal_type.map_value_type is None
                and expected_literal_type.simple != _type_models.SimpleType.STRUCT):
            raise AssertionError(
                f"this should be a Dictionary type and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        k_type, v_type = DictTransformer.get_dict_types(t_value_type)
        if expected_literal_type.simple == _type_models.SimpleType.STRUCT:
            # Untyped dicts bind as one generic-struct scalar, not as a map.
            lit = TypeEngine.to_literal(ctx, t_value, type(t_value),
                                        expected_literal_type)
            return _literals_models.BindingData(scalar=lit.scalar)
        else:
            m = _literals_models.BindingDataMap(
                bindings={
                    k: binding_data_from_python_std(
                        ctx, expected_literal_type.map_value_type, v, v_type)
                    for k, v in t_value.items()
                })
            return _literals_models.BindingData(map=m)
    elif isinstance(t_value, tuple):
        # Fixed: the implicitly-concatenated message fragments were missing
        # separating spaces ("...de-reference theactual..." / "...thenreturn v.x...").
        raise AssertionError(
            "Tuples are not a supported type for individual values in Flyte - got a tuple -"
            f" {t_value}. If using named tuple in an inner task, please, de-reference the"
            " actual attribute that you want to use. For example, in NamedTuple('OP', x=int) then"
            " return v.x, instead of v, even if this has a single element")

    # This is the scalar case - e.g. my_task(in1=5)
    scalar = TypeEngine.to_literal(ctx, t_value, t_value_type,
                                   expected_literal_type).scalar
    return _literals_models.BindingData(scalar=scalar)
def dispatch_execute(
    self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
) -> Union[_literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
    """
    This function is mostly copied from the base PythonTask, but differs in that we have to infer the Python
    interface before executing. Also, we refer to ``self.task_template`` rather than just ``self`` like in task
    classes that derive from the base ``PythonTask``.

    :param ctx: the active FlyteContext.
    :param input_literal_map: the task's inputs as a LiteralMap.
    :return: the outputs as a LiteralMap, or a DynamicJobSpec passed through unchanged.
    :raises AssertionError: if an output is a tuple or fails literal conversion.
    """
    # Invoked before the task is executed
    new_user_params = self.pre_execute(ctx.user_space_params)

    # Create another execution context with the new user params, but let's keep the same working dir
    with ctx.new_execution_context(
        mode=ctx.execution_state.mode,
        execution_params=new_user_params,
        working_dir=ctx.execution_state.working_dir,
    ) as exec_ctx:
        # Added: Have to reverse the Python interface from the task template Flyte interface
        # See docstring for more details.
        guessed_python_input_types = TypeEngine.guess_python_types(
            self.task_template.interface.inputs)
        native_inputs = TypeEngine.literal_map_to_kwargs(
            exec_ctx, input_literal_map, guessed_python_input_types)

        logger.info(
            f"Invoking FlyteTask executor {self.task_template.id.name} with inputs: {native_inputs}"
        )
        try:
            native_outputs = self.execute(**native_inputs)
        except Exception as e:
            logger.exception(f"Exception when executing {e}")
            # Fixed: bare `raise` re-raises with the original traceback intact,
            # whereas `raise e` appends an extra frame to it.
            raise

        logger.info(
            f"Task executed successfully in user level, outputs: {native_outputs}"
        )
        # Lets run the post_execute method. This may result in a IgnoreOutputs Exception, which is
        # bubbled up to be handled at the callee layer.
        native_outputs = self.post_execute(new_user_params, native_outputs)

        # Short circuit the translation to literal map because what's returned may be a dj spec (or an
        # already-constructed LiteralMap if the dynamic task was a no-op), not python native values
        if isinstance(native_outputs, _literal_models.LiteralMap) or isinstance(
                native_outputs, _dynamic_job.DynamicJobSpec):
            return native_outputs

        expected_output_names = list(
            self.task_template.interface.outputs.keys())
        if len(expected_output_names) == 1:
            # Here we have to handle the fact that the task could've been declared with a typing.NamedTuple of
            # length one. That convention is used for naming outputs - and single-length-NamedTuples are
            # particularly troublesome but elegant handling of them is not a high priority
            # Again, we're using the output_tuple_name as a proxy.
            # Deleted some stuff
            native_outputs_as_map = {
                expected_output_names[0]: native_outputs
            }
        elif len(expected_output_names) == 0:
            native_outputs_as_map = {}
        else:
            native_outputs_as_map = {
                expected_output_names[i]: native_outputs[i]
                for i, _ in enumerate(native_outputs)
            }

        # We manually construct a LiteralMap here because task inputs and outputs actually violate the assumption
        # built into the IDL that all the values of a literal map are of the same type.
        literals = {}
        for k, v in native_outputs_as_map.items():
            literal_type = self.task_template.interface.outputs[k].type
            py_type = type(v)

            if isinstance(v, tuple):
                # Fixed: missing space between "task" and the task name in the message.
                raise AssertionError(
                    f"Output({k}) in task {self.task_template.id.name} received a tuple {v}, instead of {py_type}"
                )
            try:
                literals[k] = TypeEngine.to_literal(
                    exec_ctx, v, py_type, literal_type)
            except Exception as e:
                raise AssertionError(
                    f"failed to convert return value for var {k}") from e

        outputs_literal_map = _literal_models.LiteralMap(literals=literals)
        # After the execute has been successfully completed
        return outputs_literal_map