def _set_columns(self, columns):
    """
    Validates a sequence of (name, type) column definitions and stores them on
    the instance as an OrderedDict.

    :param columns: iterable of ('name', type) tuples describing the schema.
    :raises _user_exceptions.FlyteValueException: if a column is not a 2-tuple.
    :raises _user_exceptions.FlyteTypeException: if a column name is not text or
        its type is not a supported literal type.
    :raises ValueError: if a column name is specified more than once.
    """
    names_seen = set()
    for column in columns:
        # The original code performed the isinstance and len checks separately
        # but raised the identical exception for both; a single combined check
        # is equivalent and removes the duplication.
        if not isinstance(column, tuple) or len(column) != 2:
            raise _user_exceptions.FlyteValueException(
                column,
                "When specifying a Schema type with a known set of columns. Each column must be "
                "specified as a tuple in the form ('name', type).")
        name, sdk_type = column
        sdk_type = _helpers.python_std_to_sdk_type(sdk_type)

        if not isinstance(name, (str, _six.text_type)):
            additional_msg = "When specifying a Schema type with a known set of columns, the first element in" \
                             " each tuple must be text."
            raise _user_exceptions.FlyteTypeException(
                received_type=type(name),
                received_value=name,
                expected_type={str, _six.text_type},
                additional_msg=additional_msg)

        if not isinstance(sdk_type, _base_sdk_types.FlyteSdkType) or sdk_type.to_flyte_literal_type() not in \
                get_supported_literal_types_to_pandas_types():
            additional_msg = \
                "When specifying a Schema type with a known set of columns, the second element of " \
                "each tuple must be a supported type. Failed for column: {name}".format(
                    name=name)
            raise _user_exceptions.FlyteTypeException(
                expected_type=list(
                    get_supported_literal_types_to_pandas_types().keys()),
                received_type=sdk_type,
                additional_msg=additional_msg)

        if name in names_seen:
            raise ValueError(
                "The column name {name} was specified multiple times when instantiating the "
                "Schema.".format(name=name))
        names_seen.add(name)

    self._sdk_columns = _collections.OrderedDict(columns)
def infer_sdk_type_from_literal(literal):
    """
    Asks each registered type engine, in priority order, to infer an SDK type
    for the given literal; the first non-None answer wins.

    :param flytekit.models.literals.Literal literal:
    :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
    :raises _user_exceptions.FlyteValueException: if no engine can resolve it.
    """
    for engine in _TypeEngineLoader.iterate_engines_in_order():
        inferred = engine.infer_sdk_type_from_literal(literal)
        if inferred is not None:
            return inferred
    raise _user_exceptions.FlyteValueException(
        literal, "Could not resolve to a type implementation for this value.")
def python_std_to_sdk_type(t):
    """
    Converts a user-supplied Python-style type description to an SDK type by
    polling the registered type engines in priority order.

    :param T t: User input. Should be of the form: Types.Integer, [Types.Integer], {Types.String:
        Types.Integer}, etc.
    :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
    :raises _user_exceptions.FlyteValueException: if no engine can resolve it.
    """
    for engine in _TypeEngineLoader.iterate_engines_in_order():
        resolved = engine.python_std_to_sdk_type(t)
        if resolved is not None:
            return resolved
    raise _user_exceptions.FlyteValueException(
        t, "Could not resolve to an SDK type for this value.")
def from_string(cls, string_value):
    """
    Creates a Blob value pointing at the given path, opened for binary read.

    :param Text string_value: path to the blob data; must be non-empty.
    :rtype: Blob
    :raises _user_exceptions.FlyteValueException: if the path is empty.
    """
    if not string_value:
        # BUG FIX: the original constructed this exception without raising it,
        # so an empty path silently fell through to Blob.from_string.
        raise _user_exceptions.FlyteValueException(
            string_value, "Cannot create a Blob from the provided path value.")
    return cls(_blob_impl.Blob.from_string(string_value, mode='rb'))
def from_string(cls, string_value):
    """
    Creates a Schema value pointing at the given path.

    :param Text string_value: path to the schema data; must be non-empty.
    :rtype: Schema
    :raises _user_exceptions.FlyteValueException: if the path is empty.
    """
    if not string_value:
        # BUG FIX: the original constructed this exception without raising it,
        # so an empty path silently fell through to Schema.from_string.
        raise _user_exceptions.FlyteValueException(
            string_value, "Cannot create a Schema from an empty path")
    return cls(
        _schema_impl.Schema.from_string(string_value, schema_type=cls.schema_type))
def from_string(cls, string_value):
    """
    Builds a Generic value by parsing a JSON document into a protobuf Struct.

    :param Text string_value: Should be a JSON formatted string
    :rtype: Generic
    :raises _user_exceptions.FlyteValueException: if the string is not valid JSON.
    """
    try:
        parsed_struct = _json_format.Parse(string_value, _struct.Struct())
    except Exception:
        raise _user_exceptions.FlyteValueException(
            string_value, "Could not be parsed from JSON.")
    return cls(parsed_struct)
def from_string(cls, string_value, schema_type=None):
    """
    Creates a Schema at a known storage location.

    :param Text string_value: path to the schema data; must be non-empty.
    :param SchemaType schema_type:
    :rtype: Schema
    :raises _user_exceptions.FlyteValueException: if the path is empty.
    """
    if not string_value:
        # BUG FIX: the original constructed this exception without raising it,
        # so an empty path silently fell through to create_at_known_location.
        raise _user_exceptions.FlyteValueException(
            string_value, "Cannot create a Schema from an empty path")
    return cls.create_at_known_location(string_value, schema_type=schema_type)
def from_string(cls, string_value):
    """
    Deserializes a protobuf message from a base64-encoded string.

    :param Text string_value: b64 encoded string of bytes
    :rtype: Protobuf
    :raises _user_exceptions.FlyteValueException: if the string is not valid base64.
    """
    try:
        decoded = _base64.b64decode(string_value)
    except (TypeError, ValueError):
        # BUG FIX: on Python 3, b64decode raises binascii.Error (a ValueError
        # subclass) for malformed input; TypeError was the Python 2 behavior.
        # Catching both keeps the intended FlyteValueException on either version.
        raise _user_exceptions.FlyteValueException(string_value, "The string is not valid base64-encoded.")
    pb_obj = cls.pb_type()
    pb_obj.ParseFromString(decoded)
    return cls(pb_obj)
def from_urn(cls, string: str) -> "WorkflowExecutionIdentifier":
    """
    Parses a colon-delimited execution URN ("ex:project:domain:name") into an
    identifier instance.
    """
    parts = string.split(":")
    if len(parts) != 4:
        raise _user_exceptions.FlyteValueException(
            string,
            "The provided string was not in a parseable format. The string for an identifier must be in the format"
            " ex:project:domain:name.",
        )
    prefix, project, domain, name = parts
    if prefix != "ex":
        raise _user_exceptions.FlyteValueException(
            prefix,
            "The provided string could not be parsed. The first element of an execution identifier must be 'ex'.",
        )
    return cls(project, domain, name)
def _validate_phases(self, phases):
    """
    Ensures that at least one phase was requested and that every phase is one
    of the terminal states this notification supports.

    :param list[int] phases:
    :raises _user_exceptions.FlyteAssertion: if no phases were given.
    :raises _user_exceptions.FlyteValueException: if a phase is not terminal.
    """
    if len(phases) == 0:
        raise _user_exceptions.FlyteAssertion("You must specify at least one phase for a notification.")
    for phase in phases:
        if phase not in self.VALID_PHASES:
            # BUG FIX: FlyteValueException takes (received_value, error_message);
            # the original passed VALID_PHASES as an extra positional argument
            # plus a non-existent 'additional_message' keyword, which would
            # raise TypeError instead of the intended exception.
            raise _user_exceptions.FlyteValueException(
                phase,
                "Notifications can only be specified on terminal states. Allowed phases: {}".format(
                    self.VALID_PHASES),
            )
def from_string(cls, string_value):
    """
    Creates a MultiPartCSV value pointing at the given path, opened as CSV text.

    :param Text string_value: path to the multipart blob; must be non-empty.
    :rtype: MultiPartCSV
    :raises _user_exceptions.FlyteValueException: if the path is empty.
    """
    if not string_value:
        # BUG FIX: the original constructed this exception without raising it,
        # so an empty path silently fell through to MultiPartBlob.from_string.
        raise _user_exceptions.FlyteValueException(
            string_value, "Cannot create a MultiPartCSV from the provided path value.")
    return cls(
        _blob_impl.MultiPartBlob.from_string(string_value, format='csv', mode='r'))
def from_urn(cls, urn: str) -> "Identifier":
    """
    Parses a colon-delimited entity URN
    ("entity_type:project:domain:name:version") into an Identifier.
    """
    parts = urn.split(":")
    if len(parts) != 5:
        raise _user_exceptions.FlyteValueException(
            urn,
            "The provided string was not in a parseable format. The string for an identifier must be in the "
            "format entity_type:project:domain:name:version.",
        )
    type_str, project, domain, name, version = parts
    if type_str not in cls._STRING_TO_TYPE_MAP:
        raise _user_exceptions.FlyteValueException(
            type_str,
            "The provided string could not be parsed. The first element of an identifier must be one of: "
            f"{list(cls._STRING_TO_TYPE_MAP.keys())}. ",
        )
    return cls(cls._STRING_TO_TYPE_MAP[type_str], project, domain, name, version)
def download(self, from_path, to_path):
    """
    Fetches the resource at an HTTP(S) URL and writes its body to a local file.

    :param Text from_path: URL to fetch.
    :param Text to_path: local file path to write to.
    :raises _user_exceptions.FlyteValueException: on a non-OK HTTP status.
    """
    response = _requests.get(from_path)
    expected = type(self)._HTTP_OK
    if response.status_code != expected:
        raise _user_exceptions.FlyteValueException(
            response.status_code,
            "Request for data @ {} failed. Expected status code {}".format(from_path, expected),
        )
    with open(to_path, "wb") as out_file:
        out_file.write(response.content)
def from_python_std(cls, string):
    """
    Parses a string in the correct format into an identifier

    :param Text string: colon-delimited string in the form
        entity_type:project:domain:name:version.
    :rtype: Identifier
    :raises _user_exceptions.FlyteValueException: if the string is malformed or
        the entity type is unknown.
    """
    segments = string.split(":")
    if len(segments) != 5:
        # BUG FIX: FlyteValueException takes (received_value, error_message);
        # the original passed only a message, matching neither the exception's
        # signature nor the (value, message) pattern used by from_urn.
        raise _user_exceptions.FlyteValueException(
            string,
            "The provided string was not in a parseable format. The string for an identifier must be in the format"
            " entity_type:project:domain:name:version.",
        )
    resource_type, project, domain, name, version = segments
    if resource_type not in cls._STRING_TO_TYPE_MAP:
        raise _user_exceptions.FlyteValueException(
            resource_type,
            "The provided string could not be parsed. The first element of an identifier must be one of: "
            "{}.".format(list(cls._STRING_TO_TYPE_MAP.keys())),
        )
    resource_type = cls._STRING_TO_TYPE_MAP[resource_type]
    return cls(resource_type, project, domain, name, version)
def schema_instantiator(columns=None):
    """
    Produces a Schema subclass bound to the given column definitions.

    :param list[(Text, flytekit.common.types.base_sdk_types.FlyteSdkType)] columns: [Optional]
        Description of the columns in the underlying schema. Should be tuples with the first element
        being the name.
    :rtype: SchemaInstantiator
    :raises _user_exceptions.FlyteValueException: if columns is an empty list.
    """
    # An explicit empty list is rejected; None means "no known columns".
    if columns is not None and len(columns) == 0:
        raise _user_exceptions.FlyteValueException(
            columns,
            "When specifying a Schema type with a known set of columns, a non-empty list must be provided as "
            "inputs")

    class _Schema(_six.with_metaclass(SchemaInstantiator, Schema)):
        _schema_type = _schema_impl.SchemaType(columns=columns)

    return _Schema
def exists(self, path):
    """
    Checks whether an HTTP(S) resource exists via a HEAD request.

    :param Text path: the path of the file
    :rtype bool: whether the file exists or not
    :raises _user_exceptions.FlyteValueException: on an unexpected HTTP status.
    """
    response = _requests.head(path)
    # NOT_FOUND and FORBIDDEN are treated as definitive "does not exist"
    # answers rather than errors.
    acceptable = {
        type(self)._HTTP_OK,
        type(self)._HTTP_NOT_FOUND,
        type(self)._HTTP_FORBIDDEN,
    }
    if response.status_code not in acceptable:
        raise _user_exceptions.FlyteValueException(
            response.status_code,
            "Data at {} could not be checked for existence. Expected one of: {}"
            .format(path, acceptable))
    return response.status_code == type(self)._HTTP_OK
def from_python_std(cls, t_value):
    """
    Wraps a timezone-aware datetime (or None) as an SDK value.

    :param T t_value: It is up to each individual object as to whether or not this value can be cast.
    :rtype: FlyteSdkValue
    :raises: flytekit.common.exceptions.user.FlyteTypeException
    :raises _user_exceptions.FlyteValueException: if the datetime is naive.
    """
    if t_value is None:
        return _base_sdk_types.Void()
    # IMPROVEMENT: isinstance replaces the exact `type(...) !=` comparison so
    # datetime subclasses are accepted too; plain dates still fail since date
    # is a superclass, not a subclass, of datetime.
    elif not isinstance(t_value, _datetime.datetime):
        raise _user_exceptions.FlyteTypeException(type(t_value), _datetime.datetime, t_value)
    elif t_value.tzinfo is None:
        raise _user_exceptions.FlyteValueException(
            t_value,
            "Datetime objects in Flyte must be timezone aware. "
            "tzinfo was found to be None.")
    return cls(t_value)
def from_python_std(cls, t_value):
    """
    Wraps a value as a CSV blob SDK value, validating format if already a Blob.

    :param T t_value: It is up to each individual object as to whether or not this value can be cast.
    :rtype: FlyteSdkValue
    :raises: flytekit.common.exceptions.user.FlyteTypeException
    """
    if t_value is None:
        return _base_sdk_types.Void()
    if isinstance(t_value, _blob_impl.Blob):
        # Pre-built blobs are accepted as-is, but only if they carry CSV data.
        if t_value.metadata.type.format != "csv":
            raise _user_exceptions.FlyteValueException(
                t_value, "Blob is in incorrect format. Expected CSV.")
        csv_blob = t_value
    else:
        csv_blob = _blob_impl.Blob.from_python_std(t_value, format="csv", mode="w")
    return cls(csv_blob)
def to_literal_model(self):
    """
    Converts current binding data into a Literal asserting that there are no promises in the bindings.

    :rtype: Literal
    :raises _user_exceptions.FlyteValueException: if the binding contains a promise.
    """
    if self.promise:
        raise _user_exceptions.FlyteValueException(
            self.promise,
            "Cannot convert BindingData to a Literal because "
            "it has a promise.",
        )
    elif self.scalar:
        return Literal(scalar=self.scalar)
    elif self.collection:
        return Literal(
            collection=LiteralCollection(
                literals=[binding.to_literal_model() for binding in self.collection.bindings]
            )
        )
    elif self.map:
        # BUG FIX: self.map.bindings is a mapping, so iterating it directly
        # yields keys only; .items() is required to unpack (key, binding) pairs.
        return Literal(
            map=LiteralMap(
                literals={k: binding.to_literal_model() for k, binding in self.map.bindings.items()}
            )
        )
    # NOTE(review): if none of promise/scalar/collection/map is set this
    # implicitly returns None, matching the original behavior.
def from_python_std(cls, t_value):
    """
    Wraps a JSON-serializable dict as a Generic SDK value.

    :param T t_value: It is up to each individual object as to whether or not this value can be cast.
    :rtype: FlyteSdkValue
    :raises: flytekit.common.exceptions.user.FlyteTypeException
    """
    if t_value is None:
        return _base_sdk_types.Void()
    if not isinstance(t_value, dict):
        raise _user_exceptions.FlyteTypeException(type(t_value), dict, t_value)
    # Round-trip through JSON both validates serializability and produces the
    # text form the protobuf Struct parser consumes.
    try:
        serialized = _json.dumps(t_value)
    except Exception:
        raise _user_exceptions.FlyteValueException(
            t_value, "Is not JSON serializable.")
    return cls(_json_format.Parse(serialized, _struct.Struct()))
def _load_engines(cls):
    """
    Loads (and caches) the configured type engines, re-loading only when the
    TYPE_ENGINES configuration has changed since the last call. The default
    Flyte engine is always appended last as a fallback.

    :raises _user_exceptions.FlyteValueException: if a configured engine
        attribute cannot be found in its module.
    """
    config = _sdk_config.TYPE_ENGINES.get()
    if cls._LOADED_ENGINES is None or config != cls._LAST_LOADED:
        cls._LAST_LOADED = config
        cls._LOADED_ENGINES = []
        for fqdn in config:
            # Each entry is a fully-qualified "module.path.AttrName" string.
            split = fqdn.split(".")
            module_path, attr = ".".join(split[:-1]), split[-1]
            module = _exception_scopes.user_entry_point(
                _importlib.import_module)(module_path)
            if not hasattr(module, attr):
                # BUG FIX: the original message strings concatenated without a
                # separating space, producing "...could not be foundin the module...".
                raise _user_exceptions.FlyteValueException(
                    module,
                    "Failed to load the type engine because the attribute named '{}' could not be found "
                    "in the module '{}'.".format(attr, module_path))
            engine_impl = getattr(module, attr)()
            cls._LOADED_ENGINES.append(engine_impl)
        from flytekit.type_engines.default.flyte import FlyteDefaultTypeEngine as _DefaultEngine
        cls._LOADED_ENGINES.append(_DefaultEngine())
def _load_type_from_tag(tag: str) -> Type:
    """
    Loads python type from tag
    """
    # A valid tag is "package.path.ObjectName"; at least one dot is required
    # to split the module from the object.
    if "." not in tag:
        raise _user_exceptions.FlyteValueException(
            tag,
            "Protobuf tag must include at least one '.' to delineate package and object name.",
        )
    module_name, object_name = tag.rsplit(".", 1)
    try:
        pb_module = _importer.import_module(module_name)
    except ImportError:
        raise _user_exceptions.FlyteAssertion(
            "Could not resolve the protobuf definition @ {}. Is the protobuf library installed?".format(module_name)
        )
    if not hasattr(pb_module, object_name):
        raise _user_exceptions.FlyteAssertion(
            "Could not find the protobuf named: {} @ {}.".format(object_name, module_name))
    return getattr(pb_module, object_name)
def _proto_sdk_type_from_tag(tag):
    """
    Resolves a "package.path.ObjectName" tag to a protobuf SDK type.

    :param Text tag:
    :rtype: _proto.Protobuf
    :raises _user_exceptions.FlyteValueException: if the tag has no '.'.
    :raises _user_exceptions.FlyteAssertion: if the module or name cannot be resolved.
    """
    if "." not in tag:
        raise _user_exceptions.FlyteValueException(
            tag,
            "Protobuf tag must include at least one '.' to delineate package and object name.",
        )
    module_name, object_name = tag.rsplit(".", 1)
    try:
        pb_module = _importer.import_module(module_name)
    except ImportError:
        raise _user_exceptions.FlyteAssertion(
            "Could not resolve the protobuf definition @ {}. Is the protobuf library installed?".format(module_name)
        )
    if not hasattr(pb_module, object_name):
        raise _user_exceptions.FlyteAssertion(
            "Could not find the protobuf named: {} @ {}.".format(object_name, module_name))
    return _proto.create_protobuf(getattr(pb_module, object_name))
def _discover_workflow_components(workflow_class):
    """
    This task iterates over the attributes of a user-defined class in order to return a list of
    inputs, outputs and nodes.

    :param class workflow_class: User-defined class with task instances as attributes.
    :rtype: (list[flytekit.common.promise.Input], list[Output], list[flytekit.common.nodes.SdkNode])
    """
    inputs = []
    outputs = []
    nodes = []

    pending = _queue.Queue()
    top_level_attributes = set()
    for attribute_name in dir(workflow_class):
        pending.put((attribute_name, getattr(workflow_class, attribute_name)))
        top_level_attributes.add(attribute_name)

    # For all task instances defined within the workflow, bind them to this specific workflow and
    # hook-up to the engine (when available). Objects are tracked by id so each is visited once
    # even when reachable through several attributes/containers.
    seen_ids = set()
    while not pending.empty():
        attribute_name, current_obj = pending.get()
        obj_id = id(current_obj)
        if obj_id in seen_ids:
            continue
        seen_ids.add(obj_id)

        if isinstance(current_obj, _nodes.SdkNode):
            # TODO: If an attribute name is on the form node_name[index], the resulting
            # node name might not be correct.
            nodes.append(current_obj.assign_id_and_return(attribute_name))
        elif isinstance(current_obj, _promise.Input):
            if attribute_name is None or attribute_name not in top_level_attributes:
                raise _user_exceptions.FlyteValueException(
                    attribute_name,
                    "Detected workflow input specified outside of top level.",
                )
            inputs.append(current_obj.rename_and_return_reference(attribute_name))
        elif isinstance(current_obj, Output):
            if attribute_name is None or attribute_name not in top_level_attributes:
                raise _user_exceptions.FlyteValueException(
                    attribute_name,
                    "Detected workflow output specified outside of top level.",
                )
            outputs.append(current_obj.rename_and_return_reference(attribute_name))
        elif isinstance(current_obj, (list, set, tuple)):
            for idx, element in enumerate(current_obj):
                pending.put((_assign_indexed_attribute_name(attribute_name, idx), element))
        elif isinstance(current_obj, dict):
            # Visit dictionary keys first, then values, mirroring insertion order
            # of the original implementation.
            for key in current_obj:
                pending.put((_assign_indexed_attribute_name(attribute_name, key), key))
            for key, value in _six.iteritems(current_obj):
                pending.put((_assign_indexed_attribute_name(attribute_name, key), value))

    return inputs, outputs, nodes
def _content_type_to_blob_format(content_type: _training_job_models.InputContentType) -> str:
    """
    Maps a training-job input content type to its blob format string.

    :param content_type: the InputContentType to translate. (The original
        annotation named the module itself; corrected to the enum type.)
    :rtype: str
    :raises _user_exceptions.FlyteValueException: for unsupported content types.
    """
    if content_type == _training_job_models.InputContentType.TEXT_CSV:
        return "csv"
    # BUG FIX: FlyteValueException takes (received_value, error_message);
    # the original passed only a pre-formatted message.
    raise _user_exceptions.FlyteValueException(
        content_type, "Unsupported InputContentType: {}".format(content_type))