def test_properties_describe_processing_job_response():
    """Check member population and `Get` expressions for a processing-job shape."""
    step_props = Properties("Steps.MyStep", "DescribeProcessingJobResponse")

    # A sample of member names that must be present in the instance dict.
    populated = step_props.__dict__
    for expected_member in (
        "ProcessingInputs",
        "ProcessingOutputConfig",
        "ProcessingEndTime",
    ):
        assert expected_member in populated

    # A direct member resolves to a dotted path below the step.
    job_name_expr = step_props.ProcessingJobName.expr
    assert job_name_expr == {"Get": "Steps.MyStep.ProcessingJobName"}

    # Subscripting `Outputs` is rendered with a quoted key inside the path.
    s3_uri = step_props.ProcessingOutputConfig.Outputs["MyOutputName"].S3Output.S3Uri
    assert s3_uri.expr == {
        "Get": "Steps.MyStep.ProcessingOutputConfig.Outputs['MyOutputName'].S3Output.S3Uri"
    }
def test_condition_in_mixed():
    """`ConditionIn` serializes a parameter query against mixed candidate values."""
    query = ParameterString(name="MyStr")
    # Candidates mix a plain literal, a property reference and an execution variable.
    candidates = ["abc", Properties("foo"), ExecutionVariables.START_DATETIME]

    request = ConditionIn(value=query, in_values=candidates).to_request()

    expected = {
        "Type": "In",
        "QueryValue": {"Get": "Parameters.MyStr"},
        "Values": ["abc", {"Get": "foo"}, {"Get": "Execution.StartDateTime"}],
    }
    assert request == expected
def __init__(
    self,
    name: str,
    processor: Processor,
    inputs: List[ProcessingInput] = None,
    outputs: List[ProcessingOutput] = None,
    job_arguments: List[str] = None,
    code: str = None,
    property_files: List[PropertyFile] = None,
    cache_config: CacheConfig = None,
):
    """Create a ProcessingStep that wraps a `Processor` instance.

    Besides the processor itself, the remaining arguments are the ones that
    would be supplied to the `process` method of
    `sagemaker.processing.Processor`.

    Args:
        name (str): Name of the processing step.
        processor (Processor): The `sagemaker.processing.Processor` instance.
        inputs (List[ProcessingInput]): Processing inputs. Defaults to `None`.
        outputs (List[ProcessingOutput]): Processing outputs. Defaults to `None`.
        job_arguments (List[str]): Strings passed into the processing job; the
            same list is also written to ``processor.arguments``.
            Defaults to `None`.
        code (str): An S3 URI or a local path to a file with the framework
            script to run. Defaults to `None`.
        property_files (List[PropertyFile]): Property files that the workflow
            looks for and resolves from the configured processing output list.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
    """
    super(ProcessingStep, self).__init__(name, StepTypeEnum.PROCESSING)

    step_path = f"Steps.{name}"

    self.processor = processor
    self.code = code
    self.inputs = inputs
    self.outputs = outputs
    self.property_files = property_files
    self.job_arguments = job_arguments

    # TODO: find out why `Processor.run` mutates the processor by setting its
    # `arguments` attribute, and refactor `Processor.run` if possible. Until
    # then the job arguments are mirrored onto the processor here.
    self.processor.arguments = job_arguments

    self._properties = Properties(
        path=step_path,
        shape_name="DescribeProcessingJobResponse",
    )
    self.cache_config = cache_config
def test_implicit_value():
    """Implicit str/int/float conversion of a pipeline property raises `TypeError`."""
    prop = Properties("Steps.MyStep", "DescribeTrainingJobResponse")

    # Each builtin conversion is paired with the exact message it must produce.
    conversions = (
        (str, "Pipeline variables do not support __str__ operation."),
        (int, "Pipeline variables do not support __int__ operation."),
        (float, "Pipeline variables do not support __float__ operation."),
    )
    for convert, expected_message in conversions:
        with pytest.raises(TypeError) as error:
            convert(prop.CreationTime)
        assert str(error.value) == expected_message
def __init__(
    self,
    name: str,
    estimator: EstimatorBase,
    model: Model,
    inputs: CompilationInput = None,
    job_arguments: List[str] = None,
    depends_on: Union[List[str], List[Step]] = None,
    retry_policies: List[RetryPolicy] = None,
    display_name: str = None,
    description: str = None,
    cache_config: CacheConfig = None,
):
    """Create a CompilationStep from an estimator and a model.

    Besides the `EstimatorBase` and `sagemaker.model.Model` instances, the
    remaining arguments are the ones that would be supplied to
    `sagemaker.model.Model.compile_model`.

    Args:
        name (str): Name of the compilation step.
        estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
        model (Model): A `sagemaker.model.Model` instance.
        inputs (CompilationInput): A `sagemaker.inputs.CompilationInput`
            instance. Defaults to `None`.
        job_arguments (List[str]): Strings to be passed into the job.
            Defaults to `None`.
        depends_on (List[str] or List[Step]): Step names or step instances
            this step depends on.
        retry_policies (List[RetryPolicy]): Retry policies for the step.
        display_name (str): Display name of the compilation step.
        description (str): Description of the compilation step.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
    """
    super(CompilationStep, self).__init__(
        name,
        StepTypeEnum.COMPILATION,
        display_name,
        description,
        depends_on,
        retry_policies,
    )

    step_path = f"Steps.{name}"

    self.model = model
    self.estimator = estimator
    self.job_arguments = job_arguments
    self.inputs = inputs
    self.cache_config = cache_config
    self._properties = Properties(
        path=step_path,
        shape_name="DescribeCompilationJobResponse",
    )
def __init__(
    self,
    name: str,
    estimator: EstimatorBase,
    inputs: Union[TrainingInput, dict, str, FileSystemInput] = None,
    cache_config: CacheConfig = None,
    depends_on: List[str] = None,
):
    """Create a TrainingStep from an `EstimatorBase` instance.

    Besides the estimator, the remaining arguments are the ones that would be
    supplied to the `fit` method of `sagemaker.estimator.Estimator`.

    Args:
        name (str): Name of the training step.
        estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
        inputs (str or dict or sagemaker.inputs.TrainingInput
            or sagemaker.inputs.FileSystemInput): Information about the
            training data, in one of the following forms:

            * (str) the S3 location where training data is saved, or a
              file:// path in local mode.
            * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput])
              when using multiple channels, a dict mapping channel names to
              strings or :func:`~sagemaker.inputs.TrainingInput` objects.
            * (sagemaker.inputs.TrainingInput) channel configuration for S3
              data sources that can provide additional information as well
              as the path to the training dataset. See
              :func:`sagemaker.inputs.TrainingInput` for full details.
            * (sagemaker.inputs.FileSystemInput) channel configuration for a
              file system data source that can provide additional
              information as well as the path to the training dataset.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
        depends_on (List[str]): Names of the steps this
            `sagemaker.workflow.steps.TrainingStep` depends on.
    """
    super(TrainingStep, self).__init__(name, StepTypeEnum.TRAINING, depends_on)

    step_path = f"Steps.{name}"

    self.inputs = inputs
    self.estimator = estimator
    self.cache_config = cache_config
    self._properties = Properties(
        path=step_path,
        shape_name="DescribeTrainingJobResponse",
    )
def __init__(
    self,
    name: str,
    model: Model,
    inputs: CreateModelInput = None,
    depends_on: List[str] = None,
):
    """Construct a CreateModelStep, given a `sagemaker.model.Model` instance.

    In addition to the Model instance, the other arguments are those that are
    supplied to the `_create_sagemaker_model` method of
    `sagemaker.model.Model`.

    Args:
        name (str): The name of the CreateModel step.
        model (Model): A `sagemaker.model.Model` instance.
        inputs (CreateModelInput): A `sagemaker.inputs.CreateModelInput`
            instance. Defaults to `None`, in which case a default-constructed
            `CreateModelInput()` is used.
        depends_on (List[str]): A list of step names this
            `sagemaker.workflow.steps.CreateModelStep` depends on.
    """
    super(CreateModelStep, self).__init__(name, StepTypeEnum.CREATE_MODEL, depends_on)
    self.model = model
    # `inputs` is optional: fall back to a default-constructed input so that
    # later code can always rely on `self.inputs` being an object.
    self.inputs = inputs or CreateModelInput()
    self._properties = Properties(path=f"Steps.{name}", shape_name="DescribeModelOutput")
def __init__(
    self,
    name: str,
    estimator: EstimatorBase,
    inputs: TrainingInput = None,
):
    """Create a TrainingStep from an `EstimatorBase` instance.

    Besides the estimator, the remaining arguments are the ones that would be
    supplied to the `fit` method of `sagemaker.estimator.Estimator`.

    Args:
        name (str): Name of the training step.
        estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
        inputs (TrainingInput): A `sagemaker.inputs.TrainingInput` instance.
            Defaults to `None`.
    """
    super(TrainingStep, self).__init__(name, StepTypeEnum.TRAINING)

    step_path = f"Steps.{name}"

    self.inputs = inputs
    self.estimator = estimator
    self._properties = Properties(
        path=step_path,
        shape_name="DescribeTrainingJobResponse",
    )
def __init__(
    self,
    name: str,
    transformer: Transformer,
    inputs: TransformInput,
):
    """Create a TransformStep from a `Transformer` instance.

    Besides the transformer, the remaining arguments are the ones that would
    be supplied to the `transform` method of
    `sagemaker.transformer.Transformer`.

    Args:
        name (str): Name of the transform step.
        transformer (Transformer): A `sagemaker.transformer.Transformer` instance.
        inputs (TransformInput): A `sagemaker.inputs.TransformInput` instance.
    """
    super(TransformStep, self).__init__(name, StepTypeEnum.TRANSFORM)

    step_path = f"Steps.{name}"

    self.inputs = inputs
    self.transformer = transformer
    self._properties = Properties(
        path=step_path,
        shape_name="DescribeTransformJobResponse",
    )
def test_join_expressions():
    """`Join.expr` renders literals, pipeline variables and a nested `Join`."""
    joined = Join(
        values=[
            "foo",
            ParameterFloat(name="MyFloat"),
            ParameterInteger(name="MyInt"),
            ParameterString(name="MyStr"),
            Properties(path="Steps.foo.OutputPath.S3Uri"),
            ExecutionVariables.PIPELINE_EXECUTION_ID,
            Join(on=",", values=[1, "a", False, 1.1]),
        ]
    )

    # The outer join is built without `on`, and the expected "On" is "".
    expected = {
        "Std:Join": {
            "On": "",
            "Values": [
                "foo",
                {"Get": "Parameters.MyFloat"},
                {"Get": "Parameters.MyInt"},
                {"Get": "Parameters.MyStr"},
                {"Get": "Steps.foo.OutputPath.S3Uri"},
                {"Get": "Execution.PipelineExecutionId"},
                {"Std:Join": {"On": ",", "Values": [1, "a", False, 1.1]}},
            ],
        },
    }
    assert joined.expr == expected
def __init__(
    self,
    name: str,
    tuner: HyperparameterTuner,
    inputs=None,
    job_arguments: List[str] = None,
    cache_config: CacheConfig = None,
    depends_on: List[str] = None,
):
    """Create a TuningStep from a `HyperparameterTuner` instance.

    Besides the tuner, the remaining arguments are the ones that would be
    supplied to the `fit` method of `sagemaker.tuner.HyperparameterTuner`.

    Args:
        name (str): Name of the tuning step.
        tuner (HyperparameterTuner): A `sagemaker.tuner.HyperparameterTuner`
            instance.
        inputs: Information about the training data. Refer to the ``fit()``
            method of the associated estimator; any of these forms is
            accepted:

            * (str) - The S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) -
              When using multiple channels, a dict mapping channel names to
              strings or :func:`~sagemaker.inputs.TrainingInput` objects.
            * (sagemaker.inputs.TrainingInput) - Channel configuration for S3
              data sources that can provide additional information about
              the training dataset. See
              :func:`sagemaker.inputs.TrainingInput` for full details.
            * (sagemaker.session.FileSystemInput) - Channel configuration for
              a file system data source that can provide additional
              information as well as the path to the training dataset.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
              Amazon :class:~`Record` objects serialized and stored in S3.
              For use with an estimator for an Amazon algorithm.
            * (sagemaker.amazon.amazon_estimator.FileSystemRecordSet) -
              Amazon SageMaker channel configuration for a file system data
              source for Amazon algorithms.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
              :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects,
              where each instance is a different channel of training data.
            * (list[sagemaker.amazon.amazon_estimator.FileSystemRecordSet]) -
              A list of
              :class:~`sagemaker.amazon.amazon_estimator.FileSystemRecordSet`
              objects, where each instance is a different channel of
              training data.
        job_arguments (List[str]): Strings to be passed into the job.
            Defaults to `None`.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
        depends_on (List[str]): Names of the steps this
            `sagemaker.workflow.steps.TuningStep` depends on.
    """
    super(TuningStep, self).__init__(name, StepTypeEnum.TUNING, depends_on)

    # The step exposes members from both response shapes listed below.
    response_shapes = [
        "DescribeHyperParameterTuningJobResponse",
        "ListTrainingJobsForHyperParameterTuningJobResponse",
    ]

    self.inputs = inputs
    self.tuner = tuner
    self.job_arguments = job_arguments
    self._properties = Properties(path=f"Steps.{name}", shape_names=response_shapes)
    self.cache_config = cache_config
def __init__(
    self,
    name: str,
    step_args: Dict = None,
    tuner: HyperparameterTuner = None,
    display_name: str = None,
    description: str = None,
    inputs=None,
    job_arguments: List[str] = None,
    cache_config: CacheConfig = None,
    depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
    retry_policies: List[RetryPolicy] = None,
):
    """Construct a `TuningStep`, given a `HyperparameterTuner` instance.

    In addition to the `HyperparameterTuner` instance, the other arguments are
    those that are supplied to the `fit` method of the
    `sagemaker.tuner.HyperparameterTuner`.

    Exactly one of ``step_args`` and ``tuner`` must be supplied. Building the
    step from ``tuner`` emits a `DeprecationWarning`.

    Args:
        name (str): The name of the `TuningStep`.
        step_args: The arguments for the `TuningStep` definition.
        tuner (HyperparameterTuner): A `sagemaker.tuner.HyperparameterTuner`
            instance.
        display_name (str): The display name of the `TuningStep`.
        description (str): The description of the `TuningStep`.
        inputs: Information about the training data. Please refer to the
            `fit()` method of the associated estimator, as this can take any
            of the following forms:

            * (str) - The S3 location where training data is saved.
            * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) -
              If using multiple channels for training data, you can specify
              a dictionary mapping channel names to strings or
              :func:`~sagemaker.inputs.TrainingInput` objects.
            * (sagemaker.inputs.TrainingInput) - Channel configuration for S3
              data sources that can provide additional information about
              the training dataset. See
              :func:`sagemaker.inputs.TrainingInput` for full details.
            * (sagemaker.session.FileSystemInput) - Channel configuration for
              a file system data source that can provide additional
              information as well as the path to the training dataset.
            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
              Amazon :class:~`Record` objects serialized and stored in S3.
              For use with an estimator for an Amazon algorithm.
            * (sagemaker.amazon.amazon_estimator.FileSystemRecordSet) -
              Amazon SageMaker channel configuration for a file system data
              source for Amazon algorithms.
            * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
              :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects,
              where each instance is a different channel of training data.
            * (list[sagemaker.amazon.amazon_estimator.FileSystemRecordSet]) -
              A list of
              :class:~`sagemaker.amazon.amazon_estimator.FileSystemRecordSet`
              objects, where each instance is a different channel of
              training data.
        job_arguments (List[str]): A list of strings to be passed into the
            job. Defaults to `None`.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
        depends_on (List[Union[str, Step, StepCollection]]): A list of
            `Step`/`StepCollection` names or `Step` instances or
            `StepCollection` instances that this `TuningStep` depends on.
        retry_policies (List[RetryPolicy]): A list of retry policies.

    Raises:
        ValueError: If both or neither of ``step_args`` and ``tuner`` are given.
    """
    super(TuningStep, self).__init__(
        name, StepTypeEnum.TUNING, display_name, description, depends_on, retry_policies
    )

    # Exactly one of the two construction modes must be selected.
    if (step_args is not None) == (tuner is not None):
        raise ValueError("either step_args or tuner need to be given, but not both.")

    self.step_args = step_args
    self.tuner = tuner
    self.inputs = inputs
    self.job_arguments = job_arguments
    self._properties = Properties(
        path=f"Steps.{name}",
        shape_names=[
            "DescribeHyperParameterTuningJobResponse",
            "ListTrainingJobsForHyperParameterTuningJobResponse",
        ],
    )
    self.cache_config = cache_config

    # Use the same `is None` test as the validation above, so that an empty
    # (falsy) `step_args` is not mistaken for the deprecated tuner mode.
    if self.step_args is None:
        warnings.warn(
            (
                'We are deprecating the instantiation of TuningStep using "tuner". '
                'Instead, simply using "step_args".'
            ),
            DeprecationWarning,
        )
def __init__(
    self,
    name: str,
    step_args: Dict = None,
    processor: Processor = None,
    display_name: str = None,
    description: str = None,
    inputs: List[ProcessingInput] = None,
    outputs: List[ProcessingOutput] = None,
    job_arguments: List[str] = None,
    code: str = None,
    property_files: List[PropertyFile] = None,
    cache_config: CacheConfig = None,
    depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
    retry_policies: List[RetryPolicy] = None,
    kms_key=None,
):
    """Construct a `ProcessingStep`, given a `Processor` instance.

    In addition to the `Processor` instance, the other arguments are those
    that are supplied to the `process` method of the
    `sagemaker.processing.Processor`.

    Exactly one of ``step_args`` and ``processor`` must be supplied. Building
    the step from ``processor`` emits a `DeprecationWarning`.

    Args:
        name (str): The name of the `ProcessingStep`.
        step_args: The arguments for the `ProcessingStep` definition.
        processor (Processor): A `sagemaker.processing.Processor` instance.
        display_name (str): The display name of the `ProcessingStep`.
        description (str): The description of the `ProcessingStep`.
        inputs (List[ProcessingInput]): A list of processing inputs.
            Defaults to `None`.
        outputs (List[ProcessingOutput]): A list of processing outputs.
            Defaults to `None`.
        job_arguments (List[str]): A list of strings to be passed into the
            processing job. Defaults to `None`.
        code (str): This can be an S3 URI or a local path to a file with the
            framework script to run. Defaults to `None`.
        property_files (List[PropertyFile]): A list of property files that
            workflow looks for and resolves from the configured processing
            output list.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
        depends_on (List[Union[str, Step, StepCollection]]): A list of
            `Step`/`StepCollection` names or `Step` instances or
            `StepCollection` instances that this `ProcessingStep` depends on.
        retry_policies (List[RetryPolicy]): A list of retry policies.
        kms_key (str): The ARN of the KMS key that is used to encrypt the
            user code file. Defaults to `None`.

    Raises:
        ValueError: If both or neither of ``step_args`` and ``processor`` are
            given.
    """
    super(ProcessingStep, self).__init__(
        name, StepTypeEnum.PROCESSING, display_name, description, depends_on, retry_policies
    )

    # Exactly one of the two construction modes must be selected.
    if (step_args is not None) == (processor is not None):
        raise ValueError("either step_args or processor need to be given, but not both.")

    self.step_args = step_args
    self.processor = processor
    self.inputs = inputs
    self.outputs = outputs
    self.job_arguments = job_arguments
    self.code = code
    self.property_files = property_files
    self.job_name = None
    self.kms_key = kms_key
    self.cache_config = cache_config
    self._properties = Properties(
        path=f"Steps.{name}", shape_name="DescribeProcessingJobResponse"
    )

    # Use the same `is None` test as the validation above. A truthiness test
    # would send an empty (falsy) `step_args` down the processor branch, where
    # `self.processor` is None and the attribute assignment would fail.
    if self.step_args is None:
        # Examine why run method in `sagemaker.processing.Processor`
        # mutates the processor instance by setting the instance's
        # arguments attribute. Refactor `Processor.run`, if possible.
        self.processor.arguments = job_arguments

        if code:
            code_url = urlparse(code)
            if code_url.scheme in ("", "file"):
                # By default, `Processor` will upload the local code to an S3 path
                # containing a timestamp. This causes cache misses whenever a
                # pipeline is updated, even if the underlying script hasn't changed.
                # To avoid this, hash the contents of the script and include it
                # in the `job_name` passed to the `Processor`, which will be used
                # instead of the timestamped path.
                self.job_name = self._generate_code_upload_path()

        warnings.warn(
            (
                'We are deprecating the instantiation of ProcessingStep using "processor". '
                'Instead, simply using "step_args".'
            ),
            DeprecationWarning,
        )
def test_string_builtin_funcs_that_return_bool():
    """`startswith`/`endswith` on a property reference evaluate as falsy."""
    model_package_props = Properties("Steps.MyStep", "DescribeModelPackageOutput")

    # The property is only resolved at runtime by the Pipeline backend, so the
    # SDK cannot know the actual string value here.
    has_prefix = model_package_props.startswith("s3")
    assert not has_prefix

    has_suffix = model_package_props.endswith("s3")
    assert not has_suffix
def __init__(self, name, display_name=None, description=None):
    """Create a CustomStep registered with the training step type.

    Args:
        name: Name of the step; also used to build the `Steps.<name>`
            property path.
        display_name: Display name forwarded to the parent constructor.
            Defaults to `None`.
        description: Description forwarded to the parent constructor.
            Defaults to `None`.
    """
    super(CustomStep, self).__init__(
        name,
        display_name,
        description,
        StepTypeEnum.TRAINING,
    )
    step_path = f"Steps.{name}"
    self._properties = Properties(path=step_path)
def __init__(
    self,
    name: str,
    step_args: Optional[dict] = None,
    content_types: Optional[list] = None,
    response_types: Optional[list] = None,
    inference_instances: Optional[list] = None,
    transform_instances: Optional[list] = None,
    estimator: EstimatorBase = None,
    model_data=None,
    model_package_group_name=None,
    model_metrics=None,
    metadata_properties=None,
    approval_status="PendingManualApproval",
    image_uri=None,
    compile_model_family=None,
    display_name: str = None,
    description=None,
    depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
    retry_policies: Optional[List[RetryPolicy]] = None,
    tags=None,
    container_def_list=None,
    drift_check_baselines=None,
    customer_metadata_properties=None,
    **kwargs,
):
    """Constructor of a register model step.

    Args:
        name (str): The name of this `_RegisterModelStep`.
        step_args (dict): The arguments for this `_RegisterModelStep`
            definition (default: None).
        content_types (list): The supported MIME types for the input data
            (default: None).
        response_types (list): The supported MIME types for the output data
            (default: None).
        inference_instances (list): A list of the instance types that are
            used to generate inferences in real-time (default: None).
        transform_instances (list): A list of the instance types on which a
            transformation job can be run or on which an endpoint can be
            deployed (default: None).
        estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase`
            instance (default: None).
        model_data: the S3 URI to the model data from training
            (default: None).
        model_package_group_name (str): Model Package Group name, exclusive
            to `model_package_name`, using `model_package_group_name` makes
            the Model Package versioned (default: None).
        model_metrics (ModelMetrics): ModelMetrics object (default: None).
        metadata_properties (MetadataProperties): MetadataProperties object
            (default: None).
        approval_status (str): Model Approval Status, values can be
            "Approved", "Rejected", or "PendingManualApproval"
            (default: "PendingManualApproval").
        image_uri (str): The container image uri for Model Package, if not
            specified, Estimator's training container image will be used
            (default: None).
        compile_model_family (str): Instance family for compiled model, if
            specified, a compiled model will be used (default: None).
        display_name (str): The display name of this `_RegisterModelStep`
            step (default: None).
        description (str): Model Package description (default: None).
        depends_on (List[Union[str, Step, StepCollection]]): The list of
            `Step`/`StepCollection` names or `Step` instances or
            `StepCollection` instances that the current `Step` depends on
            (default: None).
        retry_policies (List[RetryPolicy]): The list of retry policies for
            the current step (default: None).
        tags (List[dict[str, str]]): A list of dictionaries containing
            key-value pairs used to configure the create model package
            request (default: None).
        container_def_list (list): A list of container definitions
            (default: None).
        drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines
            object (default: None).
        customer_metadata_properties (dict[str, str]): A dictionary of
            key-value paired metadata properties (default: None).
        **kwargs: additional arguments to `create_model`.

    Raises:
        ValueError: If `step_args` is given while all four of
            `content_types`, `response_types`, `inference_instances` and
            `transform_instances` are given as well, or if `step_args` is
            omitted while any of those four is missing.
    """
    super(_RegisterModelStep, self).__init__(
        name,
        StepTypeEnum.REGISTER_MODEL,
        display_name,
        description,
        depends_on,
        retry_policies,
    )

    # The legacy argument set counts as "missing" when any of its four
    # members is absent.
    deprecated_args_missing = (
        content_types is None
        or response_types is None
        or inference_instances is None
        or transform_instances is None
    )
    # Valid only when exactly one of "step_args is missing" and
    # "legacy set is missing" holds.
    if (step_args is None) == deprecated_args_missing:
        raise ValueError(
            "step_args and the set of (content_types, response_types, "
            "inference_instances, transform_instances) are mutually exclusive. "
            "Either of them should be provided."
        )

    self.step_args = step_args
    self.estimator = estimator
    self.model_data = model_data
    self.content_types = content_types
    self.response_types = response_types
    self.inference_instances = inference_instances
    self.transform_instances = transform_instances
    self.model_package_group_name = model_package_group_name
    self.tags = tags
    self.model_metrics = model_metrics
    self.drift_check_baselines = drift_check_baselines
    self.customer_metadata_properties = customer_metadata_properties
    self.metadata_properties = metadata_properties
    self.approval_status = approval_status
    self.image_uri = image_uri
    self.compile_model_family = compile_model_family
    self.description = description
    self.kwargs = kwargs
    self.container_def_list = container_def_list

    self._properties = Properties(
        path=f"Steps.{name}", shape_name="DescribeModelPackageOutput"
    )
def __init__(
    self,
    name: str,
    clarify_check_config: ClarifyCheckConfig,
    check_job_config: CheckJobConfig,
    skip_check: Union[bool, PipelineNonPrimitiveInputTypes] = False,
    register_new_baseline: Union[bool, PipelineNonPrimitiveInputTypes] = False,
    model_package_group_name: Union[str, PipelineNonPrimitiveInputTypes] = None,
    supplied_baseline_constraints: Union[str, PipelineNonPrimitiveInputTypes] = None,
    display_name: str = None,
    description: str = None,
    cache_config: CacheConfig = None,
    depends_on: Union[List[str], List[Step]] = None,
):
    """Create a ClarifyCheckStep.

    Args:
        name (str): Name of the ClarifyCheckStep step.
        clarify_check_config (ClarifyCheckConfig): A `DataBiasCheckConfig`,
            `ModelBiasCheckConfig` or `ModelExplainabilityCheckConfig`
            instance.
        check_job_config (CheckJobConfig): A CheckJobConfig instance.
        skip_check (bool or PipelineNonPrimitiveInputTypes): Whether the
            check should be skipped (default: False).
        register_new_baseline (bool or PipelineNonPrimitiveInputTypes):
            Whether the new baseline should be registered (default: False).
        model_package_group_name (str or PipelineNonPrimitiveInputTypes):
            Name of a registered model package group, among which the
            baseline will be fetched from the latest approved model
            (default: None).
        supplied_baseline_constraints (str or PipelineNonPrimitiveInputTypes):
            S3 path to the supplied constraints object representing the
            constraints JSON file which will be used for drift to check
            (default: None).
        display_name (str): Display name of the step (default: None).
        description (str): Description of the step (default: None).
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance (default: None).
        depends_on (List[str] or List[Step]): Step names or step instances
            this `sagemaker.workflow.steps.ClarifyCheckStep` depends on
            (default: None).

    Raises:
        RuntimeError: If `clarify_check_config` is not one of the supported
            config classes, if its `s3_analysis_config_output_path` is a
            pipeline variable, or if that path is unset/empty while
            `s3_output_path` is a pipeline variable.
    """
    supported_configs = (
        DataBiasCheckConfig,
        ModelBiasCheckConfig,
        ModelExplainabilityCheckConfig,
    )
    if not isinstance(clarify_check_config, supported_configs):
        raise RuntimeError(
            "The clarify_check_config can only be object of "
            "DataBiasCheckConfig, ModelBiasCheckConfig or ModelExplainabilityCheckConfig"
        )

    data_config = clarify_check_config.data_config
    analysis_output_path = data_config.s3_analysis_config_output_path
    if is_pipeline_variable(analysis_output_path):
        raise RuntimeError(
            "s3_analysis_config_output_path cannot be of type "
            "ExecutionVariable/Expression/Parameter/Properties"
        )
    if not analysis_output_path:
        # Without an explicit analysis-config path, a pipeline variable is
        # not accepted for the regular output path either.
        if is_pipeline_variable(data_config.s3_output_path):
            raise RuntimeError(
                "`s3_output_path` cannot be of type ExecutionVariable/Expression/Parameter"
                "/Properties if `s3_analysis_config_output_path` is none or empty "
            )

    super(ClarifyCheckStep, self).__init__(
        name,
        display_name,
        description,
        StepTypeEnum.CLARIFY_CHECK,
        depends_on,
    )

    self.skip_check = skip_check
    self.register_new_baseline = register_new_baseline
    self.clarify_check_config = clarify_check_config
    self.check_job_config = check_job_config
    self.model_package_group_name = model_package_group_name
    self.supplied_baseline_constraints = supplied_baseline_constraints
    self.cache_config = cache_config

    # Explainability checks get their own monitor type; both bias checks
    # share the bias monitor.
    if isinstance(clarify_check_config, ModelExplainabilityCheckConfig):
        monitor_kind = "ModelExplainabilityMonitor"
    else:
        monitor_kind = "ModelBiasMonitor"
    self._model_monitor = check_job_config._generate_model_monitor(monitor_kind)

    analysis_config_uri = self._upload_monitoring_analysis_config()
    self.clarify_check_config.monitoring_analysis_config_uri = analysis_config_uri

    self._baselining_processor = self._model_monitor._create_baselining_processor()
    analysis_config = self._generate_processing_job_analysis_config()
    self._processing_params = self._generate_processing_job_parameters(
        analysis_config, self._baselining_processor
    )

    # Attach the step's output members directly to the root property's
    # instance dict.
    root_path = f"Steps.{name}"
    root_prop = Properties(path=root_path)
    for member in (
        "CalculatedBaselineConstraints",
        "BaselineUsedForDriftCheckConstraints",
    ):
        root_prop.__dict__[member] = Properties(f"{root_path}.{member}")
    self._properties = root_prop
def __init__(
    self,
    name: str,
    quality_check_config: QualityCheckConfig,
    check_job_config: CheckJobConfig,
    skip_check: Union[bool, PipelineNonPrimitiveInputTypes] = False,
    register_new_baseline: Union[bool, PipelineNonPrimitiveInputTypes] = False,
    model_package_group_name: Union[str, PipelineNonPrimitiveInputTypes] = None,
    supplied_baseline_statistics: Union[str, PipelineNonPrimitiveInputTypes] = None,
    supplied_baseline_constraints: Union[str, PipelineNonPrimitiveInputTypes] = None,
    display_name: str = None,
    description: str = None,
    cache_config: CacheConfig = None,
    depends_on: Union[List[str], List[Step]] = None,
):
    """Create a QualityCheckStep.

    Args:
        name (str): Name of the QualityCheckStep step.
        quality_check_config (QualityCheckConfig): A `DataQualityCheckConfig`
            or `ModelQualityCheckConfig` instance.
        check_job_config (CheckJobConfig): A CheckJobConfig instance.
        skip_check (bool or PipelineNonPrimitiveInputTypes): Whether the
            check should be skipped (default: False).
        register_new_baseline (bool or PipelineNonPrimitiveInputTypes):
            Whether the new baseline should be registered (default: False).
        model_package_group_name (str or PipelineNonPrimitiveInputTypes):
            Name of a registered model package group, among which the
            baseline will be fetched from the latest approved model
            (default: None).
        supplied_baseline_statistics (str or PipelineNonPrimitiveInputTypes):
            S3 path to the supplied statistics object representing the
            statistics JSON file which will be used for drift to check
            (default: None).
        supplied_baseline_constraints (str or PipelineNonPrimitiveInputTypes):
            S3 path to the supplied constraints object representing the
            constraints JSON file which will be used for drift to check
            (default: None).
        display_name (str): Display name of the step (default: None).
        description (str): Description of the step (default: None).
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance (default: None).
        depends_on (List[str] or List[Step]): Step names or step instances
            this `sagemaker.workflow.steps.QualityCheckStep` depends on
            (default: None).

    Raises:
        RuntimeError: If `quality_check_config` is neither a
            `DataQualityCheckConfig` nor a `ModelQualityCheckConfig`.
    """
    supported_configs = (DataQualityCheckConfig, ModelQualityCheckConfig)
    if not isinstance(quality_check_config, supported_configs):
        raise RuntimeError(
            "The quality_check_config can only be object of "
            "DataQualityCheckConfig or ModelQualityCheckConfig"
        )

    super(QualityCheckStep, self).__init__(
        name,
        display_name,
        description,
        StepTypeEnum.QUALITY_CHECK,
        depends_on,
    )

    self.skip_check = skip_check
    self.register_new_baseline = register_new_baseline
    self.check_job_config = check_job_config
    self.quality_check_config = quality_check_config
    self.model_package_group_name = model_package_group_name
    self.supplied_baseline_statistics = supplied_baseline_statistics
    self.supplied_baseline_constraints = supplied_baseline_constraints
    self.cache_config = cache_config

    # Data-quality checks use the default monitor; model-quality checks use
    # the dedicated model-quality monitor.
    if isinstance(quality_check_config, DataQualityCheckConfig):
        monitor_kind = "DefaultModelMonitor"
    else:
        monitor_kind = "ModelQualityMonitor"
    self._model_monitor = check_job_config._generate_model_monitor(monitor_kind)

    baselining_job_name = self._model_monitor._generate_baselining_job_name()
    self._model_monitor.latest_baselining_job_name = baselining_job_name

    # The helper returns a mapping whose values may be None; only the
    # inputs that are actually present are kept on the step.
    job_inputs = self._generate_baseline_job_inputs()
    self._baseline_job_inputs = [
        job_input for job_input in job_inputs.values() if job_input is not None
    ]

    self._baseline_output = self._generate_baseline_output()
    self._baselining_processor = self._generate_baseline_processor(
        baseline_dataset_input=job_inputs["baseline_dataset_input"],
        baseline_output=self._baseline_output,
        post_processor_script_input=job_inputs["post_processor_script_input"],
        record_preprocessor_script_input=job_inputs["record_preprocessor_script_input"],
    )

    # Attach the step's output members directly to the root property's
    # instance dict.
    root_path = f"Steps.{name}"
    root_prop = Properties(path=root_path)
    for member in (
        "CalculatedBaselineConstraints",
        "CalculatedBaselineStatistics",
        "BaselineUsedForDriftCheckStatistics",
        "BaselineUsedForDriftCheckConstraints",
    ):
        root_prop.__dict__[member] = Properties(f"{root_path}.{member}")
    self._properties = root_prop
def __init__(
    self,
    name: str,
    step_args: Dict = None,
    estimator: EstimatorBase = None,
    display_name: str = None,
    description: str = None,
    inputs: Union[TrainingInput, dict, str, FileSystemInput] = None,
    cache_config: CacheConfig = None,
    depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
    retry_policies: List[RetryPolicy] = None,
):
    """Construct a `TrainingStep` from either `step_args` or an `EstimatorBase` instance.

    Exactly one of `step_args` and `estimator` must be supplied. When an
    `EstimatorBase` instance is used, the other arguments are those that are
    supplied to the `fit` method of the `sagemaker.estimator.Estimator`.

    Args:
        name (str): The name of the `TrainingStep`.
        step_args: The arguments for the `TrainingStep` definition.
        estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
        display_name (str): The display name of the `TrainingStep`.
        description (str): The description of the `TrainingStep`.
        inputs (Union[str, dict, TrainingInput, FileSystemInput]): Information
            about the training data. This can be one of four types:

            * (str) the S3 location where training data is saved, or a file:// path in
              local mode.
            * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) If using multiple
              channels for training data, you can specify a dictionary mapping channel names
              to strings or :func:`~sagemaker.inputs.TrainingInput` objects.
            * (sagemaker.inputs.TrainingInput) - channel configuration for S3 data sources
              that can provide additional information as well as the path to the training
              dataset. See :func:`sagemaker.inputs.TrainingInput` for full details.
            * (sagemaker.inputs.FileSystemInput) - channel configuration for
              a file system data source that can provide additional information as well as
              the path to the training dataset.

        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
        depends_on (List[Union[str, Step, StepCollection]]): A list of `Step`/`StepCollection`
            names or `Step` instances or `StepCollection` instances that this `TrainingStep`
            depends on.
        retry_policies (List[RetryPolicy]): A list of retry policies.

    Raises:
        ValueError: If both or neither of `step_args` and `estimator` are given.
    """
    super(TrainingStep, self).__init__(
        name, StepTypeEnum.TRAINING, display_name, description, depends_on, retry_policies
    )

    # XOR is False when both or neither argument is supplied; `not` binds
    # looser than `^`, so this reads as `not (a ^ b)`.
    if not (step_args is not None) ^ (estimator is not None):
        raise ValueError("either step_args or estimator need to be given.")

    self.step_args = step_args
    self.estimator = estimator
    self.inputs = inputs

    self._properties = Properties(
        path=f"Steps.{name}", shape_name="DescribeTrainingJobResponse"
    )
    self.cache_config = cache_config

    if self.cache_config:
        if (self.step_args and "ProfilerConfig" in self.step_args) or (
            self.estimator is not None and not self.estimator.disable_profiler
        ):
            msg = (
                "Profiling is enabled on the provided estimator. "
                "The default profiler rule includes a timestamp "
                "which will change each time the pipeline is "
                "upserted, causing cache misses. If profiling "
                "is not needed, set disable_profiler to True on the estimator."
            )
            warnings.warn(msg)

    # Use the same `is None` test as the validation above so that an empty
    # (but explicitly supplied) `step_args` is not reported as estimator usage.
    if self.step_args is None:
        warnings.warn(
            (
                'We are deprecating the instantiation of TrainingStep using "estimator". '
                'Instead, simply use "step_args".'
            ),
            DeprecationWarning,
        )

    self.job_name = None
    if estimator and (estimator.source_dir or estimator.entry_point):
        # By default, `Estimator` will upload the local code to an S3 path
        # containing a timestamp. This causes cache misses whenever a
        # pipeline is updated, even if the underlying script hasn't changed.
        # To avoid this, hash the contents of the training script and include it
        # in the `job_name` passed to the `Estimator`, which will be used
        # instead of the timestamped path.
        self.job_name = self._generate_code_upload_path()
def __init__(self, name):
    """Create a custom step registered under the `TRAINING` step type.

    Args:
        name: The name of the step; also used to build the root
            property path ``Steps.<name>``.
    """
    super(CustomStep, self).__init__(name, StepTypeEnum.TRAINING)

    # Root of every property reference exposed by this step.
    step_path = f"Steps.{name}"
    self._properties = Properties(path=step_path)
def __init__(
    self,
    name: str,
    processor: Processor,
    display_name: str = None,
    description: str = None,
    inputs: List[ProcessingInput] = None,
    outputs: List[ProcessingOutput] = None,
    job_arguments: List[str] = None,
    code: str = None,
    property_files: List[PropertyFile] = None,
    cache_config: CacheConfig = None,
    depends_on: Union[List[str], List[Step]] = None,
    retry_policies: List[RetryPolicy] = None,
):
    """Build a `ProcessingStep` around a `Processor` instance.

    Apart from the processor itself, the arguments mirror those supplied to
    the `process` method of the `sagemaker.processing.Processor`.

    Args:
        name (str): The name of the processing step.
        processor (Processor): A `sagemaker.processing.Processor` instance.
        display_name (str): The display name of the processing step.
        description (str): The description of the processing step.
        inputs (List[ProcessingInput]): A list of `ProcessingInput`
            instances. Defaults to `None`.
        outputs (List[ProcessingOutput]): A list of `ProcessingOutput`
            instances. Defaults to `None`.
        job_arguments (List[str]): A list of strings to be passed into the
            processing job. Defaults to `None`.
        code (str): An S3 URI or a local path to a file with the framework
            script to run. Defaults to `None`.
        property_files (List[PropertyFile]): A list of property files that
            workflow looks for and resolves from the configured processing
            output list.
        cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
            instance.
        depends_on (List[str] or List[Step]): A list of step names or step
            instances this `sagemaker.workflow.steps.ProcessingStep`
            depends on.
        retry_policies (List[RetryPolicy]): A list of retry policies.
    """
    super(ProcessingStep, self).__init__(
        name,
        StepTypeEnum.PROCESSING,
        display_name,
        description,
        depends_on,
        retry_policies,
    )

    self.processor = processor
    self.inputs = inputs
    self.outputs = outputs
    self.job_arguments = job_arguments
    self.code = code
    self.property_files = property_files
    self.job_name = None

    # Examine why run method in sagemaker.processing.Processor mutates the
    # processor instance by setting the instance's arguments attribute.
    # Refactor Processor.run, if possible.
    self.processor.arguments = job_arguments

    step_path = f"Steps.{name}"
    self._properties = Properties(path=step_path, shape_name="DescribeProcessingJobResponse")
    self.cache_config = cache_config

    # Without a script there is no code upload path to stabilise.
    if not code:
        return

    # Only local scripts (no scheme, or an explicit file:// scheme) qualify.
    if urlparse(code).scheme in ("", "file"):
        # By default, Processor will upload the local code to an S3 path
        # containing a timestamp. This causes cache misses whenever a
        # pipeline is updated, even if the underlying script hasn't changed.
        # To avoid this, hash the contents of the script and include it
        # in the job_name passed to the Processor, which will be used
        # instead of the timestamped path.
        self.job_name = self._generate_code_upload_path()
def __init__(
    self,
    name: str,
    estimator: EstimatorBase,
    model_data,
    content_types,
    response_types,
    inference_instances,
    transform_instances,
    model_package_group_name=None,
    model_metrics=None,
    metadata_properties=None,
    approval_status="PendingManualApproval",
    image_uri=None,
    compile_model_family=None,
    description=None,
    **kwargs,
):
    """Construct a register model step.

    Args:
        name (str): The name of the step.
        estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
        model_data: The S3 URI to the model data from training.
        content_types (list): The supported MIME types for the input data.
        response_types (list): The supported MIME types for the output data.
        inference_instances (list): A list of the instance types that are used
            to generate inferences in real-time.
        transform_instances (list): A list of the instance types on which a
            transformation job can be run or on which an endpoint can be
            deployed.
        model_package_group_name (str): Model Package Group name, exclusive to
            `model_package_name`; using `model_package_group_name` makes the
            Model Package versioned (default: None).
        model_metrics (ModelMetrics): ModelMetrics object (default: None).
        metadata_properties (MetadataProperties): MetadataProperties object
            (default: None).
        approval_status (str): Model Approval Status, values can be
            "Approved", "Rejected", or "PendingManualApproval"
            (default: "PendingManualApproval").
        image_uri (str): The container image uri for Model Package; if not
            specified, Estimator's training container image will be used
            (default: None).
        compile_model_family (str): Instance family for compiled model; if
            specified, a compiled model will be used (default: None).
        description (str): Model Package description (default: None).
        **kwargs: Additional arguments to `create_model`.
    """
    super(_RegisterModelStep, self).__init__(name, StepTypeEnum.REGISTER_MODEL)

    # Model source.
    self.estimator = estimator
    self.model_data = model_data

    # Inference specification.
    self.content_types = content_types
    self.response_types = response_types
    self.inference_instances = inference_instances
    self.transform_instances = transform_instances

    # Model package settings.
    self.model_package_group_name = model_package_group_name
    self.model_metrics = model_metrics
    self.metadata_properties = metadata_properties
    self.approval_status = approval_status
    self.image_uri = image_uri
    self.compile_model_family = compile_model_family
    self.description = description

    # Extra keyword arguments are stored as given.
    self.kwargs = kwargs

    step_path = f"Steps.{name}"
    self._properties = Properties(path=step_path, shape_name="DescribeModelPackageResponse")
def test_pipeline_variable_in_pipeline_definition(sagemaker_session):
    """Check how pipeline variables are rendered inside a pipeline definition.

    A condition step compares a string parameter with the stringified integer
    parameter, and its else-branch is a fail step whose error message joins a
    literal, a `JsonGet`, a step property and a parameter. The test asserts the
    resulting "Parameters" and "Steps" sections of the definition JSON.

    Args:
        sagemaker_session: Session object handed to the `Pipeline`
            (presumably supplied by a pytest fixture).
    """
    str_param = ParameterString(name="MyString", default_value="1")
    int_param = ParameterInteger(name="MyInteger", default_value=3)

    prop_file = PropertyFile(name="name", output_name="result", path="output")
    json_get = JsonGet(
        step_name="my-step",
        property_file=prop_file,
        json_path="my-json-path",
    )
    step_props = Properties("Steps.MyStep", "DescribeProcessingJobResponse")

    greater_than = ConditionGreaterThan(left=str_param, right=int_param.to_string())

    error_message = Join(
        on=" ",
        values=[
            "Execution failed due to condition check fails, see:",
            json_get.to_string(),
            step_props.ProcessingOutputConfig.Outputs["MyOutputName"].S3Output.S3Uri.to_string(),
            int_param,
        ],
    )
    fail_step = FailStep(name="MyFailStep", error_message=error_message)
    cond_step = ConditionStep(
        name="MyCondStep",
        conditions=[greater_than],
        if_steps=[],
        else_steps=[fail_step],
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[str_param, int_param],
        steps=[cond_step],
        sagemaker_session=sagemaker_session,
    )

    definition = json.loads(pipeline.definition())

    # Expected "Parameters" section.
    expected_parameters = [
        {"Name": "MyString", "Type": "String", "DefaultValue": "1"},
        {"Name": "MyInteger", "Type": "Integer", "DefaultValue": 3},
    ]
    assert definition["Parameters"] == expected_parameters

    assert len(definition["Steps"]) == 1

    # Expected rendering of the single condition, built inside-out.
    int_param_ref = {"Get": "Parameters.MyInteger"}
    expected_condition = {
        "Type": "GreaterThan",
        "LeftValue": {"Get": "Parameters.MyString"},
        "RightValue": {"Std:Join": {"On": "", "Values": [int_param_ref]}},
    }

    # `to_string()` on a variable renders as a join with an empty separator.
    expected_json_get_str = {
        "Std:Join": {
            "On": "",
            "Values": [
                {
                    "Std:JsonGet": {
                        "PropertyFile": {"Get": "Steps.my-step.PropertyFiles.name"},
                        "Path": "my-json-path",
                    }
                },
            ],
        },
    }
    expected_output_uri_str = {
        "Std:Join": {
            "On": "",
            "Values": [
                {
                    "Get": (
                        "Steps.MyStep.ProcessingOutputConfig."
                        "Outputs['MyOutputName'].S3Output.S3Uri"
                    )
                },
            ],
        },
    }
    expected_fail_step = {
        "Name": "MyFailStep",
        "Type": "Fail",
        "Arguments": {
            "ErrorMessage": {
                "Std:Join": {
                    "On": " ",
                    "Values": [
                        "Execution failed due to condition check fails, see:",
                        expected_json_get_str,
                        expected_output_uri_str,
                        {"Get": "Parameters.MyInteger"},
                    ],
                }
            }
        },
    }
    expected_cond_step = {
        "Name": "MyCondStep",
        "Type": "Condition",
        "Arguments": {
            "Conditions": [expected_condition],
            "IfSteps": [],
            "ElseSteps": [expected_fail_step],
        },
    }
    assert definition["Steps"][0] == expected_cond_step