def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None, wait=True):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
    ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
    this method returns the result of invoking ``self.predictor_cls`` on the
    created endpoint name.

    The name of the created model is accessible in the ``name`` field of this
    ``Model`` after deploy returns.

    The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run
            in the ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to.
            For example, 'ml.p2.xlarge'.
        endpoint_name (str): The name of the endpoint to create (default: None).
            If not specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this
            specific endpoint.
        wait (bool): Whether the call should wait until the deployment of
            the model completes (default: True).

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if
            ``self.predictor_cls`` is not None. Otherwise, return None.
    """
    # Lazily create a session so the model can be constructed offline.
    if not self.sagemaker_session:
        self.sagemaker_session = Session()
    containers = self.pipeline_container_def(instance_type)
    # Default the model name from the first container's image name.
    self.name = self.name or name_from_image(containers[0]["Image"])
    self.sagemaker_session.create_model(
        self.name, self.role, containers, vpc_config=self.vpc_config
    )
    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count
    )
    # The endpoint name defaults to the model name when not given.
    self.endpoint_name = endpoint_name or self.name
    self.sagemaker_session.endpoint_from_production_variants(
        self.endpoint_name, [production_variant], tags, wait=wait
    )
    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
    return None
def upload(local_path, desired_s3_uri, kms_key=None, sagemaker_session=None):
    """Static method that uploads a given file or directory to S3.

    Args:
        local_path (str): Path (absolute or relative) of local file or
            directory to upload.
        desired_s3_uri (str): The desired S3 location to upload to. It is the
            prefix to which the local filename will be added.
        kms_key (str): The KMS key to use to encrypt the files.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, one is created using the
            default AWS configuration chain.

    Returns:
        The S3 uri of the uploaded file(s).
    """
    session = sagemaker_session or Session()
    bucket, key_prefix = parse_s3_url(url=desired_s3_uri)
    # Request SSE-KMS encryption on the uploaded object(s) when a key is given.
    encryption_args = (
        {"SSEKMSKeyId": kms_key, "ServerSideEncryption": "aws:kms"}
        if kms_key is not None
        else None
    )
    return session.upload_data(
        path=local_path,
        bucket=bucket,
        key_prefix=key_prefix,
        extra_args=encryption_args,
    )
def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estimator_cls=None):
    """Attach to an existing hyperparameter tuning job.

    Args:
        tuning_job_name (str): Name of the hyperparameter tuning job to
            attach to.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs. If not
            specified, one is created using the default AWS configuration
            chain.
        job_details (dict): The response to a
            ``DescribeHyperParameterTuningJob`` call. If not specified, one
            such call is performed with the provided tuning job name.
        estimator_cls (str): The estimator class name associated with the
            training jobs. If not specified, it is derived from the training
            job definition.

    Returns:
        An instance of the calling class bound to the existing tuning job.
    """
    sagemaker_session = sagemaker_session or Session()
    # Only call the service when the caller did not supply a description.
    if job_details is None:
        job_details = sagemaker_session.sagemaker_client \
            .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)
    estimator_cls = cls._prepare_estimator_cls(
        estimator_cls, job_details['TrainingJobDefinition'])
    estimator = cls._prepare_estimator_from_job_description(
        estimator_cls, job_details['TrainingJobDefinition'], sagemaker_session)
    init_params = cls._prepare_init_params_from_job_description(
        job_details)
    tuner = cls(estimator=estimator, **init_params)
    # Bind the existing job so wait()/analytics work on the attached tuner.
    tuner.latest_tuning_job = _TuningJob(
        sagemaker_session=sagemaker_session, tuning_job_name=tuning_job_name)
    return tuner
def upload_string_as_file_body(body, desired_s3_uri=None, kms_key=None, sagemaker_session=None):
    """Static method that uploads a string as the body of a file to S3.

    Args:
        body (str): String representing the body of the file.
        desired_s3_uri (str): The desired S3 uri to upload to.
        kms_key (str): The KMS key to use to encrypt the file.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, one is created using the
            default AWS configuration chain.

    Returns:
        str: The S3 uri of the uploaded file.
    """
    sagemaker_session = sagemaker_session or Session()
    bucket, key = parse_s3_url(desired_s3_uri)
    sagemaker_session.upload_string_as_file_body(
        body=body, bucket=bucket, key=key, kms_key=kms_key)
    # The upload lands exactly at the requested uri, so echo it back.
    return desired_s3_uri
def __init__(self, training_job_name, metric_names=None, sagemaker_session=None,
             start_time=None, end_time=None, period=None):
    """Initialize a ``TrainingJobAnalytics`` instance.

    Args:
        training_job_name (str): name of the TrainingJob to analyze.
        metric_names (list, optional): string names of all the metrics to
            collect for this training job. If not specified, then it will
            use all metric names configured for this job.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, one is specified using
            the default AWS configuration chain.
        start_time: Start of the time range for metric collection — stored
            for later queries (presumably against CloudWatch; confirm).
        end_time: End of the time range for metric collection (see
            ``start_time``).
        period (int): Metric aggregation period; defaults to
            ``METRICS_PERIOD_DEFAULT`` when not given.
    """
    sagemaker_session = sagemaker_session or Session()
    self._sage_client = sagemaker_session.sagemaker_client
    # Training metrics are read from CloudWatch, not the SageMaker API.
    self._cloudwatch = sagemaker_session.boto_session.client('cloudwatch')
    self._training_job_name = training_job_name
    self._start_time = start_time
    self._end_time = end_time
    self._period = period or METRICS_PERIOD_DEFAULT
    if metric_names:
        self._metric_names = metric_names
    else:
        # Fall back to whatever metrics the training job itself declares.
        self._metric_names = self._metric_names_for_training_job()
    self.clear_cache()
def __init__(self, endpoint, sagemaker_session=None, serializer=None, deserializer=None,
             content_type=None, accept=None):
    """Initialize a ``RealTimePredictor``.

    Behavior for serialization of input data and deserialization of result
    data can be configured through initializer arguments. If not specified,
    a sequence of bytes is expected and the API sends it in the request body
    without modifications. In response, the API returns the sequence of
    bytes from the prediction result without any modifications.

    Args:
        endpoint (str): Name of the Amazon SageMaker endpoint to which
            requests are sent.
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object, used for SageMaker interactions (default: None). If not
            specified, one is created using the default AWS configuration
            chain.
        serializer (callable): Accepts a single argument, the input data,
            and returns a sequence of bytes. It may provide a
            ``content_type`` attribute that defines the endpoint request
            content type. If not specified, a sequence of bytes is expected
            for the data.
        deserializer (callable): Accepts two arguments, the result data and
            the response content type, and returns the deserialized result
            (NOTE(review): the original text said "a sequence of bytes" —
            confirm against callers). It may provide an ``accept`` attribute
            that defines the endpoint response's "Accept" content type.
        content_type (str): The invocation's "ContentType", overriding any
            ``content_type`` from the serializer (default: None).
        accept (str): The invocation's "Accept", overriding any accept from
            the deserializer (default: None).
    """
    self.endpoint = endpoint
    self.sagemaker_session = sagemaker_session or Session()
    self.serializer = serializer
    self.deserializer = deserializer
    # Explicit arguments win; otherwise fall back to attributes optionally
    # exposed by the serializer/deserializer objects.
    self.content_type = content_type or getattr(serializer, 'content_type', None)
    self.accept = accept or getattr(deserializer, 'accept', None)
def __init__(self, role, train_instance_count, train_instance_type, train_volume_size=30,
             train_max_run=24 * 60 * 60, input_mode='File', output_path=None,
             output_kms_key=None, base_job_name=None, sagemaker_session=None):
    """Initialize an ``EstimatorBase`` instance.

    Args:
        role (str): An AWS IAM role (either name or full ARN). The Amazon
            SageMaker training jobs and APIs that create Amazon SageMaker
            endpoints use this role to access training data and model
            artifacts. After the endpoint is created, the inference code
            might use the IAM role, if it needs to access an AWS resource.
        train_instance_count (int): Number of Amazon EC2 instances to use
            for training.
        train_instance_type (str): Type of EC2 instance to use for training,
            for example, 'ml.c4.xlarge'.
        train_volume_size (int): Size in GB of the EBS volume to use for
            storing input data during training (default: 30). Must be large
            enough to store training data if File Mode is used (which is the
            default).
        train_max_run (int): Timeout in seconds for training
            (default: 24 * 60 * 60). After this amount of time Amazon
            SageMaker terminates the job regardless of its current status.
        input_mode (str): The input mode that the algorithm supports
            (default: 'File'). Valid modes: 'File' - Amazon SageMaker copies
            the training dataset from the S3 location to a local directory.
            'Pipe' - Amazon SageMaker streams data directly from S3 to the
            container via a Unix-named pipe.
        output_path (str): S3 location for saving the training result (model
            artifacts and output files). If not specified, results are
            stored to a default bucket. If the bucket with the specific name
            does not exist, the estimator creates the bucket during the
            :meth:`~sagemaker.estimator.EstimatorBase.fit` method execution.
        output_kms_key (str): Optional. KMS key ID for encrypting the
            training output (default: None).
        base_job_name (str): Prefix for training job name when the
            :meth:`~sagemaker.estimator.EstimatorBase.fit` method launches.
            If not specified, the estimator generates a default job name,
            based on the training image name and current timestamp.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates one
            using the default AWS configuration chain.
    """
    self.role = role
    self.train_instance_count = train_instance_count
    self.train_instance_type = train_instance_type
    self.train_volume_size = train_volume_size
    self.train_max_run = train_max_run
    self.input_mode = input_mode
    # Local-mode instance types get a LocalSession; everything else talks
    # to the real SageMaker service.
    if self.train_instance_type in ('local', 'local_gpu'):
        if self.train_instance_type == 'local_gpu' and self.train_instance_count > 1:
            raise RuntimeError(
                "Distributed Training in Local GPU is not supported")
        self.sagemaker_session = sagemaker_session or LocalSession()
    else:
        self.sagemaker_session = sagemaker_session or Session()
    self.base_job_name = base_job_name
    self._current_job_name = None
    self.output_path = output_path
    self.output_kms_key = output_kms_key
    self.latest_training_job = None
def upload(local_path, desired_s3_uri, kms_key=None, session=None):
    """Static method that uploads a given file or directory to S3.

    Args:
        local_path (str): A local path to a file or directory.
        desired_s3_uri (str): The desired S3 uri to upload to.
        kms_key (str): The KMS key to use to encrypt the files.
        session (sagemaker.session.Session): Session object which manages
            interactions with Amazon SageMaker APIs and any other AWS
            services needed. If not specified, one is created using the
            default AWS configuration chain.

    Returns:
        The S3 uri of the uploaded file(s).
    """
    # Deprecated parameter name: warn callers still passing ``session``.
    if session is not None:
        _session_v2_rename_warning(session)
    sagemaker_session = session or Session()
    bucket, key_prefix = parse_s3_url(url=desired_s3_uri)
    if kms_key is not None:
        # NOTE(review): only SSEKMSKeyId is set here; another upload variant
        # in this codebase also sets ServerSideEncryption="aws:kms" —
        # confirm whether the omission is intentional.
        extra_args = {"SSEKMSKeyId": kms_key}
    else:
        extra_args = None
    return sagemaker_session.upload_data(
        path=local_path, bucket=bucket, key_prefix=key_prefix, extra_args=extra_args
    )
def _init_params(self, parent_data_objs):
    """Read pipeline parameters and parent data into instance attributes.

    Args:
        parent_data_objs: Iterable of three datasets — train, validation and
            test — produced by parent pipeline components.
    """
    self._output_model_filepath = self._params['output_model_filepath']
    self._train_set, valid_set, test_set = parent_data_objs
    self._print_statistics_info(self._train_set, valid_set, test_set)
    # Feature count is taken from the first sample of the training set;
    # assumes train_set[0] is a (features, label)-style pair — TODO confirm.
    self._num_features = len(self._train_set[0][0])
    self._bucket_name = self._params.get('bucket_name')
    if not self._bucket_name:
        # Fall back to the account's default SageMaker bucket.
        self._bucket_name = Session().default_bucket()
    self._data_location = self._params.get('data_location')
    if not self._data_location:
        self._data_location = 'training/kmeans/data'
    self._output_location = self._params.get('output_location')
    if not self._output_location:
        self._output_location = 's3://{}/training/kmeans/output'.format(self._bucket_name)
    else:
        # A user-supplied output location is treated as a key prefix inside
        # the bucket, not a full uri.
        self._output_location = 's3://{}/{}'.format(self._bucket_name, self._output_location)
    self._skip_s3_dataset_uploading = str2bool(self._params.get('skip_s3_dataset_uploading'))
    # Training infrastructure and hyper-parameter defaults.
    self._instance_count = self._params.get('instance_count', 1)
    self._instance_type = self._params.get('instance_type', 'ml.c4.xlarge')
    self._volume_size_in_gb = self._params.get('volume_size_in_gb', 50)
    self._hyper_parameter_k = self._params.get('hyper_parameter_k', 10)
    self._epochs = self._params.get('epochs', 1)
    self._mini_batch_size = self._params.get('mini_batch_size', 500)
    self._max_runtime_in_seconds = self._params.get('max_runtime_in_seconds', 86400)
def download(s3_uri, local_path, kms_key=None, session=None):
    """Static method that downloads a given S3 uri to the local machine.

    Args:
        s3_uri (str): An S3 uri to download from.
        local_path (str): A local path to download the file(s) to.
        kms_key (str): The KMS key to use to decrypt the files.
        session (sagemaker.session.Session): Session object which manages
            interactions with Amazon SageMaker APIs and any other AWS
            services needed. If not specified, one is created using the
            default AWS configuration chain.
    """
    # Deprecated parameter name: warn callers still passing ``session``.
    if session is not None:
        _session_v2_rename_warning(session)
    sagemaker_session = session or Session()
    bucket, key_prefix = parse_s3_url(url=s3_uri)
    if kms_key is not None:
        # NOTE(review): SSECustomerKey is the SSE-C (customer-provided key)
        # header, while the docstring and parameter name describe a KMS key;
        # SSE-KMS decryption normally needs no extra argument — confirm
        # which encryption scheme callers actually use.
        extra_args = {"SSECustomerKey": kms_key}
    else:
        extra_args = None
    sagemaker_session.download_data(
        path=local_path, bucket=bucket, key_prefix=key_prefix, extra_args=extra_args
    )
def from_string(
    cls, constraints_file_string, kms_key=None, file_name=None, sagemaker_session=None
):
    """Generates a Constraints object from a string.

    The string is uploaded to a generated S3 location and the resulting
    object is built from that uri.

    Args:
        constraints_file_string (str): The body of the constraints JSON
            file, as a string.
        kms_key (str): The kms key to be used to encrypt the file in S3.
        file_name (str): The file name to use when uploading to S3.
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object, used for SageMaker interactions (default: None). If not
            specified, one is created using the default AWS configuration
            chain.

    Returns:
        sagemaker.model_monitor.Constraints: The instance of Constraints
            generated from the given string.
    """
    sagemaker_session = sagemaker_session or Session()
    file_name = file_name or "constraints.json"
    # Store under a fresh uuid prefix so concurrent uploads never collide.
    desired_s3_uri = s3.s3_path_join(
        "s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
    )
    s3_uri = s3.S3Uploader.upload_string_as_file_body(
        body=constraints_file_string,
        desired_s3_uri=desired_s3_uri,
        kms_key=kms_key,
        sagemaker_session=sagemaker_session,
    )
    return Constraints.from_s3_uri(
        constraints_file_s3_uri=s3_uri, kms_key=kms_key, sagemaker_session=sagemaker_session
    )
def __init__(self, model_data, role, sagemaker_session=None):
    """Create a hosting ``Model`` for a LinearLearner training artifact.

    Args:
        model_data (str): S3 location of the model ``.tar.gz`` artifact.
        role (str): AWS IAM role (name or full ARN) used by SageMaker.
        sagemaker_session (sagemaker.session.Session): Session to use; one
            is created from the default AWS configuration chain when omitted.
    """
    session = sagemaker_session or Session()
    region = session.boto_session.region_name
    # Image uri is <account registry>/<repo name>:<repo version>.
    image = '{}/{}:{}'.format(
        registry(region), LinearLearner.repo_name, LinearLearner.repo_version
    )
    super(LinearLearnerModel, self).__init__(
        model_data,
        image,
        role,
        predictor_cls=LinearLearnerPredictor,
        sagemaker_session=session,
    )
def __init__(
    self,
    endpoint_name,
    sagemaker_session=None,
    serializer=IdentitySerializer(),
    deserializer=BytesDeserializer(),
):
    """Initialize a ``Predictor``.

    Behavior for serialization of input data and deserialization of result
    data can be configured through initializer arguments. If not specified,
    a sequence of bytes is expected and the API sends it in the request body
    without modifications. In response, the API returns the sequence of
    bytes from the prediction result without any modifications.

    Args:
        endpoint_name (str): Name of the Amazon SageMaker endpoint to which
            requests are sent.
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object, used for SageMaker interactions (default: None). If not
            specified, one is created using the default AWS configuration
            chain.
        serializer (:class:`~sagemaker.serializers.BaseSerializer`): A
            serializer object, used to encode data for an inference endpoint
            (default: :class:`~sagemaker.serializers.IdentitySerializer`).
        deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A
            deserializer object, used to decode data from an inference
            endpoint (default:
            :class:`~sagemaker.deserializers.BytesDeserializer`).
    """
    self.endpoint_name = endpoint_name
    # The session must exist before the helper lookups below, which query
    # the endpoint's configuration through it.
    self.sagemaker_session = sagemaker_session or Session()
    self.serializer = serializer
    self.deserializer = deserializer
    self._endpoint_config_name = self._get_endpoint_config_name()
    self._model_names = self._get_model_names()
def __init__(
    self,
    predictor,
    name=None,
):
    """Initialize an ``AsyncPredictor``.

    Args:
        predictor (sagemaker.predictor.Predictor): General ``Predictor``
            object has useful methods and variables. ``AsyncPredictor``
            stands on top of it with capability for async inference.
        name (str): Optional name for this predictor.
    """
    self.predictor = predictor
    self.endpoint_name = predictor.endpoint_name
    session = predictor.sagemaker_session or Session()
    self.sagemaker_session = session
    # Reuse the session's S3 client when available; otherwise build one for
    # the session's region.
    if session.s3_client is not None:
        self.s3_client = session.s3_client
    else:
        self.s3_client = session.boto_session.client(
            "s3",
            region_name=session.boto_region_name,
        )
    self.serializer = predictor.serializer
    self.deserializer = predictor.deserializer
    self.name = name
    # Lazily-populated request/endpoint state.
    self._endpoint_config_name = None
    self._model_names = None
    self._context = None
    self._input_path = None
def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
    """Initialization for LinearLearnerModel.

    Args:
        model_data (str): The S3 location of a SageMaker model data
            ``.tar.gz`` file.
        role (str): An AWS IAM role (either name or full ARN). The Amazon
            SageMaker training jobs and APIs that create Amazon SageMaker
            endpoints use this role to access training data and model
            artifacts. After the endpoint is created, the inference code
            might use the IAM role, if it needs to access an AWS resource.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates one
            using the default AWS configuration chain.
        **kwargs: Keyword arguments passed to the ``FrameworkModel``
            initializer.
    """
    session = sagemaker_session or Session()
    region = session.boto_region_name
    # Resolve the first-party LinearLearner container image for this region.
    image_uri = image_uris.retrieve(
        LinearLearner.repo_name,
        region,
        version=LinearLearner.repo_version,
    )
    super(LinearLearnerModel, self).__init__(
        image_uri,
        model_data,
        role,
        predictor_cls=LinearLearnerPredictor,
        sagemaker_session=session,
        **kwargs
    )
def __init__(self, engine):
    """Initialize the integration-test component with empty run state.

    Args:
        engine: Execution engine passed through to the parent constructor.
    """
    super(SageMakerKMeansBatchPredictorIT, self).__init__(engine)
    # Per-run state, filled in later by the test/pipeline steps.
    self._dataset_s3_url = None
    self._bucket_name = None
    self._local_model_filepath = None
    self._model_s3_filepath = None
    self._results_s3_location = None
    self._model_name = None
    self._job_name = None
    self._instance_type = None
    self._instance_count = None
    # AWS clients/helpers built from the default configuration chain.
    self._sagemaker_session = Session()
    self._sagemaker_client = boto3.client('sagemaker')
    self._aws_helper = AwsHelper(self._logger)
    self._job_monitor = None
def attach(cls, transform_job_name, sagemaker_session=None):
    """Attach an existing transform job to a new Transformer instance.

    Args:
        transform_job_name (str): Name for the transform job to be attached.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, one will be created using
            the default AWS configuration chain.

    Returns:
        sagemaker.transformer.Transformer: The Transformer instance with the
            specified transform job attached.
    """
    session = sagemaker_session or Session()
    # Pull the job description and translate it into constructor arguments.
    description = session.sagemaker_client.describe_transform_job(
        TransformJobName=transform_job_name
    )
    ctor_args = cls._prepare_init_params_from_job_description(description)
    transformer = cls(sagemaker_session=session, **ctor_args)
    # Bind the existing job so wait()/describe work on the attached object.
    transformer.latest_transform_job = _TransformJob(
        sagemaker_session=session, job_name=ctor_args["base_transform_job_name"]
    )
    return transformer
def attach(cls, training_job_name, sagemaker_session=None, **kwargs):
    """Attach to an existing training job.

    Create an Estimator bound to an existing training job. After attaching,
    if the training job has a Complete status, it can be ``deploy()`` ed to
    create a SageMaker Endpoint and return a ``Predictor``.

    If the training job is in progress, attach will block and display log
    messages from the training job, until the training job completes.

    Args:
        training_job_name (str): The name of the training job to attach to.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates one
            using the default AWS configuration chain.
        **kwargs: Additional kwargs passed to the
            :class:`~sagemaker.estimator.Estimator` constructor.

    Returns:
        sagemaker.estimator.Framework: ``Estimator`` with the attached
            training job.
    """
    sagemaker_session = sagemaker_session or Session()
    if training_job_name is not None:
        job_details = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=training_job_name)
        init_params, hp, _ = cls._prepare_estimator_params_from_job_description(
            job_details)
    else:
        # this case is only valid when called from an inheriting class,
        # which must then declare a framework name
        if not hasattr(cls, '__framework_name__'):
            raise ValueError('must specify training_job name')
        init_params = dict(kwargs)
        hp = init_params.pop('hyperparameters')
    # parameters for framework classes: each value is stored JSON-encoded
    # in the hyperparameters, so decode on the way out
    framework_init_params = dict()
    framework_init_params['entry_point'] = json.loads(
        hp.get(SCRIPT_PARAM_NAME))
    framework_init_params['source_dir'] = json.loads(
        hp.get(DIR_PARAM_NAME))
    framework_init_params['enable_cloudwatch_metrics'] = json.loads(
        hp.get(CLOUDWATCH_METRICS_PARAM_NAME))
    framework_init_params['container_log_level'] = json.loads(
        hp.get(CONTAINER_LOG_LEVEL_PARAM_NAME))
    # NOTE(review): the original comment claimed SageMaker-specific entries
    # are removed here, but the comprehension keeps every key — confirm
    # whether the framework-param keys above should be filtered out.
    hyperparameters = {entry: json.loads(hp[entry]) for entry in hp}
    framework_init_params['hyperparameters'] = hyperparameters
    init_params.update(framework_init_params)
    estimator = cls(sagemaker_session=sagemaker_session, **init_params)
    estimator.latest_training_job = _TrainingJob(
        sagemaker_session=sagemaker_session,
        training_job_name=init_params['base_job_name'])
    # Block until the attached job finishes, streaming its logs.
    estimator.latest_training_job.wait()
    estimator.uploaded_code = UploadedCode(estimator.source_dir, estimator.entry_point)
    return estimator
def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="model"):
    """Attach to an existing training job.

    Create an Estimator bound to an existing training job, each subclass is
    responsible to implement ``_prepare_init_params_from_job_description()``
    as this method delegates the actual conversion of a training job
    description to the arguments that the class constructor expects. After
    attaching, if the training job has a Complete status, it can be
    ``deploy()`` ed to create a SageMaker Endpoint and return a
    ``Predictor``.

    If the training job is in progress, attach will block and display log
    messages from the training job, until the training job completes.

    Examples:
        >>> my_estimator.fit(wait=False)
        >>> training_job_name = my_estimator.latest_training_job.name
        Later on:
        >>> attached_estimator = Estimator.attach(training_job_name)
        >>> attached_estimator.deploy()

    Args:
        training_job_name (str): The name of the training job to attach to.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates one
            using the default AWS configuration chain.
        model_channel_name (str): Name of the channel where pre-trained
            model data will be downloaded (default: 'model'). If no channel
            with the same name exists in the training job, this option will
            be ignored.

    Returns:
        Instance of the calling ``Estimator`` Class with the attached
        training job.
    """
    sagemaker_session = sagemaker_session or Session()
    job_details = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=training_job_name
    )
    init_params = cls._prepare_init_params_from_job_description(job_details, model_channel_name)
    # Carry the original job's tags over to the attached estimator.
    tags = sagemaker_session.sagemaker_client.list_tags(
        ResourceArn=job_details["TrainingJobArn"]
    )["Tags"]
    init_params.update(tags=tags)
    estimator = cls(sagemaker_session=sagemaker_session, **init_params)
    estimator.latest_training_job = _TrainingJob(
        sagemaker_session=sagemaker_session, job_name=training_job_name
    )
    estimator._current_job_name = estimator.latest_training_job.name
    # Block until the attached job finishes, streaming its logs.
    estimator.latest_training_job.wait()
    # pylint gets confused thinking that estimator is an EstimatorBase instance, but it actually
    # is a Framework or any of its derived classes. We can safely ignore the no-member errors.
    estimator.uploaded_code = UploadedCode(
        estimator.source_dir, estimator.entry_point  # pylint: disable=no-member
    )
    return estimator
def __init__(self, model_data, image, role, predictor_cls=None, env=None, name=None,
             sagemaker_session=None):
    """Initialize a SageMaker ``Model``.

    Args:
        model_data (str): The S3 location of a SageMaker model data
            ``.tar.gz`` file.
        image (str): A Docker image URI.
        role (str): An AWS IAM role (either name or full ARN). The Amazon
            SageMaker training jobs and APIs that create Amazon SageMaker
            endpoints use this role to access training data and model
            artifacts. After the endpoint is created, the inference code
            might use the IAM role, if it needs to access an AWS resource.
        predictor_cls (callable[string, sagemaker.session.Session]): A
            function to call to create a predictor (default: None). If not
            None, ``deploy`` will return the result of invoking this
            function on the created endpoint name.
        env (dict[str, str]): Environment variables to run with ``image``
            when hosted in SageMaker (default: None).
        name (str): The model name. If None, a default model name will be
            selected on each ``deploy``.
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object, used for SageMaker interactions (default: None). If not
            specified, one is created using the default AWS configuration
            chain.
    """
    self.model_data = model_data
    self.image = image
    self.role = role
    self.predictor_cls = predictor_cls
    # An omitted/empty env normalizes to a fresh empty dict.
    self.env = {} if not env else env
    self.name = name
    self.sagemaker_session = sagemaker_session if sagemaker_session else Session()
    self._model_name = None
def upload_string_as_file_body(body, desired_s3_uri=None, kms_key=None, session=None):
    """Static method that uploads a string as the body of a file to S3.

    Args:
        body (str): String representing the body of the file.
        desired_s3_uri (str): The desired S3 uri to upload to.
        kms_key (str): The KMS key to use to encrypt the file.
        session (sagemaker.session.Session): AWS session to use.
            Automatically generates one if not provided.

    Returns:
        str: The S3 uri of the uploaded file.
    """
    # Deprecated parameter name: warn callers still passing ``session``.
    if session is not None:
        _session_v2_rename_warning(session)
    sagemaker_session = session or Session()
    bucket, key = parse_s3_url(desired_s3_uri)
    sagemaker_session.upload_string_as_file_body(
        body=body, bucket=bucket, key=key, kms_key=kms_key
    )
    # The upload lands exactly at the requested uri, so echo it back.
    return desired_s3_uri
def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
    ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
    this method returns the result of invoking ``self.predictor_cls`` on the
    created endpoint name.

    The name of the created model is accessible in the ``name`` field of this
    ``Model`` after deploy returns.

    The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run
            in the ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to.
            For example, 'ml.p2.xlarge'.
        endpoint_name (str): The name of the endpoint to create (default: None).
            If not specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this
            specific endpoint.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if
            ``self.predictor_cls`` is not None. Otherwise, return None
            (implicitly — there is no final ``return`` statement).
    """
    # Lazily create a session; local-mode instance types get a LocalSession.
    if not self.sagemaker_session:
        if instance_type in ('local', 'local_gpu'):
            self.sagemaker_session = LocalSession()
        else:
            self.sagemaker_session = Session()
    container_def = self.prepare_container_def(instance_type)
    # Default the model name from the container image name.
    self.name = self.name or name_from_image(container_def['Image'])
    self.sagemaker_session.create_model(
        self.name, self.role, container_def, vpc_config=self.vpc_config
    )
    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count
    )
    # The endpoint name defaults to the model name when not given.
    self.endpoint_name = endpoint_name or self.name
    self.sagemaker_session.endpoint_from_production_variants(
        self.endpoint_name, [production_variant], tags
    )
    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
def attach(cls, auto_ml_job_name, sagemaker_session=None):
    """Attach to an existing AutoML job.

    Creates and returns a AutoML bound to an existing automl job.

    Args:
        auto_ml_job_name (str): AutoML job name
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object, used for SageMaker interactions (default: None). If not
            specified, the one originally associated with the ``AutoML``
            instance is used.

    Returns:
        sagemaker.automl.AutoML: A ``AutoML`` instance with the attached
            automl job.
    """
    sagemaker_session = sagemaker_session or Session()
    auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(auto_ml_job_name)
    # Carry the original job's tags over to the attached instance.
    automl_job_tags = sagemaker_session.sagemaker_client.list_tags(
        ResourceArn=auto_ml_job_desc["AutoMLJobArn"]
    )["Tags"]
    # Rebuild the AutoML configuration from the job description; chained
    # .get() calls tolerate optional sections missing from the response.
    amlj = AutoML(
        role=auto_ml_job_desc["RoleArn"],
        target_attribute_name=auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"],
        output_kms_key=auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"),
        output_path=auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"],
        base_job_name=auto_ml_job_name,
        compression_type=auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"),
        sagemaker_session=sagemaker_session,
        volume_kms_key=auto_ml_job_desc.get("AutoMLJobConfig", {})
        .get("SecurityConfig", {})
        .get("VolumeKmsKeyId"),
        encrypt_inter_container_traffic=auto_ml_job_desc.get("AutoMLJobConfig", {})
        .get("SecurityConfig", {})
        .get("EnableInterContainerTrafficEncryption", False),
        vpc_config=auto_ml_job_desc.get("AutoMLJobConfig", {})
        .get("SecurityConfig", {})
        .get("VpcConfig"),
        problem_type=auto_ml_job_desc.get("ProblemType"),
        max_candidates=auto_ml_job_desc.get("AutoMLJobConfig", {})
        .get("CompletionCriteria", {})
        .get("MaxCandidates"),
        max_runtime_per_training_job_in_seconds=auto_ml_job_desc.get("AutoMLJobConfig", {})
        .get("CompletionCriteria", {})
        .get("MaxRuntimePerTrainingJobInSeconds"),
        total_job_runtime_in_seconds=auto_ml_job_desc.get("AutoMLJobConfig", {})
        .get("CompletionCriteria", {})
        .get("MaxAutoMLJobRuntimeInSeconds"),
        job_objective=auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"),
        generate_candidate_definitions_only=auto_ml_job_desc.get(
            "GenerateCandidateDefinitionsOnly", False
        ),
        tags=automl_job_tags,
    )
    amlj.current_job_name = auto_ml_job_name
    amlj.latest_auto_ml_job = auto_ml_job_name  # pylint: disable=W0201
    amlj._auto_ml_job_desc = auto_ml_job_desc
    return amlj
def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estimator_cls=None):
    """Attach to an existing hyperparameter tuning job.

    Create a HyperparameterTuner bound to an existing hyperparameter tuning
    job. After attaching, if there exists a best training job (or any other
    completed training job), that can be deployed to create an Amazon
    SageMaker Endpoint and return a ``Predictor``.

    Examples:
        >>> my_tuner.fit()
        >>> job_name = my_tuner.latest_tuning_job.name
        Later on:
        >>> attached_tuner = HyperparameterTuner.attach(job_name)
        >>> attached_tuner.deploy()

    Args:
        tuning_job_name (str): The name of the hyperparameter tuning job to
            attach to.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other AWS
            services needed. If not specified, one is created using the
            default AWS configuration chain.
        job_details (dict): The response to a
            ``DescribeHyperParameterTuningJob`` call. If not specified, one
            such call is performed with the provided tuning job name.
        estimator_cls (str): The estimator class name associated with the
            training jobs, e.g. 'sagemaker.estimator.Estimator'. If not
            specified, the correct class is derived from training job
            metadata, defaulting to :class:~`sagemaker.estimator.Estimator`
            if a more specific one cannot be determined.

    Returns:
        sagemaker.tuner.HyperparameterTuner: A ``HyperparameterTuner``
        instance with the attached hyperparameter tuning job.
    """
    session = sagemaker_session or Session()

    # Fetch the job description only when the caller did not supply one.
    if job_details is None:
        job_details = session.sagemaker_client.describe_hyper_parameter_tuning_job(
            HyperParameterTuningJobName=tuning_job_name
        )

    resolved_estimator_cls = cls._prepare_estimator_cls(
        estimator_cls, job_details["TrainingJobDefinition"]
    )
    attached_estimator = cls._prepare_estimator_from_job_description(
        resolved_estimator_cls, job_details, session
    )
    tuner_kwargs = cls._prepare_init_params_from_job_description(job_details)

    tuner = cls(estimator=attached_estimator, **tuner_kwargs)
    tuner.latest_tuning_job = _TuningJob(
        sagemaker_session=session, job_name=tuning_job_name
    )
    return tuner
def _create_sagemaker_pipeline_model(self, instance_type):
    """Create a SageMaker Model Entity.

    Args:
        instance_type (str): The EC2 instance type that this Model will be
            used for; this is only used to determine if the image needs GPU
            support or not.
    """
    # Fix: the original docstring documented a nonexistent ``accelerator_type``
    # parameter, which is not part of this method's signature.
    # Lazily create a session so the model can be constructed without one.
    if not self.sagemaker_session:
        self.sagemaker_session = Session()

    containers = self.pipeline_container_def(instance_type)
    # Derive a model name from the first container's image if none was given.
    self.name = self.name or name_from_image(containers[0]['Image'])
    self.sagemaker_session.create_model(
        self.name, self.role, containers, vpc_config=self.vpc_config
    )
def main():
    """Cache the MNIST dataset locally (if absent) and upload it to S3."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--data_dir', default='data/')
    cli_args = arg_parser.parse_args()

    dataset_dir = Path(cli_args.data_dir)
    if not dataset_dir.exists():
        # Download MNIST once and store it locally as .npz archives.
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        dataset_dir.mkdir()
        np.savez(str(dataset_dir / 'train'), image=x_train, label=y_train)
        np.savez(str(dataset_dir / 'test'), image=x_test, label=y_test)

    sm_session = Session()
    bucket = os.getenv('S3_BUCKET_NAME', sm_session.default_bucket())
    sm_session.upload_data(path=str(dataset_dir), bucket=bucket, key_prefix='dataset/mnist')
def main():
    """Configure and launch a SageMaker TensorFlow training job for MNIST."""
    sm_session = Session()
    bucket = os.getenv('S3_BUCKET_NAME', sm_session.default_bucket())

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--batch_size', type=int, default=32)
    arg_parser.add_argument('--epochs', type=int, default=10)
    arg_parser.add_argument('--role', default=os.environ['SAGEMAKER_ROLE'])
    arg_parser.add_argument('--input_data', default=f's3://{bucket}/dataset/mnist')
    arg_parser.add_argument('--output_path', default=f's3://{bucket}/training')
    arg_parser.add_argument('--train_instance_type', default='ml.m5.large')
    arg_parser.add_argument('--wait', action='store_true')
    cli_args = arg_parser.parse_args()

    training_inputs = {'dataset': cli_args.input_data}
    # Timestamp the job name so repeated runs never collide.
    job_name = 'mnist-' + dt.now().strftime('%Y-%m-%d-%H-%M')
    hyperparameters = {'batch_size': cli_args.batch_size, 'epochs': cli_args.epochs}
    metric_definitions = [
        {'Name': 'train loss', 'Regex': r'loss: (\S+)'},
        {'Name': 'valid loss', 'Regex': r'val_loss: (\S+)'},
    ]

    estimator = TensorFlow(
        entry_point='train.py',
        source_dir='src',
        role=cli_args.role,
        train_instance_count=1,
        train_instance_type=cli_args.train_instance_type,
        train_volume_size=30,
        train_max_run=86400,
        output_path=cli_args.output_path,
        code_location=cli_args.output_path,
        py_version='py3',
        framework_version='1.12.0',
        hyperparameters=hyperparameters,
        metric_definitions=metric_definitions,
    )
    estimator.fit(training_inputs, wait=cli_args.wait, job_name=job_name)
def _create_sagemaker_pipeline_model(self, instance_type):
    """Create a SageMaker Model Entity.

    Args:
        instance_type (str): The EC2 instance type that this Model will be
            used for; only used to decide whether the image needs GPU support.
    """
    session = self.sagemaker_session
    if not session:
        # No session supplied yet — create and cache a default one.
        session = Session()
        self.sagemaker_session = session

    pipeline_containers = self.pipeline_container_def(instance_type)
    if not self.name:
        # Fall back to a name derived from the first container image.
        self.name = name_from_image(pipeline_containers[0]["Image"])

    session.create_model(
        self.name, self.role, pipeline_containers, vpc_config=self.vpc_config
    )
def __init__(self, model_data, role, sagemaker_session=None):
    """Initialize a ``KMeansModel``.

    Args:
        model_data (str): The S3 location of the model data.
        role (str): An AWS IAM role (either name or full ARN) used by
            SageMaker to access resources on the caller's behalf.
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object (default: None). If not specified, one is created.
    """
    session = sagemaker_session or Session()
    # The algorithm image lives in the region-specific registry.
    region = session.boto_session.region_name
    image = "{}/{}".format(registry(region), KMeans.repo)
    super(KMeansModel, self).__init__(
        model_data,
        image,
        role,
        predictor_cls=KMeansPredictor,
        sagemaker_session=session,
    )
def save_to_feature_store():
    """Load the feature CSV from S3 and ingest it into a SageMaker Feature Group.

    Creates the feature group first if one with the generated name does not
    already exist. Relies on module-level globals: ``boto_session``,
    ``sagemaker_client``, ``featurestore_runtime``, ``feature_s3_url``,
    ``default_bucket``, ``prefix``, and ``feature_group``.
    """
    logger.info("Save to FeatureStore started")
    global feature_group

    df_data = pd.read_csv(feature_s3_url)
    logger.info("Read data from S3: %s", df_data.head())

    feature_store_session = Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_featurestore_runtime_client=featurestore_runtime,
    )
    # You can modify the following to use a bucket of your choosing
    logger.info("Default bucket: %s", default_bucket)

    # Record identifier and event time feature names.
    record_identifier_feature_name = "IDpol"
    event_time_feature_name = "EventTime"
    current_time_sec = int(round(time.time()))

    # Cast object dtype to string: the SageMaker FeatureStore Python SDK then
    # maps the string dtype to the String feature type.
    cast_object_to_string(df_data)
    df_data[event_time_feature_name] = pd.Series(
        [current_time_sec] * len(df_data), dtype="float64"
    )

    feature_group_name = 'insurance-policy-feature-group-' + strftime(
        '%d-%H-%M-%S', gmtime())
    logger.info("Feature Group Name: %s", feature_group_name)

    # Check if the feature group already exists; create one if it doesn't.
    # Fix: use `not ...` instead of `== False`, and log feature_group_name —
    # the original logged the global `feature_group`, which is both the wrong
    # variable and possibly unassigned at this point (NameError).
    if not feature_group_exist(feature_group_name):
        logger.info("Feature Group: %s doesn't exist. Create a new one.", feature_group_name)
        feature_group = FeatureGroup(
            name=feature_group_name, sagemaker_session=feature_store_session)
        # Load feature definitions into the feature group; the SDK
        # auto-detects the schema from the input DataFrame.
        feature_group.load_feature_definitions(data_frame=df_data)  # output is suppressed
        feature_group.create(
            s3_uri=f"s3://{default_bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=get_execution_role(),
            enable_online_store=True)
        wait_for_feature_group_creation_complete(feature_group=feature_group)
        feature_group.describe()
    else:
        # Fix: "exits" typo in the log message, and log the name rather than
        # the (stale) global feature_group object.
        logger.info("Feature Group: %s exists", feature_group_name)
        # Init feature group object if it already exists.
        feature_group = FeatureGroup(
            name=feature_group_name, sagemaker_session=feature_store_session)

    # Ingest data into the feature store.
    feature_group.ingest(data_frame=df_data, max_workers=5, wait=True)
def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
    ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
    this method returns the result of invoking ``self.predictor_cls`` on the
    created endpoint name.

    The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run
            in the ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to.
            For example, 'ml.p2.xlarge'. 'local' and 'local_gpu' deploy with
            Local Mode instead of the SageMaker service.
        endpoint_name (str): The name of the endpoint to create (default:
            None). If not specified, a unique endpoint name will be created.
        tags (list[dict[str, str]]): A list of key-value pairs for tagging the
            endpoint (default: None).

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if
            ``self.predictor_cls`` is not None. Otherwise, return None.
    """
    # Fixes vs. original: docstring typo ("returns a the result"), Args order
    # matching the signature, and an explicit `return None` for the
    # no-predictor path instead of falling off the end of the function.
    if not self.sagemaker_session:
        # Local Mode instance types get a LocalSession; anything else talks
        # to the SageMaker service through a regular Session.
        if instance_type in ('local', 'local_gpu'):
            self.sagemaker_session = LocalSession()
        else:
            self.sagemaker_session = Session()

    container_def = self.prepare_container_def(instance_type)
    model_name = self.name or name_from_image(container_def['Image'])
    self.sagemaker_session.create_model(model_name, self.role, container_def)

    production_variant = sagemaker.production_variant(
        model_name, instance_type, initial_instance_count)
    self.endpoint_name = endpoint_name or model_name
    self.sagemaker_session.endpoint_from_production_variants(
        self.endpoint_name, [production_variant], tags)

    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
    return None
class Model(object):
    """A SageMaker ``Model`` that can be deployed to an ``Endpoint``."""

    def __init__(self, model_data, image, role, predictor_cls=None, env=None,
                 name=None, sagemaker_session=None):
        """Initialize an SageMaker ``Model``.

        Args:
            model_data (str): The S3 location of a SageMaker model data
                ``.tar.gz`` file.
            image (str): A Docker image URI.
            role (str): An AWS IAM role (either name or full ARN). The Amazon
                SageMaker training jobs and APIs that create Amazon SageMaker
                endpoints use this role to access training data and model
                artifacts. After the endpoint is created, the inference code
                might use the IAM role, if it needs to access an AWS resource.
            predictor_cls (callable[string, sagemaker.session.Session]): A
                function to call to create a predictor (default: None). If not
                None, ``deploy`` will return the result of invoking this
                function on the created endpoint name.
            env (dict[str, str]): Environment variables to run with ``image``
                when hosted in SageMaker (default: None).
            name (str): The model name. If None, a default model name will be
                selected on each ``deploy``.
            sagemaker_session (sagemaker.session.Session): A SageMaker Session
                object, used for SageMaker interactions (default: None). If
                not specified, one is created using the default AWS
                configuration chain.
        """
        self.model_data = model_data
        self.image = image
        self.role = role
        self.predictor_cls = predictor_cls
        self.env = env or {}
        self.name = name
        self.sagemaker_session = sagemaker_session
        self._model_name = None

    def prepare_container_def(self, instance_type):
        """Return a dict created by ``sagemaker.container_def()`` for deploying
        this model to a specified instance type.

        Subclasses can override this to provide custom container definitions
        for deployment to a specific instance type. Called by ``deploy()``.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model
                to. For example, 'ml.p2.xlarge'.

        Returns:
            dict: A container definition object usable with the CreateModel
                API.
        """
        return sagemaker.container_def(self.image, self.model_data, self.env)

    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
        ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not
        None, this method returns the result of invoking
        ``self.predictor_cls`` on the created endpoint name.

        The name of the created endpoint is accessible in the
        ``endpoint_name`` field of this ``Model`` after deploy returns.

        Args:
            initial_instance_count (int): The initial number of instances to
                run in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model
                to. For example, 'ml.p2.xlarge'. 'local' and 'local_gpu'
                deploy with Local Mode instead of the SageMaker service.
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be
                created.
            tags (list[dict[str, str]]): A list of key-value pairs for tagging
                the endpoint (default: None).

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation
                of ``self.predictor_cls`` on the created endpoint name, if
                ``self.predictor_cls`` is not None. Otherwise, return None.
        """
        # Fixes vs. original: docstring typo ("returns a the result") and an
        # explicit `return None` for the no-predictor path.
        if not self.sagemaker_session:
            # Local Mode instance types get a LocalSession; anything else
            # talks to the SageMaker service through a regular Session.
            if instance_type in ('local', 'local_gpu'):
                self.sagemaker_session = LocalSession()
            else:
                self.sagemaker_session = Session()

        container_def = self.prepare_container_def(instance_type)
        model_name = self.name or name_from_image(container_def['Image'])
        self.sagemaker_session.create_model(model_name, self.role, container_def)

        production_variant = sagemaker.production_variant(
            model_name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or model_name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [production_variant], tags)

        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
        return None