def deploy_xgboost(self, s3_model_location, instance_count, instance_type, framework_version,
                   model_server_workers=None, tags=None, endpoint_name=None):
    """
    Deploy an XGBoost model to a SageMaker endpoint

    :param s3_model_location: [str], model location in S3 (passed as `model_data`)
    :param instance_count: number of instances (passed as `initial_instance_count`)
    :param instance_type: [str], instance type for the endpoint
    :param framework_version: XGBoost framework version given to `XGBoostModel`
    :param model_server_workers: [optional], forwarded to `XGBoostModel` unchanged
    :param tags: [optional], tags forwarded to the deploy / update call
    :param endpoint_name: [optional[str]], Optional name for the SageMaker endpoint
    :return: endpoint name taken from the resulting predictor
    """
    xgb_model = sagemaker.xgboost.model.XGBoostModel(
        role=self.role,
        model_data=s3_model_location,
        framework_version=framework_version,
        py_version="py3",
        source_dir=os.path.join(_FILE_DIR_PATH, "xgboost_code"),
        entry_point="xgboost_inference.py",
        model_server_workers=model_server_workers,
        sagemaker_session=self.sagemaker_session,
    )

    # Settings shared by the "create" path and the "update" fallback.
    capacity = {
        "instance_type": instance_type,
        "initial_instance_count": instance_count,
        "tags": tags,
    }

    try:
        endpoint = xgb_model.deploy(endpoint_name=endpoint_name, **capacity)
    except botocore.exceptions.ClientError:
        # Creating the endpoint failed (presumably because it already exists),
        # so attach to the endpoint by name and update it to use the new model.
        endpoint = sage.Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=self.sagemaker_session,
        )
        endpoint.update_endpoint(model_name=xgb_model.name, **capacity)

    return endpoint.endpoint_name
def deploy(self, image_name, s3_model_location, train_instance_count, train_instance_type,
           tags=None, endpoint_name=None):
    """
    Deploy a model packaged as a Docker image to a SageMaker endpoint

    :param image_name: [str], name of Docker image
    :param s3_model_location: [str], model location in S3
    :param train_instance_count: number of ec2 instances
        (passed as `initial_instance_count`)
    :param train_instance_type: [str], ec2 instance type
    :param tags: [optional[list[dict]], default: None], List of tags forwarded to the
        deploy / update call.
        For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
        Example:

        [
            {
                'Key': 'key_name_1',
                'Value': key_value_1,
            },
            {
                'Key': 'key_name_2',
                'Value': key_value_2,
            },
            ...
        ]
    :param endpoint_name: [optional[str]], Optional name for the SageMaker endpoint
    :return: [str], endpoint name
    """
    image_uri = self._construct_image_location(image_name)
    sage_model = sage.Model(
        model_data=s3_model_location,
        image_uri=image_uri,
        role=self.role,
        sagemaker_session=self.sagemaker_session,
    )

    # Settings shared by the "create" path and the "update" fallback.
    capacity = {
        "initial_instance_count": train_instance_count,
        "instance_type": train_instance_type,
        "tags": tags,
    }

    try:
        sage_model.deploy(endpoint_name=endpoint_name, **capacity)
    except botocore.exceptions.ClientError:
        # Creating the endpoint failed (presumably because it already exists),
        # so attach to the endpoint by name and update it to use the new model.
        existing = sage.Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=self.sagemaker_session,
        )
        existing.update_endpoint(model_name=sage_model.name, **capacity)
        return existing.endpoint_name

    # The deploy result is not used; the name is read back from the model object.
    return sage_model.endpoint_name
def deploy_hugging_face(self, instance_count, instance_type, transformers_version=None,
                        pytorch_version=None, tensorflow_version=None, s3_model_location=None,
                        hub=None, model_server_workers=None, tags=None, endpoint_name=None):
    """
    Deploy a Hugging Face model to a SageMaker endpoint

    Exactly one of `pytorch_version` / `tensorflow_version` and exactly one of
    `s3_model_location` / `hub` must be given.

    :param instance_count: number of instances (passed as `initial_instance_count`)
    :param instance_type: [str], instance type for the endpoint
    :param transformers_version: [optional], forwarded to `HuggingFaceModel`
    :param pytorch_version: [optional], forwarded to `HuggingFaceModel`;
        mutually exclusive with `tensorflow_version`
    :param tensorflow_version: [optional], forwarded to `HuggingFaceModel`;
        mutually exclusive with `pytorch_version`
    :param s3_model_location: [optional[str]], model location in S3 (passed as
        `model_data`); mutually exclusive with `hub`
    :param hub: [optional], passed to `HuggingFaceModel` as `env`
        (presumably a dict of Hugging Face Hub settings - confirm with callers);
        mutually exclusive with `s3_model_location`
    :param model_server_workers: [optional], forwarded to `HuggingFaceModel` unchanged
    :param tags: [optional], tags forwarded to the deploy / update call
    :param endpoint_name: [optional[str]], Optional name for the SageMaker endpoint
    :return: endpoint name taken from the resulting predictor
    :raises ValueError: if both or neither argument of a mutually exclusive pair is given
    """

    def _validate_either_of_them(name_a, name_b, var_a, var_b):
        # Enforce "exactly one of the two is set"; names are only used in the message.
        if var_a is not None and var_b is not None:
            raise ValueError(f'{name_a} and {name_b} are both not None. '
                             f'Specify only {name_a} or {name_b}.')
        if var_a is None and var_b is None:
            # These must be f-strings, otherwise the placeholders are emitted verbatim.
            raise ValueError(f"{name_a} and {name_b} are both None. "
                             f"Specify either {name_a} or {name_b}.")

    # Validate before touching SageMaker so bad arguments fail fast and cheaply.
    _validate_either_of_them(name_a='pytorch_version', name_b='tensorflow_version',
                             var_a=pytorch_version, var_b=tensorflow_version)
    _validate_either_of_them(name_a='s3_model_location', name_b='hub',
                             var_a=s3_model_location, var_b=hub)

    model = sagemaker.huggingface.HuggingFaceModel(
        role=self.role,
        model_data=s3_model_location,
        transformers_version=transformers_version,
        pytorch_version=pytorch_version,
        tensorflow_version=tensorflow_version,
        model_server_workers=model_server_workers,
        py_version='py36',
        env=hub,
        sagemaker_session=self.sagemaker_session)

    try:
        predictor = model.deploy(instance_type=instance_type,
                                 initial_instance_count=instance_count,
                                 tags=tags,
                                 endpoint_name=endpoint_name)
    except botocore.exceptions.ClientError:
        # Creating the endpoint failed (presumably because it already exists),
        # so attach to the endpoint by name and update it to use the new model.
        predictor = sage.Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=self.sagemaker_session)
        predictor.update_endpoint(initial_instance_count=instance_count,
                                  instance_type=instance_type,
                                  tags=tags,
                                  model_name=model.name)

    return predictor.endpoint_name
def test_predict_jsonlines(tfs_predictor):
    """Send two JSON-lines records through the endpoint and check the decoded predictions."""
    # The payload is sent as-is with a jsonlines content type; the reply is parsed as JSON.
    passthrough = IdentitySerializer(content_type="application/jsonlines")
    json_reader = JSONDeserializer()

    # Reuse the fixture's endpoint and session, swapping in the (de)serializers above.
    jsonlines_predictor = sagemaker.Predictor(
        tfs_predictor.endpoint_name,
        tfs_predictor.sagemaker_session,
        serializer=passthrough,
        deserializer=json_reader,
    )

    payload = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]"
    expected = {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]}

    actual = jsonlines_predictor.predict(payload)

    assert expected == actual
def predict_wrapper(endpoint, session):
    """Build a `sagemaker.Predictor` for `endpoint` / `session` that serializes input as CSV."""
    csv_serializer = CSVSerializer()
    csv_predictor = sagemaker.Predictor(endpoint, session, serializer=csv_serializer)
    return csv_predictor