Example #1
def sagemaker_session(
        region_name=DEFAULT_REGION):  # type: (str) -> sagemaker.Session
    return sagemaker.Session(boto3.Session(region_name=region_name))
Example #2
    for line in lines[1:]:
        line = line.strip()
        fds = line.split(',')
        fds[0] = fds[-1]
        fds = fds[0:-1]
        new_line = ','.join(fds)
        new_lines.append(new_line)
    with open(new_fn, 'w') as fout:
        fout.write(os.linesep.join(new_lines))
    return new_fn

ON_SAGEMAKER_NOTEBOOK = False
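# Note: get_sm_execution_role (used below) is not defined in this snippet.
# A plausible sketch, assuming it returns the notebook execution role when
# running on SageMaker and otherwise resolves a role by name through IAM
# (the role name here is a hypothetical placeholder):
def get_sm_execution_role(on_sagemaker_notebook, region,
                          role_name='MySageMakerExecutionRole'):
    if on_sagemaker_notebook:
        # On a notebook instance the execution role is attached directly.
        return sagemaker.get_execution_role()
    # Elsewhere, look the role up by name and return its ARN.
    iam = boto3.Session(region_name=region).client('iam')
    return iam.get_role(RoleName=role_name)['Role']['Arn']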

# preparation
sm_boto3 = boto3.client('sagemaker')
sess = sagemaker.Session()
region = sess.boto_session.region_name
bucket = sess.default_bucket()  # this could also be a hard-coded bucket name
print('Using bucket ' + bucket)
sm_role = get_sm_execution_role(ON_SAGEMAKER_NOTEBOOK, region)

# Prepare data
data = load_boston()

X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.25, random_state=42)

trainX = pd.DataFrame(X_train, columns=data.feature_names)
trainX['target'] = y_train

testX = pd.DataFrame(X_test, columns=data.feature_names)
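# The snippet is cut off here; presumably the test frame is completed the same
# way as the training frame and both are written out for the training job
# (the file names below are illustrative assumptions):
testX['target'] = y_test

trainX.to_csv('boston_train.csv', index=False)
testX.to_csv('boston_test.csv', index=False)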
Example #3
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,
                                 np.array(train_X).astype('float32'),
                                 np.array(train_y).astype('float32'))
#smac.write_numpy_to_dense_tensor(buf, array_data, labels)
buf.seek(0)

#Uploading linear_traindata to S3
key = 'Example-Datasets/lineartrain2.data'
boto3.resource('s3').Bucket(bucket).Object(key).upload_fileobj(buf)
s3_train_data = 's3://{}/{}'.format(bucket, key)
print('uploaded training data location: {}'.format(s3_train_data))

# In[13]:

sess = sagemaker.Session()

# Retrieve the built-in linear-learner container image for this region
from sagemaker.amazon.amazon_estimator import get_image_uri
container = get_image_uri(boto3.Session().region_name, 'linear-learner')

linear = sagemaker.estimator.Estimator(container,
                                       role,
                                       train_instance_count=1,
                                       train_instance_type='ml.m4.xlarge',
                                       output_path=output_location,
                                       sagemaker_session=sess)

# In[14]:

#Providing hyperparameters
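# The snippet ends at this comment; a minimal sketch of what setting
# linear-learner hyperparameters and launching training typically looks like
# (feature_dim, predictor_type and batch size are illustrative assumptions):
linear.set_hyperparameters(feature_dim=13,
                           predictor_type='regressor',
                           mini_batch_size=100)

linear.fit({'train': s3_train_data})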
Example #4
def get_sagemaker_session(region, default_bucket=None):
    return sagemaker.Session(boto_session=boto3.Session(region_name=region),
                             default_bucket=default_bucket)
Example #5
subprocess.call([sys.executable, '-m', 'pip', 'install', 'sagemaker==1.13.0'])

from collections import defaultdict

import boto3
import numpy as np
import prettytable
import sagemaker
import scipy.cluster.hierarchy as hcluster
from sagemaker import get_execution_role
from sagemaker.mxnet import MXNetPredictor
from sagemaker.predictor import RealTimePredictor, json_serializer, json_deserializer

role = get_execution_role()
session = boto3.Session(region_name='us-west-2')
sagemaker_session = sagemaker.Session(boto_session=session)


def l2_distance(field, value):
    return np.linalg.norm(
        (np.array(field["center"]) - np.array(value["center"])))


def get_center(bbox):  # {'top': 911, 'height': 31, 'width': 328, 'left': 961}
    return bbox['top'] + bbox['height'] / 2, bbox["left"] + bbox["width"] / 2


class JSONPredictor(RealTimePredictor):
    def __init__(self, endpoint_name, sagemaker_session):
        super(JSONPredictor, self).__init__(endpoint_name, sagemaker_session,
                                            json_serializer, json_deserializer)
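# A usage sketch for the subclass above, assuming an already-deployed endpoint
# (the endpoint name and payload shape are hypothetical):
json_predictor = JSONPredictor('my-existing-endpoint', sagemaker_session)
result = json_predictor.predict({'instances': [[1.0, 2.0, 3.0]]})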
print('predictions: \t{}'.format(np.argmax(local_results, axis=1)))
print('target values: \t{}'.format(np.argmax(y_val[:10], axis=1)))

# In[23]:

local_predictor.delete_endpoint()

# In[24]:

s3_prefix = 'tf-txt-classfication-claims'

traindata_s3_prefix = '{}/data/train'.format(s3_prefix)
valdata_s3_prefix = '{}/data/val'.format(s3_prefix)
embeddingdata_s3_prefix = '{}/data/embedding'.format(s3_prefix)

train_s3 = sagemaker.Session().upload_data(path='./data/train/',
                                           key_prefix=traindata_s3_prefix)
val_s3 = sagemaker.Session().upload_data(path='./data/val/',
                                         key_prefix=valdata_s3_prefix)
embedding_s3 = sagemaker.Session().upload_data(
    path='./data/embedding/', key_prefix=embeddingdata_s3_prefix)

inputs = {'train': train_s3, 'val': val_s3, 'embedding': embedding_s3}
print(inputs)

# In[27]:

train_instance_type = 'ml.m4.xlarge'
hyperparameters = {
    'epochs': 1,
    'batch_size': 128,
    'num_words': num_words,
# TODO:  Remove hard-coding
# Check out this for a list of env vars:  https://github.com/aws/sagemaker-containers#sm-training-env
region = 'us-east-1'
os.environ['AWS_DEFAULT_REGION'] = region

sm = boto3.Session(region_name=region).client(service_name='sagemaker',
                                              region_name=region)

featurestore_runtime = boto3.Session(region_name=region).client(
    service_name='sagemaker-featurestore-runtime', region_name=region)

s3 = boto3.Session(region_name=region).client(service_name='s3',
                                              region_name=region)

sagemaker_session = sagemaker.Session(
    boto_session=boto3.Session(region_name=region),
    sagemaker_client=sm,
    sagemaker_featurestore_runtime_client=featurestore_runtime)

role = sagemaker.get_execution_role()
bucket = sagemaker_session.default_bucket()
############################

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

REVIEW_BODY_COLUMN = 'review_body'
REVIEW_ID_COLUMN = 'review_id'
# DATE_COLUMN = 'date'

LABEL_COLUMN = 'star_rating'
LABEL_VALUES = [1, 2, 3, 4, 5]
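# For reference, a minimal sketch of how the tokenizer above is typically
# applied to a review body (sample text and max length are illustrative
# assumptions; exact arguments depend on the transformers version):
sample_review = 'This product exceeded my expectations.'
encoded = tokenizer.encode_plus(sample_review,
                                max_length=64,
                                padding='max_length',
                                truncation=True)
print(encoded['input_ids'][:10])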
Example #8
# %%
# Import base packages
import os
import numpy as np
import datetime

# Import AWS training package
import sagemaker
from sagemaker.tensorflow import TensorFlow
from sagemaker.tensorflow import TensorFlowModel
import boto3

# %%
# Set up sagemaker session
sagemaker_session = sagemaker.Session(default_bucket='rdevprediction')

# Get default bucket
bucket_name = sagemaker_session.default_bucket()
print(bucket_name)

# Get role
role = sagemaker.get_execution_role()
print(role)

# set prefix, a descriptive name for the S3 directory
prefix = 'evpred'

train_dir = f's3://{bucket_name}/{prefix}/train/'
test_dir = f's3://{bucket_name}/{prefix}/test/'
output_path = f's3://{bucket_name}/{prefix}/output/'
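# The snippet is cut off before the estimator itself; a minimal sketch using
# the imported TensorFlow estimator, assuming SDK v2 parameter names, an
# entry-point script named train.py, and illustrative instance/framework
# settings:
tf_estimator = TensorFlow(entry_point='train.py',
                          role=role,
                          instance_count=1,
                          instance_type='ml.m5.large',
                          framework_version='2.3',
                          py_version='py37',
                          output_path=output_path,
                          sagemaker_session=sagemaker_session)

tf_estimator.fit({'train': train_dir, 'test': test_dir})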
                                        'ipaddress': clientIp
                                    }
                                    ctList.append(ctDict)
                                else:
                                    pass
            else:
                pass

keys = ctList[0].keys()
with open('ct-training-data.csv', 'w') as outf:
    dw = csv.DictWriter(outf, keys)
    dw.writerows(ctList)

print('CloudTrail training data preparation is complete, starting SageMaker job')
trainingBucket = sagemaker.Session().default_bucket()
ctTrainingOutputPath = 'cloudtrail-ipinsights'
trainingData = 'ct-training-data.csv'
trainingInstanceSize = 'ml.m5.large'
print('Uploading training data to Sagemaker default bucket')
s3resc.meta.client.upload_file('./' + trainingData, trainingBucket,
                               ctTrainingOutputPath + '/' + trainingData)
print('CT training data uploaded to S3')
trainingDataS3 = 's3://' + trainingBucket + '/' + ctTrainingOutputPath + '/' + trainingData
print(
    'Declaring sagemaker IAM role for current session. Ensure your trust policy allows sagemaker.amazonaws.com to perform sts:AssumeRole'
)
execution_role = sagemaker.get_execution_role()
print('Preparing S3 training data')
input_data = {
    'train':
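# The dict is cut off here; under SageMaker SDK v1 assumptions the train
# channel is typically wrapped as an s3_input pointing at the uploaded CSV:
input_data = {
    'train': sagemaker.session.s3_input(trainingDataS3, content_type='text/csv')
}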
Example #10
 def delete_endpoint(self, endpoint):
     """Delete a deployed SageMaker endpoint.

     :param endpoint: name of the endpoint to delete
     :return: None
     """
     sagemaker.Session().delete_endpoint(endpoint=endpoint)
Example #11
import os
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

role = get_execution_role()
region = boto3.Session().region_name

# S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket here if you wish.
bucket = sagemaker.Session().default_bucket()
prefix = 'sagemaker/DEMO-xgboost-parquet'
bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region, bucket)

#Install PyArrow
!python -m pip install pyarrow==0.15

import numpy as np
import pandas as pd
import urllib.request
from sklearn.datasets import load_svmlight_file

# Download the dataset and load into a pandas dataframe
FILE_NAME = 'abalone.csv'
urllib.request.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data", FILE_NAME)
feature_names=['Sex', 
               'Length', 
               'Diameter', 
               'Height', 
               'Whole weight', 
Example #12
import boto3
import sagemaker
import math
import dateutil
from time import time

start_time = time()

boto_session = boto3.Session(profile_name='cli_ml_access',
                             region_name='us-east-1')
sess = sagemaker.Session(boto_session=boto_session)

endpoint_final = 'sagemaker-scikit-learn-2019-12-18-10-11-18-055'

predictor = sagemaker.predictor.RealTimePredictor(endpoint=endpoint_final,
                                                  sagemaker_session=sess,
                                                  content_type='text/csv')

with open('Test DF single.csv', 'r') as f:
    print(predictor.predict(f.read()))

end_time = time()

print(end_time - start_time)
def sagemaker_session():
    boto_mock = MagicMock(name="boto_session", region_name=REGION)
    boto_mock.client("sts").get_caller_identity.return_value = {"Account": ACCOUNT_ID}
    sagemaker_session = sagemaker.Session(boto_session=boto_mock)
    sagemaker_session.boto_session.resource("s3").Bucket().creation_date = None
    return sagemaker_session
print(len(data[0][0]))
print(len(data[0][1]))
print(len(data[0][2]))

print(len(data[0][27]))

# In[81]:

response = predictor.predict(data)
print('Raw prediction result:')
response

# In[82]:

labeled_predictions = list(zip(range(10), response[0]))
print('Labeled predictions: ')
labeled_predictions

# In[83]:

labeled_predictions.sort(key=lambda label_and_prob: 1.0 - label_and_prob[1])
print('Most likely answer: {}'.format(labeled_predictions[0]))

# ## Delete the prediction endpoint
#

# In[84]:

sagemaker.Session().delete_endpoint(predictor.endpoint)
Example #15
File: census.py  Project: Criviere/census

counties_transformed['labels'] = list(map(int, cluster_labels))
counties_transformed.head()


# Now, we can examine one of the clusters in more detail, like cluster 5 for example. A cursory glance at the location of its centroid tells us that it has the highest value for the "Construction & Commuters" attribute. We can now see which counties fit that description.

# In[61]:


cluster = counties_transformed[counties_transformed['labels'] == 5]
cluster


# ## Conclusion
# 
# By clustering the dataset with KMeans after reducing its dimensionality with a PCA model, we are able to improve the explainability of our modelling and draw actionable conclusions. Using these techniques, we have been able to better understand the essential characteristics of US counties, segment the electorate into groupings accordingly, and highlight the characteristics of Miami-Dade County's electorate.

# In[ ]:


sagemaker.Session().delete_endpoint(pca_predictor.endpoint)


# In[ ]:


sagemaker.Session().delete_endpoint(kmeans_predictor.endpoint)

tensorboard_output_config = TensorBoardOutputConfig(
    s3_output_path=f"{s3_output_location}/tensorboard",
    container_local_output_path="/opt/ml/output/tensorboard",
)

hook_config = DebuggerHookConfig(
    s3_output_path=s3_output_location,
    collection_configs=[
        CollectionConfig("weights"),
        CollectionConfig("gradients"),
        CollectionConfig("biases")
    ],
)

sess = sagemaker.Session(default_bucket=BUCKET_NAME)
role = os.environ["SAGEMAKER_ROLE"]
tag = os.environ.get("CIRCLE_BRANCH") or "latest"
account_url = os.environ["AWS_ECR_ACCOUNT_URL"]

tf_estimator = Estimator(
    role=role,
    train_instance_count=1,
    train_instance_type="ml.m5.large",
    base_job_name=tag,
    sagemaker_session=sess,
    output_path=s3_output_location,
    image_name=f"{account_url}/{REPO_NAME}:{tag}",
    hyperparameters={
        "epochs": 200,
        "batch_size": 25,
Example #17
caller_identity = sts.get_caller_identity()
print("caller_identity: {}".format(caller_identity))

assumed_role_arn = caller_identity["Arn"]
print("(assumed_role) caller_identity_arn: {}".format(assumed_role_arn))

assumed_role_name = assumed_role_arn.split("/")[-2]

iam = boto3.Session(region_name=region).client(service_name="iam",
                                               region_name=region)
get_role_response = iam.get_role(RoleName=assumed_role_name)
print("get_role_response {}".format(get_role_response))
role = get_role_response["Role"]["Arn"]
print("role {}".format(role))

bucket = sagemaker.Session().default_bucket()
print("The DEFAULT BUCKET is {}".format(bucket))
#############################

sm = boto3.Session(region_name=region).client(service_name="sagemaker",
                                              region_name=region)

featurestore_runtime = boto3.Session(region_name=region).client(
    service_name="sagemaker-featurestore-runtime", region_name=region)

s3 = boto3.Session(region_name=region).client(service_name="s3",
                                              region_name=region)

sagemaker_session = sagemaker.Session(
    boto_session=boto3.Session(region_name=region),
    sagemaker_client=sm,
Example #18
def sagemaker_session():
    boto_mock = MagicMock(name="boto_session", region_name=REGION)
    ims = sagemaker.Session(boto_session=boto_mock)
    ims.expand_role = Mock(return_value=EXPANDED_ROLE)
    return ims
Example #19
    description='Compares the inference in keras and in tf.')
parser.add_argument("--train_path",
                    default='../train/',
                    help='Path to a directory that contains wav files.')
parser.add_argument("--test_path",
                    default='../test/phillipe_2.wav',
                    help='Path to a directory that contains wav files.')
parser.add_argument("--endpoint_feature",
                    default='sagemaker-tensorflow-2019-05-24-12-40-42-099',
                    help='Name of sagemaker endpoint of feature extraction')

FLAGS = parser.parse_args()

endpoint_feat_extract = FLAGS.endpoint_feature
boto_session = boto3.Session(region_name="eu-west-1")
session = sage.Session(boto_session=boto_session)
predictor = RealTimePredictor(endpoint_feat_extract,
                              sagemaker_session=session,
                              serializer=npy_serializer,
                              deserializer=numpy_deserializer)
ArgsParameters = collections.namedtuple('ArgsParameters', [
    'gpu', 'batch_size', 'net', 'ghost_cluster', 'vlad_cluster',
    'bottleneck_dim', 'aggregation_mode', 'resume', 'loss', 'test_type'
])
args = ArgsParameters(gpu='',
                      batch_size=16,
                      net='resnet34s',
                      ghost_cluster=2,
                      vlad_cluster=8,
                      bottleneck_dim=512,
                      aggregation_mode='gvlad',
#ROLE = get_execution_role()
ENDPOINT_NAME = "mnist-tensorflow-jhy"


def show_image(arr):
    pixels = arr.reshape(28, 28)
    plt.imshow(pixels, cmap="gray")
    plt.show()


if __name__ == "__main__":
    # download MNIST dataset
    data_set = mnist.read_data_sets('data', reshape=True)

    # upload local datasets to S3 bucket
    sagemaker_session = sagemaker.Session()
    inputs = sagemaker_session.upload_data(
        path='data',
        bucket=BUCKET,
        key_prefix='hands-on/mnist-tensorflow/data')

    # create model with custom tensorflow code
    mnist_estimator = TensorFlow(role=ROLE,
                                 entry_point='mnist.py',
                                 output_path=OUTPUT_PATH,
                                 code_location=OUTPUT_PATH,
                                 training_steps=1000,
                                 evaluation_steps=100,
                                 train_instance_count=2,
                                 train_instance_type='ml.c4.xlarge')
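    # The snippet is cut off here; presumably the estimator is then trained on
    # the uploaded data (a hedged completion, not part of the original code):
    mnist_estimator.fit(inputs)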
from sagemaker import get_execution_role
import sagemaker as sage
from sagemaker.estimator import Estimator
from sagemaker.inputs import FileSystemInput
import datetime
import subprocess
import sys

def get_str(cmd):
    # Run a shell command and return its stdout as a stripped string.
    content = subprocess.check_output(cmd, shell=True)
    return content.decode('utf-8').strip()

account = get_str("echo $(aws sts get-caller-identity --query Account --output text)")
region = get_str("echo $(aws configure get region)")
image = str(sys.argv[1])
sess = sage.Session()
image_name=f"{account}.dkr.ecr.{region}.amazonaws.com/{image}"
sagemaker_iam_role = str(sys.argv[2])
num_gpus = 8
num_nodes = 4
instance_type = 'ml.p3.16xlarge'
custom_mpi_cmds = []

job_name = "maskrcnn-{}x{}-{}".format(num_nodes, num_gpus, image)

output_path = 's3://mrcnn-sagemaker/sagemaker_training_release'

lustre_input = FileSystemInput(file_system_id='fs-03f556d03c3c590a2',
                               file_system_type='FSxLustre',
                               directory_path='/fsx',
                               file_system_access_mode='ro')
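# The snippet is cut off before the Estimator itself; a minimal sketch under
# SDK v1 assumptions, using the variables defined above (distributed-training
# settings such as the MPI options are omitted here):
estimator = Estimator(image_name=image_name,
                      role=sagemaker_iam_role,
                      train_instance_count=num_nodes,
                      train_instance_type=instance_type,
                      output_path=output_path,
                      sagemaker_session=sess)

estimator.fit(lustre_input, job_name=job_name, wait=False)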
def sagemaker_session():
    boto_mock = Mock(name="boto_session")
    ims = sagemaker.Session(boto_session=boto_mock)
    ims.default_bucket = Mock(name="default_bucket", return_value=BUCKET_NAME)
    return ims
# We can reduce dimensionality with the built-in SageMaker model for PCA.

# ### Roles and Buckets
# 
# > To create a model, you'll first need to specify an IAM role, and to save the model attributes, you'll need to store them in an S3 bucket.
# 
# The `get_execution_role` function retrieves the IAM role you created at the time you created your notebook instance. Roles are essentially used to manage permissions and you can read more about that [in this documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html). For now, know that we have a FullAccess notebook, which allowed us to access and download the census data stored in S3.
# 
# You must specify a bucket name for an S3 bucket in your account where you want SageMaker model parameters to be stored. Note that the bucket must be in the same region as this notebook. You can get a default S3 bucket, which automatically creates a bucket for you and in your region, by storing the current SageMaker session and calling `session.default_bucket()`.

# In[16]:


from sagemaker import get_execution_role

session = sagemaker.Session() # store the current SageMaker session

# get IAM role
role = get_execution_role()
print(role)


# In[17]:


# get default bucket
bucket_name = session.default_bucket()
print(bucket_name)
print()
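# The snippet stops after printing the bucket; a minimal sketch of
# instantiating the built-in PCA estimator mentioned above, under SDK v1
# assumptions (component count, instance type and output prefix are
# illustrative):
from sagemaker import PCA

pca_SM = PCA(role=role,
             train_instance_count=1,
             train_instance_type='ml.c4.xlarge',
             output_path='s3://{}/counties/'.format(bucket_name),
             num_components=33,
             sagemaker_session=session)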

Example #24
def handler(event, context):
    trainId = event['trainId']
    useSpotArg = event['useSpot']
    useSpot = True
    if useSpotArg.lower() == 'false':
        useSpot = False
    uniqueId = su.uuid()
    trainingConfigurationClient = bioims.client('training-configuration')
    trainInfo = trainingConfigurationClient.getTraining(trainId)
    embeddingName = trainInfo['embeddingName']
    embeddingInfo = trainingConfigurationClient.getEmbeddingInfo(embeddingName)
    trainScriptBucket = embeddingInfo['modelTrainingScriptBucket']
    trainScriptKey = embeddingInfo['modelTrainingScriptKey']
    localTrainingScript = '/tmp/bioims-training-script.py'
    getS3TextObjectWriteToPath(trainScriptBucket, trainScriptKey,
                               localTrainingScript)
    trainListArtifactKey = bp.getTrainImageListArtifactPath(trainId)
    sagemaker_session = sagemaker.Session()
    sagemaker_bucket = sagemaker_session.default_bucket()
    sagemaker_role = sagemaker.get_execution_role()
    framework_version = '1.6.0'
    instance_type = embeddingInfo['trainingInstanceType']
    trainingHyperparameters = embeddingInfo['trainingHyperparameters']
    fsxInfo = getFsxInfo()
    print(fsxInfo)
    directory_path = '/' + fsxInfo['mountName']
    sgIds = []
    sgIds.append(fsxInfo['securityGroup'])
    jobName = 'bioims-' + trainId + '-' + uniqueId
    checkpoint_s3_uri = "s3://" + sagemaker_bucket + "/checkpoints/" + jobName

    file_system_input = FileSystemInput(file_system_id=fsxInfo['fsxId'],
                                        file_system_type='FSxLustre',
                                        directory_path=directory_path,
                                        file_system_access_mode='ro')

    trainingHyperparameters['train_list_file'] = trainListArtifactKey

    if useSpot:
        estimator = PyTorch(
            entry_point=localTrainingScript,
            role=sagemaker_role,
            framework_version=framework_version,
            instance_count=1,
            instance_type=instance_type,
            py_version='py36',
            image_name=
            '763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:1.6.0-gpu-py36-cu101-ubuntu16.04',
            subnets=fsxInfo['subnetIds'],
            security_group_ids=sgIds,
            hyperparameters=trainingHyperparameters,
            train_use_spot_instances=True,
            train_max_wait=100000,
            train_max_run=100000,
            checkpoint_s3_uri=checkpoint_s3_uri,
            debugger_hook_config=False)
    else:
        estimator = PyTorch(
            entry_point=localTrainingScript,
            role=sagemaker_role,
            framework_version=framework_version,
            instance_count=1,
            instance_type=instance_type,
            py_version='py36',
            image_name=
            '763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:1.6.0-gpu-py36-cu101-ubuntu16.04',
            subnets=fsxInfo['subnetIds'],
            security_group_ids=sgIds,
            hyperparameters=trainingHyperparameters,
            train_use_spot_instances=False,
            checkpoint_s3_uri=checkpoint_s3_uri,
            debugger_hook_config=False)

    trainingConfigurationClient.updateTraining(trainId, 'sagemakerJobName',
                                               jobName)

    estimator.fit(file_system_input, wait=False, job_name=jobName)

    responseInfo = {'trainingJobName': jobName}

    response = {'statusCode': 200, 'body': responseInfo}

    return response
def sagemaker_session():
    boto_mock = Mock(name='boto_session')
    boto_mock.client('sts').get_caller_identity.return_value = {'Account': '123'}
    ims = sagemaker.Session(boto_session=boto_mock, sagemaker_client=Mock())
    ims.expand_role = Mock(return_value=EXPANDED_ROLE)
    return ims
import boto3
import re
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
import os
import urllib.request

#Download and Unzip
!wget https://yourDatasource.zip
!unzip yourDatasource.zip


role = get_execution_role()
bucket = sagemaker.Session().default_bucket()
training_image = get_image_uri(boto3.Session().region_name, 'image-classification')

def upload_to_s3(channel, Path):
    # Upload a local path to the default bucket under the given channel prefix
    # and return the resulting S3 URI.
    s3_path_to_data = sagemaker.Session().upload_data(bucket=bucket,
                                                      path=Path,
                                                      key_prefix=channel)
    return s3_path_to_data


# data copy to s3
s3_train_key = "image-classification-full-training/train"
s3_validation_key = "image-classification-full-training/validation"
s3_train = 's3://{}/{}/'.format(bucket, s3_train_key)
s3_validation = 's3://{}/{}/'.format(bucket, s3_validation_key)
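# A usage sketch for the upload_to_s3 helper defined above, assuming the
# unzipped data sits in local train/ and validation/ folders:
upload_to_s3(s3_train_key, 'train')
upload_to_s3(s3_validation_key, 'validation')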

Example #27
File: train.py  Project: hideya/doodle
def train(source_dir,
          data_path='doodle/data',
          training_steps=20000,
          evaluation_steps=2000,
          train_instance_type='local',
          train_instance_count=1,
          run_tensorboard_locally=True,
          uid=None,
          role=None,
          bucket=None,
          profile_name=None):
    assert os.path.exists(source_dir)
    boto_session = boto3.Session(profile_name=profile_name)
    session = sagemaker.Session(boto_session=boto_session)
    role = role if role is not None else sagemaker.get_execution_role()
    bucket = bucket if bucket is not None else session.default_bucket()
    uid = uid if uid is not None else uuid4()
    logger.debug(session.get_caller_identity_arn())
    role = session.expand_role(role)

    params = {
        'train_tfrecord_file': 'train.tfr',
        'test_tfrecord_file': 'test.tfr',
        'samples_per_epoch': 700000,
        'save_summary_steps': 100,
    }

    output_path = 's3://{}/doodle/model/{}/export'.format(bucket, uid)
    checkpoint_path = 's3://{}/doodle/model/{}/ckpt'.format(bucket, uid)
    code_location = 's3://{}/doodle/model/{}/source'.format(bucket, uid)
    base_job_name = 'doodle-training-job-{}'.format(uid)
    data_dir = 's3://{}/{}'.format(bucket, data_path)

    logger.info('uid                  : {}'.format(uid))
    logger.info('execution_role       : {}'.format(role))
    logger.info('data_dir             : {}'.format(data_dir))
    logger.info('output_path          : {}'.format(output_path))
    logger.info('checkpoint_path      : {}'.format(checkpoint_path))
    logger.info('code_location        : {}'.format(code_location))
    logger.info('base_job_name        : {}'.format(base_job_name))
    logger.info('training_steps       : {}'.format(training_steps))
    logger.info('evaluation_steps     : {}'.format(evaluation_steps))
    logger.info('train_instance_count : {}'.format(train_instance_count))
    logger.info('train_instance_type  : {}'.format(train_instance_type))
    logger.info('hyperparameters      : {}'.format(json.dumps(params)))

    estimator = TensorFlow(hyperparameters=params,
                           output_path=output_path,
                           checkpoint_path=checkpoint_path,
                           code_location=code_location,
                           base_job_name=base_job_name,
                           source_dir=source_dir,
                           entry_point='doodle.py',
                           framework_version='1.6',
                           role=role,
                           training_steps=training_steps,
                           evaluation_steps=evaluation_steps,
                           train_instance_count=train_instance_count,
                           train_instance_type=train_instance_type)

    estimator.fit(data_dir, run_tensorboard_locally=run_tensorboard_locally)
Example #29
print(train_data)
print(test_data)
print('Output: {}'.format(output_prefix))

best_model = ""

job_name = stack_name + "-" + commit_id

fm = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],
                                   role,
                                   train_instance_count=1,
                                   train_instance_type='ml.c4.xlarge',
                                   output_path=output_prefix,
                                   base_job_name=job_name,
                                   sagemaker_session=sagemaker.Session())

no_hyper_parameter_tuning = True

if (no_hyper_parameter_tuning):
    #
    # Run the training job
    #
    fm.set_hyperparameters(feature_dim=nbFeatures,
                           predictor_type='binary_classifier',
                           mini_batch_size=1000,
                           num_factors=64,
                           epochs=100)

    fm.fit({'train': train_data, 'test': test_data})
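# The tuning branch implied by the flag above is cut off in this snippet; a
# minimal sketch of what it might look like (the objective metric and
# hyperparameter ranges are illustrative assumptions, not the original code):
else:
    from sagemaker.tuner import HyperparameterTuner, IntegerParameter, ContinuousParameter

    fm.set_hyperparameters(feature_dim=nbFeatures,
                           predictor_type='binary_classifier',
                           num_factors=64,
                           mini_batch_size=1000)

    tuner = HyperparameterTuner(estimator=fm,
                                objective_metric_name='test:binary_classification_accuracy',
                                hyperparameter_ranges={
                                    'epochs': IntegerParameter(50, 200),
                                    'factors_lr': ContinuousParameter(0.0001, 0.2),
                                },
                                max_jobs=6,
                                max_parallel_jobs=2)

    tuner.fit({'train': train_data, 'test': test_data})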
Example #30
    if isinstance(session, sagemaker.Session):
        session = session.boto_session
    client = session.client('s3control')
    account_id = get_account(session)
    description = batch_describe_get(account_id=account_id,
                                     job_id=job_id,
                                     client=client)
    description = get_field(description, field)
    return description


def batch_describe_get(client, account_id, job_id):
    description = client.describe_job(AccountId=account_id, JobId=job_id)
    description = description.get('Job')
    return description


if __name__ == '__main__':
    import boto3
    session = sagemaker.Session(boto_session=boto3.Session(
        profile_name='default'))
    description = batch_describe(job_id='ee1e7233-2668-4320-9fff-e13613bd7622',
                                 session=session,
                                 field=None)
    print(description)

    description = batch_describe(job_id='ee1e7233-2668-4320-9fff-e13613bd7622',
                                 session=session,
                                 field="Report.s3")
    print(description)