Example #1
import numpy as np
from azureml.core import Environment, Workspace
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice


def main():
    """
    Deploy the best registered keras_mnist model as an ACI web service.
    """
    work_space = Workspace.from_config()
    environment = Environment("keras-service-environment")
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"],
    )
    # Pick the registered model version with the highest validation accuracy
    model_list = Model.list(work_space, name="keras_mnist")
    validation_accuracy = [
        float(m.properties["val_accuracy"]) for m in model_list
    ]
    versions = [m.version for m in model_list]
    model = Model(work_space,
                  "keras_mnist",
                  version=versions[np.argmax(validation_accuracy)])
    service_name = "keras-mnist-service"
    inference_config = InferenceConfig(entry_script="score_keras.py",
                                       environment=environment)
    aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
    service = Model.deploy(
        workspace=work_space,
        name=service_name,
        models=[model],
        inference_config=inference_config,
        deployment_config=aci_config,
        overwrite=True,
    )
    service.wait_for_deployment(show_output=True)
    print(service.get_logs())
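
The deployment above assumes a scoring entry script. A minimal sketch of what score_keras.py could look like, following the standard Azure ML init()/run() contract (the input payload shape is an assumption):

import json

import numpy as np
from azureml.core.model import Model
from tensorflow import keras

model = None


def init():
    # Runs once when the service container starts: load the registered model.
    global model
    model_path = Model.get_model_path("keras_mnist")
    model = keras.models.load_model(model_path)


def run(raw_data):
    # Runs per request: expects JSON like {"data": [[...pixels...], ...]}.
    data = np.array(json.loads(raw_data)["data"])
    predictions = np.argmax(model.predict(data), axis=-1)
    return json.dumps({"predictions": predictions.tolist()})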
Example #2
def evaluate_model():
    # exp, release_id and ws are assumed to be defined at module scope.
    all_runs = exp.get_runs(properties={
        "release_id": release_id,
        "run_type": "train"
    },
                            include_children=True)
    # all_runs is a generator over every run that matched the property filter.
    # Runs come back in descending order, so the first value is the most recent run.
    new_model_run = next(all_runs)
    new_model_run_id = new_model_run.id
    print(f'New Run found with Run ID of: {new_model_run_id}')

    new_model_acc = new_model_run.get_metrics().get("final-accuracy")

    try:
        # Get most recently registered model, we assume that
        # is the model in production.
        # Download this model and compare it with the recently
        # trained model by running test with same data set.
        model_list = Model.list(ws)
        production_model = next(
            filter(
                lambda x: x.created_time == max(model.created_time
                                                for model in model_list),
                model_list,
            ))
        production_model_run_id = production_model.tags.get("run_id")

        # Get the run history for both production model and
        # newly trained model and compare final-accuracy
        production_model_run = Run(exp, run_id=production_model_run_id)

        production_model_acc = production_model_run.get_metrics().get(
            "final-accuracy")

        print(
            "Current Production model accuracy: {}, New trained model accuracy: {}"
            .format(production_model_acc, new_model_acc))

        promote_new_model = False
        if new_model_acc > production_model_acc:
            promote_new_model = True
            print(
                "New trained model performs better, thus it will be registered"
            )
    except Exception:
        # No production model registered yet, so there is nothing to compare against.
        promote_new_model = True
        print("This is the first model to be trained,"
              " thus nothing to evaluate for now")

    return promote_new_model, new_model_run, new_model_acc
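
A hedged usage sketch (model name and path are assumptions) showing how the returned flag can gate registration:

promote_new_model, new_model_run, new_model_acc = evaluate_model()
if promote_new_model:
    # Register the winning run's artifact as the new production model.
    new_model_run.register_model(model_name="my_model",
                                 model_path="outputs/model.pkl",
                                 tags={"run_id": new_model_run.id},
                                 properties={"final-accuracy": new_model_acc})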
Example #3
import os

from azureml.core import Model


def registerModel(workspace, experiment, model_name, model_file):
    '''
        Search an existing AMLS workspace for models. If one with the given
        name is found, return it; otherwise register a new model.

        If the parameter model_file points to a file on disk, that file is
        used to register the new model. If not, a new dummy pkl file is
        generated first.

        PARAMS:
            workspace   : azureml.core.Workspace  : Existing AMLS Workspace
            experiment  : azureml.core.Experiment : Existing AMLS Experiment
            model_name  : String                  : The name of the model to register
            model_file  : String                  : One of two values:
                                                      1. Name of a pkl file to create (dummy for RTS)
                                                      2. Path to a pkl model file in the same
                                                         directory as the running script.

        RETURNS:
            azureml.core.Model
    '''

    return_model = None

    # If the model already exists, just return it.
    models = Model.list(workspace)
    if models:
        for model in models:
            if model.name == model_name:
                print("Returning existing model....", model_name)
                return_model = model
                break

    if not return_model:
        # Otherwise create it.
        print("Creating new model....")
        run = experiment.start_logging()
        run.log("Just simply dumping somethign in", True)

        # If the file does not exist, create a dummy model file
        # (createPickle is a helper defined elsewhere in the source module).
        if not os.path.exists(model_file):
            createPickle(model_file)

        run.upload_file(name='outputs/' + model_file,
                        path_or_stream='./' + model_file)

        # Complete tracking
        run.complete()

        return_model = run.register_model(model_name=model_name,
                                          model_path="outputs/" + model_file)

    return return_model
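
A hedged usage sketch (workspace config and names are assumptions):

from azureml.core import Experiment, Workspace

ws = Workspace.from_config()
exp = Experiment(ws, "register-demo")
model = registerModel(ws, exp, "dummy_model", "dummy_model.pkl")
print(model.name, "version:", model.version)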
Example #4
from azureml.core import Run
from azureml.core.model import Model


def main(model_name, model_version, target_path):
    run = Run.get_context()
    models = Model.list(run.experiment.workspace, name=model_name)

    if model_version is not None:
        try:
            model = next(m for m in models if m.version == model_version)
        except StopIteration:
            raise ValueError("This version of the model was not found")
    else:
        print("Model version not specified. Using latest version.")
        model = max(models, key=lambda x: x.version)

    model.download(target_dir=target_path, exist_ok=True)
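
A hedged invocation sketch (flag names are assumptions) for running this as a script:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", required=True)
    parser.add_argument("--model-version", type=int, default=None)
    parser.add_argument("--target-path", default=".")
    args = parser.parse_args()
    main(args.model_name, args.model_version, args.target_path)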
Example #5
# register a model from a local file (reconstructed call head; the arguments
# mirror the run.register_model example below, and the local path is an assumption)
from azureml.core import Model

model = Model.register(workspace=ws,
                       model_name='classification_model',
                       model_path='model.pkl',  # local path
                       description='A classification model',
                       tags={'dept': 'sales'},
                       model_framework=Model.Framework.SCIKITLEARN,
                       model_framework_version='0.20.3')

# alternatively, register directly from a run's outputs
run.register_model( model_name='classification_model',
                    model_path='outputs/model.pkl', # run outputs path
                    description='A classification model',
                    tags={'dept': 'sales'},
                    model_framework=Model.Framework.SCIKITLEARN,
                    model_framework_version='0.20.3')


#view registered models
from azureml.core import Model

for model in Model.list(ws):
    # Get model name and auto-generated version
    print(model.name, 'version:', model.version)


#NOW TRAIN MODEL

#1 connect to workspace
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Example #6
all_runs = exp.get_runs(properties={
    "release_id": release_id,
    "run_type": "train"
},
                        include_children=True)
new_model_run = next(all_runs)
new_model_run_id = new_model_run.id
print(f'New Run found with Run ID of: {new_model_run_id}')

try:
    # Get most recently registered model, we assume that
    # is the model in production.
    # Download this model and compare it with the recently
    # trained model by running test with same data set.
    model_list = Model.list(ws)
    production_model = next(
        filter(
            lambda x: x.created_time == max(model.created_time
                                            for model in model_list),
            model_list,
        ))
    production_model_run_id = production_model.tags.get("run_id")

    # Get the run history for both production model and
    # newly trained model and compare mse
    production_model_run = Run(exp, run_id=production_model_run_id)
    new_model_run = Run(exp, run_id=new_model_run_id)

    production_model_mse = production_model_run.get_metrics().get("mse")
    new_model_mse = new_model_run.get_metrics().get("mse")

    # Lower mse is better; mirror the promotion logic of Example #2.
    promote_new_model = new_model_mse < production_model_mse
except Exception:
    # No production model registered yet, nothing to compare against.
    promote_new_model = True
Example #7
# Get workspace
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace

# Create the inference config
inference_config = InferenceConfig(runtime="python",
                                   entry_script="score.py",
                                   conda_file="conda_dependencies.yml",
                                   source_directory="./deploy/scoring/")
print(inference_config)


# Get latest model
model_list = Model.list(ws, name=model_name)
model = max(model_list, key=lambda m: m.created_time)

aks_target = ComputeTarget(ws, "one-dspe-aks")
deployment_config = AksWebservice.deploy_configuration(cpu_cores=1,
                                                       memory_gb=4)
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       deployment_target=aks_target)

model = run.register_model(model_name="credit_scoring_" + notebook_username,
                           tags={'area': "Credit scoring", 'type': "classification"},
                           description="Credit Scoring model",
                           iteration=None,  # you can deploy a specific iteration
                           metric="AUC_weighted")  # or the best model by another metric, e.g. "accuracy"

# COMMAND ----------

# MAGIC %md You can explore the registered models within your workspace and query by tag. Models are versioned. If you call the register_model command many times with same model name, you will get multiple versions of the model with increasing version numbers.

# COMMAND ----------

from azureml.core import Model

my_models = Model.list(workspace=ws, tags=['area'])
for m in my_models:
    print("Name:", m.name,"\tVersion:", m.version, "\tDescription:", m.description, m.tags)

# COMMAND ----------

# MAGIC %md ### Create Docker Image

# COMMAND ----------

# MAGIC %md In addition to the registered model, in order to create a Docker image we will also need:
# MAGIC - a scoring Python file (*score.py*), which will be called whenever there is a request for a prediction 
# MAGIC - a Conda dependencies file (*myenv.yml*), which contains all other Anaconda dependencies which should be included in the image
# MAGIC 
# MAGIC Optionally you can also provide:
# MAGIC - a Dockerfile, if you'd rather [use your own custom Docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-custom-docker-image) instead of the one provided by Azure ML Service
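
# COMMAND ----------

# MAGIC %md As a sketch (the package list is illustrative), the *myenv.yml* Conda dependencies file mentioned above can be generated with the SDK:

# COMMAND ----------

from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies.create(conda_packages=['scikit-learn'],
                                 pip_packages=['azureml-defaults'])
with open('myenv.yml', 'w') as f:
    f.write(myenv.serialize_to_string())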
def main():
    # Env is an environment-variable helper from the surrounding project.
    e = Env()

    print('********************')
    print(e.source_directory)

    files = os.listdir('./aml_pipeline')
    for f in files:
        print(f)

    print('***************')

    workspace_name = e.workspace_name
    subscription_id = e.subscription_id
    resource_group = e.resource_group

    #Connect to AML Workspace
    print('workspace_name = ' + workspace_name)
    print('subscription_id = ' + subscription_id)
    print('resource_group = ' + resource_group)

    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
    )

    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    default_ds = ws.get_default_datastore()

    if 'diabetes dataset' not in ws.datasets:
        default_ds.upload_files(
            files=['diabetes.csv', 'diabetes2.csv'],  # Upload the diabetes csv files in /data
            target_path='diabetes-data/',  # Put it in a folder path in the datastore
            overwrite=True,  # Replace existing files of the same name
            show_progress=True)

        #Create a tabular dataset from the path on the datastore (this may take a short while)
        tab_data_set = Dataset.Tabular.from_delimited_files(
            path=(default_ds, 'diabetes-data/*.csv'))

        # Register the tabular dataset
        try:
            tab_data_set = tab_data_set.register(workspace=ws,
                                                 name='diabetes dataset',
                                                 description='diabetes data',
                                                 tags={'format': 'CSV'},
                                                 create_new_version=True)
            print('Dataset registered.')
        except Exception as ex:
            print(ex)
    else:
        print('Dataset already registered.')

    # Create a folder for the pipeline step files
    experiment_folder = 'diabetes_pipeline'
    os.makedirs(experiment_folder, exist_ok=True)

    print(experiment_folder)

    cluster_name = "mmcomputecluster"

    try:
        # Check for existing compute target
        pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing cluster, use it.')
    except ComputeTargetException:
        # If it doesn't already exist, create it
        try:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='STANDARD_DS11_V2', max_nodes=2)
            pipeline_cluster = ComputeTarget.create(ws, cluster_name,
                                                    compute_config)
            pipeline_cluster.wait_for_completion(show_output=True)
        except Exception as ex:
            print(ex)
            raise  # pipeline_cluster would be undefined below, so fail fast

    # Create a Python environment for the experiment
    diabetes_env = Environment("diabetes-pipeline-env")
    diabetes_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diabetes_env.docker.enabled = True  # Use a docker container

    # Create a set of package dependencies
    diabetes_packages = CondaDependencies.create(
        conda_packages=[
            'scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip'
        ],
        pip_packages=[
            'azureml-defaults', 'azureml-dataprep[pandas]', 'pyarrow'
        ])

    # Add the dependencies to the environment
    diabetes_env.python.conda_dependencies = diabetes_packages

    # Register the environment
    diabetes_env.register(workspace=ws)
    registered_env = Environment.get(ws, 'diabetes-pipeline-env')

    # Create a new runconfig object for the pipeline
    pipeline_run_config = RunConfiguration()

    # Use the compute you created above.
    pipeline_run_config.target = pipeline_cluster

    # Assign the environment to the run configuration
    pipeline_run_config.environment = registered_env

    print("Run configuration created.")

    # Get the training dataset
    diabetes_ds = ws.datasets.get("diabetes dataset")

    # Create a PipelineData (temporary Data Reference) for the model folder
    prepped_data_folder = PipelineData("prepped_data_folder",
                                       datastore=ws.get_default_datastore())

    # Step 1, Run the data prep script
    prep_step = PythonScriptStep(name="Prepare Data",
                                 script_name="prep_diabetes.py",
                                 source_directory='./aml_pipeline',
                                 arguments=[
                                     '--input-data',
                                     diabetes_ds.as_named_input('raw_data'),
                                     '--prepped-data', prepped_data_folder
                                 ],
                                 outputs=[prepped_data_folder],
                                 compute_target=pipeline_cluster,
                                 runconfig=pipeline_run_config,
                                 allow_reuse=True)

    # Step 2, run the training script
    train_step = PythonScriptStep(
        name="Train and Register Model",
        source_directory='./aml_pipeline',
        script_name="train_diabetes.py",
        arguments=['--training-folder', prepped_data_folder],
        inputs=[prepped_data_folder],
        compute_target=pipeline_cluster,
        runconfig=pipeline_run_config,
        allow_reuse=True)

    print("Pipeline steps defined")

    pipeline_steps = [prep_step, train_step]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
    print("Pipeline is built.")

    # Create an experiment and run the pipeline
    experiment = Experiment(workspace=ws, name='jlg-exp')
    pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
    print("Pipeline submitted for execution.")
    pipeline_run.wait_for_completion(show_output=True)

    for run in pipeline_run.get_children():
        print(run.name, ':')
        metrics = run.get_metrics()
        for metric_name in metrics:
            print('\t', metric_name, ":", metrics[metric_name])

    for model in Model.list(ws):
        print(model.name, 'version:', model.version)
        for tag_name in model.tags:
            tag = model.tags[tag_name]
            print('\t', tag_name, ':', tag)
        for prop_name in model.properties:
            prop = model.properties[prop_name]
            print('\t', prop_name, ':', prop)
        print('\n')

    # Publish the pipeline from the run
    published_pipeline = pipeline_run.publish_pipeline(
        name="diabetes-training-pipeline",
        description="Trains diabetes model",
        version="1.0")

    rest_endpoint = published_pipeline.endpoint
    print(rest_endpoint)
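
    # A hedged follow-up sketch (the authentication flow is an assumption):
    # trigger the published pipeline through its REST endpoint.
    import requests
    from azureml.core.authentication import InteractiveLoginAuthentication

    auth = InteractiveLoginAuthentication()
    headers = auth.get_authentication_header()
    response = requests.post(rest_endpoint,
                             headers=headers,
                             json={"ExperimentName": "jlg-exp"})
    print("Submitted pipeline run:", response.json().get("Id"))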
Example #10
avg_score = sum(score) / len(score)
avg_time = total_time / len(score)

tags = {
    'accuracy': avg_score,
    'latency': avg_time,
    'num_images': len(score),
    'name': 'eval'
}

# Register the model only if it has highest accuracy among the trained models
run = Run.get_context()
ws = run.experiment.workspace

registered_models = Model.list(ws, name=MODEL_NAME)
max_accuracy = 0.0
for model in registered_models:
    # Tag values are returned as strings, so cast before comparing.
    registered_accuracy = float(model.tags.get('accuracy', 0))
    if max_accuracy < registered_accuracy:
        max_accuracy = registered_accuracy

if max_accuracy < avg_score:
    # TODO: Verify that images and masks are split in the same way
    Model.register(
        workspace=ws,
        model_path=PATH_MODEL,  # model_path contains architecture and weights
        model_name=MODEL_NAME,
        tags=tags,
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version='2.3')
Example #11
    def getOperationOutput(self,
                           operationNoun,
                           operationId,
                           userId,
                           subscriptionId,
                           downloadFiles=True):
        operationName = self.GetOperationNameByNoun(operationNoun)

        if operationName == 'train':

            tags = [['userId', userId], ['modelId', operationId],
                    ['subscriptionId', subscriptionId]]
            models = Model.list(self._workspace, tags=tags)
            if len(models) == 0:
                return None, None
            model = models[0]
            result = {
                'id': operationId,
                'description': model.description,
                'created_time': model.created_time
            }
            return result, "model"

        if operationName == 'deploy':

            tags = [['userId', userId], ['endpointId', operationId],
                    ['subscriptionId', subscriptionId]]
            endpoints = Webservice.list(self._workspace, tags=tags)
            if len(endpoints) == 0:
                return None, None
            endpoint = endpoints[0]
            primaryKey, secondaryKey = endpoint.get_keys()
            result = {
                'id': operationId,
                'description': endpoint.description,
                'created_time': endpoint.created_time,
                'scoring_uri': endpoint.scoring_uri,
                'primary_key': primaryKey,
                'secondary_key': secondaryKey
            }

            return result, "endpoint"

        tags = {
            'userId': userId,
            'operationId': operationId,
            'operationName': operationName,
            'subscriptionId': subscriptionId
        }

        experimentName = subscriptionId
        exp = Experiment(self._workspace, experimentName)
        runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
        try:
            run = next(runs)
            child_runs = run.get_children()
            child_run = next(child_runs)
            outputType = self._utils.GetOutputType(operationName)
            if outputType == 'json':
                with tempfile.TemporaryDirectory() as tmp:
                    path = os.path.join(tmp, 'output.json')
                    child_run.download_file('/outputs/output.json', path)
                    with open(path) as file:
                        return json.load(file), "json"
            elif outputType == 'file':
                if downloadFiles:
                    # Use mkdtemp so the directory outlives this call
                    # (a TemporaryDirectory object is cleaned up once it is
                    # garbage-collected).
                    tmp = tempfile.mkdtemp()
                    path = os.path.join(tmp, "outputs")
                    zip_file_path = os.path.join(
                        tmp, "output_{}.zip".format(operationId))
                    child_run.download_files("/outputs",
                                             path,
                                             append_prefix=False)
                    zipf = zipfile.ZipFile(zip_file_path, "w",
                                           zipfile.ZIP_DEFLATED)
                    self.zipdir(path, zipf, "outputs")
                    zipf.close()
                    return zip_file_path, "file"
                else:
                    return "file", "file"
        except StopIteration:
            return None, None
Example #12
# DO NOT USE IN PRODUCTION ENVIRONMENTS.

from azureml.core import Environment, Model, Run
from azureml.core.webservice import AksWebservice
from azureml.core.compute import AksCompute
from azureml.core.model import InferenceConfig
from azureml.core.conda_dependencies import CondaDependencies

run = Run.get_context()
ws = run.experiment.workspace
service_name = "trump-tweets-scoring-aml"
model_name = "trump-tweet-classification"

# get the latest registered version of the model
model = next(
    iter(Model.list(workspace=ws, name=model_name, latest=True)),
    None,
)

environment = Environment("trump-tweet-inferencing")
conda_dep = CondaDependencies()
conda_dep.add_conda_package("scikit-learn")
conda_dep.add_pip_package("azureml-sdk")
environment.python.conda_dependencies = conda_dep

aks_target = AksCompute(ws, "trump-tweets-inf")

deployment_config = AksWebservice.deploy_configuration(cpu_cores=2,
                                                       memory_gb=8)

inference_config = InferenceConfig(entry_script="score.py",
new_model_run = None

# get the last completed run with metrics
new_model_run = None
for run in all_runs:
    acc = run.get_metrics().get("val_accuracy")
    print(f'run is {run}, acc is {acc}')
    if run.get_status() == 'Finished' and acc is not None:
        new_model_run = run
        print('found a valid new model with acc {}'.format(acc))
        break

if new_model_run is None:
    raise Exception('new model must log a val_accuracy metric, please check'
                    ' your train.py file')
model_list = Model.list(ws)

# Check that there are models
if len(model_list) > 0:

    # Get the model with best val accuracy, assume this is best
    cur_max = None
    production_model = None

    for model in model_list:
        run_metrics = model.run.get_metrics().get("val_accuracy")
        if run_metrics is None:
            continue
        # get_metrics returns a list when a metric was logged more than once
        cur_acc = run_metrics[-1] if isinstance(run_metrics, list) else run_metrics
        if cur_max is None or cur_acc > cur_max:
            cur_max = cur_acc
            production_model = model

    production_model_acc = cur_max
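
# A hedged continuation sketch mirroring Example #2's promotion logic
# (assumes the branch above ran, i.e. at least one model is registered,
# and that val_accuracy is a scalar for the new run).
new_model_acc = new_model_run.get_metrics().get("val_accuracy")
promote_new_model = new_model_acc > production_model_acc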