# Log the row_count value as a metric named "number"
run.log("number", row_count)

# Complete the experiment run
run.complete()


# Sometimes experiment submission fails with a snapshot size error. This happens because the
# snapshot is limited to 300 MB by default; the limit can be raised (for example to 2000 MB),
# but it is better not to submit from a directory that contains a virtual environment, because
# the entire source directory is copied into the snapshot.
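
# A minimal sketch of one way to keep large folders out of the snapshot: list them in an
# .amlignore file next to the submitted code. The folder names below are assumptions about
# what a typical project directory contains.
with open('.amlignore', 'w') as f:
    f.write('.venv/\n')   # assumed local virtual-environment folder
    f.write('data/\n')    # assumed large local data folder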


# Now let's view the logged metrics with the RunDetails widget.

# Note: the widget may require the browser to allow third-party content in order to render.
from azureml.widgets import RunDetails
RunDetails(run).show()

# A second way to view the logged metrics

import json

# Get logged metrics
metrics = run.get_metrics()
print(json.dumps(metrics, indent=2))


# We can also upload a local output file to the run's outputs
run.upload_file(name='outputs/sample.csv', path_or_stream='./sample.csv')
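
# To confirm the upload, the files attached to the run can be listed; a small usage sketch
# using the run defined above.
for file_name in run.get_file_names():
    print(file_name)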


Example No. 2
automl_settings = {
    # ... earlier settings truncated in this excerpt ...
    "primary_metric": 'AUC_weighted'
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task="classification",
                             training_data=dataset,
                             label_column_name="DEATH_EVENT",
                             path=project_folder,
                             enable_early_stopping=True,
                             featurization='auto',
                             debug_log="automl_errors.log",
                             model_explainability=True,
                             enable_onnx_compatible_models=True,
                             **automl_settings)

#################

automl_run1 = experiment.submit(automl_config, show_output=True)

####
rns = automl_experiment.get_runs()   # generator over the experiment's runs, most recent first
next(rns)                            # peek at the most recent run
#####
from azureml.widgets import RunDetails

widget1 = RunDetails(automl_run1)
widget1.show()
###

best_run = automl_run1.get_best_child()
best_run.get_metrics()
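
###
# Because enable_onnx_compatible_models was set above, the ONNX version of the best model can
# also be retrieved; a hedged sketch, assuming the installed SDK supports the return_onnx_model flag.
best_run_onnx, onnx_model = automl_run1.get_output(return_onnx_model=True)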
Example No. 3

pipeline_run = Experiment(ws, experiment_name).submit(pipeline)
print("Pipeline is submitted for execution")


# ### Monitor the Run Details
# 
# Observe the order in which the pipeline steps are executed: **processTrainDataStep** followed by the **trainStep**
# 
# Wait until both steps finish running. The cell below should periodically auto-refresh, and you can also rerun the cell to force a refresh.

# In[ ]:


RunDetails(pipeline_run).show()
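

# In[ ]:


# If you prefer to block the notebook until the pipeline finishes instead of watching the
# widget, the run can also be waited on directly (a small usage sketch):
pipeline_run.wait_for_completion(show_output=True)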


# ## Bulk Predictions

# ### Create the Raw Test Data DataReference Object
# 
# Create the **DataReference** object where the raw bulk test or input data file will be uploaded.

# In[ ]:


import os
import pandas as pd

# Create a dummy raw test data file locally before uploading it to blob storage
os.makedirs(test_data_location, exist_ok=True)
pd.DataFrame([[0]], columns = ['col1']).to_csv(os.path.join(test_data_location, 'raw-test-data.csv'), header=True, index=False)
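

# In[ ]:


# The DataReference creation itself is not shown in this excerpt; a minimal sketch, assuming
# `ws` is the Workspace and using its default datastore with an illustrative path and name.
from azureml.data.data_reference import DataReference

blob_datastore = ws.get_default_datastore()
raw_test_data_ref = DataReference(datastore=blob_datastore,
                                  data_reference_name='raw_test_data',      # illustrative name
                                  path_on_datastore='bulk-test-data')       # illustrative path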
Example No. 4
import logging

automl_settings = {
    # ... earlier settings truncated in this excerpt (the key for the value below is not shown) ...
    20,  # for real scenarios we recommend a timeout of at least one hour
    "max_concurrent_iterations": 4,
    "max_cores_per_iteration": -1,
    "verbosity": logging.INFO,
}

automl_config = AutoMLConfig(task='regression',
                             compute_target=compute_target,
                             training_data=train_data,
                             label_column_name=label,
                             **automl_settings)

remote_run = experiment.submit(automl_config, show_output=False)

from azureml.widgets import RunDetails
RunDetails(remote_run).show()

remote_run.wait_for_completion(show_output=False)

best_run, fitted_model = remote_run.get_output()
print(best_run)
print(fitted_model)

# Convert the test data to a pandas DataFrame and split off the label column

test_data = test_data.to_pandas_dataframe()
y_test = test_data[label].fillna(0)
test_data = test_data.drop(columns=[label])
test_data = test_data.fillna(0)

train_data = train_data.to_pandas_dataframe()
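
# The excerpt stops before scoring; a minimal sketch of using the fitted AutoML model on the
# prepared test data (the metric below is an illustration, not taken from the original).
from sklearn.metrics import mean_absolute_error

y_pred = fitted_model.predict(test_data)
print('MAE:', mean_absolute_error(y_test, y_pred))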
Example No. 5
    def scale_up(self, num_nodes):
        # Submit one estimator run per requested worker node and keep track of it
        for worker in range(0, num_nodes):
            pet_run = self.pet_experiment.submit(self.estimator)
            self.workers_list.append(pet_run)
            RunDetails(pet_run).show()
Example No. 6
run.log('2nd_layer', FLAGS.second_layer)  ##### Modified
run.log('final_accuracy', eval_res[0]['accuracy'])  ##### Modified
run.log('final_loss', eval_res[0]['loss'])  ##### Modified

#%% [markdown]
# ## Stop Run

#%%
run.complete()

#%% [markdown]
# ## Show logs using AML run history widget
#
# You can also view your logs in your notebook. (For viewing in your notebook, you must install extensions on your jupyter server. See [Readme](https://github.com/tsmatz/azure-ml-tensorflow-complete-sample/).)
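#
# For reference, on a plain Jupyter server the widget front-end is usually enabled with
# commands along these lines (see the linked Readme for the authoritative steps):
# `pip install azureml-widgets`, and on older Jupyter versions additionally
# `jupyter nbextension enable --py --user azureml.widgets`.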

#%%
from azureml.widgets import RunDetails
RunDetails(run_instance=run).show()

#%% [markdown]
# ## Plot by code
#
# You can also explore the metrics with Python code and plot them however you like.

#%%
import matplotlib.pyplot as plt
metrics = run.get_metrics()
plt.plot(metrics['training_accuracy'])

# %% [markdown]
# - `use_gpu`: This will use the GPU on the compute target for training if set to True.
# - `pip_packages`: This allows you to define any additional libraries to install before training.
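
# %% [markdown]
# A hedged sketch of how such an estimator could be constructed; the source directory, entry
# script, compute target, and package list below are assumptions, not taken from the original
# notebook.

# %%
from azureml.train.dnn import TensorFlow

estimator1 = TensorFlow(source_directory='./scripts',      # assumed project folder
                        entry_script='train.py',           # assumed training script name
                        compute_target=compute_target,     # assumed existing compute target
                        framework_version='2.0',
                        use_gpu=True,                      # train on GPU nodes
                        pip_packages=['pandas'])           # extra libraries installed before training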
# %% [markdown]
# #### 1) Submit First Run
#
# We can now train our model by submitting the estimator object as a [run](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.run.run?view=azure-ml-py).

# %%
run1 = experiment.submit(estimator1)

# %% [markdown]
# We can view the current status of the run and stream the logs from within the notebook.

# %%
from azureml.widgets import RunDetails
RunDetails(run1).show()

# %% [markdown]
# You can cancel a run at any time, which will stop the run and scale down the nodes in the compute target.

# %%
run1.cancel()

# %% [markdown]
# While we wait for the run to complete, let's go over how a Run is executed in Azure Machine Learning.
#
# ![](./images/aml-run.png)
# %% [markdown]
# #### 2) Add Metrics Logging
#
# So we were able to clone a TensorFlow 2.0 project and run it without any changes. However, for larger-scale projects we would want to log some metrics to make it easier to monitor the model's performance.
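
# %% [markdown]
# A minimal sketch of what that logging looks like inside the training script; the metric
# names and values here are illustrative.

# %%
from azureml.core.run import Run

run = Run.get_context()                # handle to the run this script is executing in
run.log('training_accuracy', 0.91)     # log a named scalar metric
run.log('training_loss', 0.27)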
Example No. 8
from azureml.train.automl import AutoMLConfig

automated_ml_config = AutoMLConfig(  # earlier arguments truncated in this excerpt
                                   debug_log='automated_ml_errors.log',
                                   path=project_folder,
                                   X=x_train.values,
                                   y=y_train.values.flatten(),
                                   **automl_settings)

# COMMAND ----------

from azureml.core.experiment import Experiment
experiment = Experiment(ws, experiment_name)
local_run = experiment.submit(automated_ml_config, show_output=True)

# COMMAND ----------

from azureml.widgets import RunDetails
RunDetails(local_run).show()

# COMMAND ----------

import pandas as pd

children = list(local_run.get_children())
metricslist = {}
for run in children:
    properties = run.get_properties()
    metrics = {
        k: v
        for k, v in run.get_metrics().items() if isinstance(v, float)
    }
    metricslist[int(properties['iteration'])] = metrics

rundata = pd.DataFrame(metricslist).sort_index(axis=1)
rundata
Example No. 9
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

hyperdrive_config = HyperDriveConfig(
    # ... estimator and hyperparameter sampling arguments truncated in this excerpt ...
    policy=None,
    primary_metric_name='validation_acc',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    max_concurrent_runs=4)

# In[4]:

# Submit your hyperdrive run to the experiment and show run details with the widget.

hyperdrive_run = exp.submit(hyperdrive_config)

# In[5]:

from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()

# In[6]:

#hyperdrive_run.get_metrics()

# In[7]:

import joblib
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
#best_run_metrics = best_run.get_metrics()
#parameter_values = best_run.get_details()['runDefinition']['Arguments']

print('Best Run Id: ', best_run.id)
#print('\n Accuracy:', best_run_metrics['accuracy'])
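
# In[8]:

# A hedged sketch of the "save the model" step mentioned above; the file path under outputs/
# is an assumption about what the training script wrote.
best_run.download_file('outputs/model.joblib', output_file_path='model.joblib')
model = joblib.load('model.joblib')   # uses the joblib import above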
Example No. 10
    def _start_estimator_training(self,
                                  training_name: str,
                                  estimator_type: str = None,
                                  input_datasets: np.array = None,
                                  input_datasets_to_download: np.array = None,
                                  compute_target: str = 'local',
                                  gpu_compute: bool = False,
                                  script_parameters: dict = None,
                                  show_widget: bool = True,
                                  **kwargs):
        ''' 
        Will start a new training using an Estimator, taking the training name as the folder of the run
        Args:
            training_name (str): The name of a training. This will be used to create a directory. Can contain a subdirectory path
            estimator_type (str): one of these values (tensorflow, sklearn, pytorch).
            input_datasets (np.array): An array of data set names that will be mounted on the compute in a directory of the dataset name
            input_datasets_to_download (np.array): An array of data set names that will be downloaded to the compute in a directory of the dataset name
            compute_target (str): The compute target (default = 'local') on which the training should be executed
            gpu_compute (bool): Indicates if GPU compute is required for this script or not
            script_parameters (dict): A dictionary of key/value parameters that will be passed as arguments to the training script
            show_widget (bool): Will display the live tracking of the submitted Run
        '''
        from azureml.train.estimator import Estimator

        # Check if directory exists
        if not (os.path.exists(training_name)
                and os.path.isdir(training_name)):
            raise FileNotFoundError(training_name)

        # Check compute target
        if compute_target != 'local':
            self.__check_compute_target(compute_target, gpu_compute)

        # Add datasets
        datasets = list()
        if (input_datasets is not None):
            for ds in input_datasets:
                datasets.append(
                    self.__workspace.datasets[ds].as_named_input(ds).as_mount(
                        path_on_compute=ds))
        if (input_datasets_to_download is not None):
            for ds in input_datasets_to_download:
                datasets.append(self.__workspace.datasets[ds].as_named_input(
                    ds).as_download(path_on_compute=ds))

        # as mount - as download
        constructor_parameters = {
            'source_directory': training_name,
            'script_params': script_parameters,
            'inputs': datasets,
            'compute_target': compute_target,
            'entry_script': 'train.py',
            'pip_requirements_file': 'requirements.txt',
            'use_gpu': gpu_compute,
            'use_docker': True
        }

        print('Creating estimator of type', estimator_type)

        if (estimator_type is None):
            # Using default Estimator
            estimator = Estimator(**constructor_parameters)
        elif (estimator_type == 'tensorflow'):
            from azureml.train.dnn import TensorFlow
            version_par = 'framework_version'
            if (not version_par in constructor_parameters.keys()):
                print('Defaulting to version 2.0 for TensorFlow')
                constructor_parameters[version_par] = '2.0'
            estimator = TensorFlow(**constructor_parameters)
        elif (estimator_type == 'sklearn'):
            from azureml.train.sklearn import SKLearn
            estimator = SKLearn(**constructor_parameters)
        elif (estimator_type == 'pytorch'):
            from azureml.train.dnn import PyTorch
            estimator = PyTorch(**constructor_parameters)

        # Submit training
        run = self.__experiment.submit(estimator)
        print(run.get_portal_url())

        if (show_widget):
            from azureml.widgets import RunDetails
            RunDetails(run).show()
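
# A hedged usage sketch (not part of the original source), assuming `trainer` is an instance of
# the class this method belongs to and that a local folder 'my-training' contains train.py and
# requirements.txt.
trainer._start_estimator_training('my-training',
                                  estimator_type='tensorflow',
                                  input_datasets=['training-set'],       # assumed registered dataset name
                                  compute_target='gpu-cluster',          # assumed compute target name
                                  gpu_compute=True,
                                  script_parameters={'--epochs': 10})    # assumed script argument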
Example No. 11
    def _start_environment_training(
            self,
            training_name: str,
            environment_type: str = None,
            input_datasets: np.array = None,
            input_datasets_to_download: np.array = None,
            compute_target: str = 'local',
            gpu_compute: bool = False,
            script_parameters: dict = None,
            show_widget: bool = True,
            **kwargs):
        ''' 
        Will start a new training using ScriptRunConfig, taking the training name as the folder of the run
        Args:
            training_name (str): The name of a training. This will be used to create a directory. Can contain a subdirectory path
            environment_type (str): either the name of an existing environment that will be taken as base, or one of these values (tensorflow, sklearn, pytorch).  
            input_datasets (np.array): An array of data set names that will be mounted on the compute in a directory of the dataset name
            input_datasets_to_download (np.array): An array of data set names that will be downloaded to the compute in a directory of the dataset name
            compute_target (str): The compute target (default = 'local') on which the training should be executed
            gpu_compute (bool): Indicates if GPU compute is required for this script or not
            script_parameters (dict): A dictionary of key/value parameters that will be passed as arguments to the training script
            show_widget (bool): Will display the live tracking of the submitted Run
        '''
        from azureml.train.estimator import Estimator
        from azureml.core import Environment, ScriptRunConfig
        from azureml.core.runconfig import RunConfiguration
        from azureml.core.runconfig import DataReferenceConfiguration
        from azureml.core.runconfig import CondaDependencies
        from arcus.azureml.experimenting import train_environment as te

        # Check if directory exists
        if not (os.path.exists(training_name)
                and os.path.isdir(training_name)):
            raise FileNotFoundError(training_name)

        # Check compute target
        if compute_target != 'local':
            self.__check_compute_target(compute_target, gpu_compute)

        training_env = te.get_training_environment(
            self.__workspace,
            training_name,
            os.path.join(training_name, 'requirements.txt'),
            use_gpu=gpu_compute,
            include_prerelease=True,
            environment_type=environment_type)
        runconfig = RunConfiguration()

        # Add datasets
        datarefs = dict()

        scriptargs = list()
        if script_parameters is not None:
            for key in script_parameters.keys():
                scriptargs.append(key)
                scriptargs.append(script_parameters[key])

        if (input_datasets is not None):
            for ds in input_datasets:
                print(f'Adding mounting data reference for dataset {ds}')
                # scriptargs.append(ds)
                scriptargs.append(
                    self.__workspace.datasets[ds].as_named_input(ds).as_mount(
                        path_on_compute=ds))
#                datastore, path = self._get_data_reference(self.__workspace.datasets[ds])
#                datarefs[ds] = DataReferenceConfiguration(datastore_name=datastore, path_on_datastore = path, path_on_compute = '/' + ds, mode = 'mount', overwrite = False)
        if (input_datasets_to_download is not None):
            for ds in input_datasets_to_download:
                print(f'Adding download data reference for dataset {ds}')
                # scriptargs.append(ds)
                scriptargs.append(self.__workspace.datasets[ds].as_named_input(
                    ds).as_download(path_on_compute=ds))

        scriptrunconfig = ScriptRunConfig(source_directory='./' +
                                          training_name,
                                          script="train.py",
                                          run_config=runconfig,
                                          arguments=scriptargs)
        scriptrunconfig.run_config.target = compute_target
        scriptrunconfig.run_config.environment = training_env
        #scriptrunconfig.run_config.data_references = datarefs

        # Submit training
        run = self.__experiment.submit(scriptrunconfig)
        print(run.get_portal_url())

        if (show_widget):
            from azureml.widgets import RunDetails
            RunDetails(run).show()
Example No. 12
from azureml.core.databricks import PyPiLibrary

notebook_path="/Users/[email protected]/ModelTraining" # Databricks notebook path

dbNbStep = DatabricksStep(
    name="DBNotebookInWS",
    inputs=[datasetFilePath],
    outputs=[output],
    num_workers=1,
    notebook_path=notebook_path,
    run_name='DB_Notebook_demo',
    compute_target=databricks_compute,
    allow_reuse=True,
    spark_version="7.2.x-scala2.12",
    pypi_libraries=[PyPiLibrary(package='scikit-learn'),
                    PyPiLibrary(package='azureml-sdk'),
                    PyPiLibrary(package='lightgbm'),
                    PyPiLibrary(package='pandas')],
    node_type="Standard_D13_v2"
)

steps = [dbNbStep]
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline_run = Experiment(ws, 'DB_Notebook_demo').submit(pipeline)
pipeline_run.wait_for_completion()

# COMMAND ----------

from azureml.widgets import RunDetails
RunDetails(pipeline_run).show()