def get_run_configs(workspace, compute_target, env):
    environment = get_environment(
        workspace=workspace,
        env_name=env.aml_env_scoring_name,
        conda_dependencies=env.aml_conda_score_file,
        create_new=env.rebuild_scoring_env,
        enable_docker=True,
        use_gpu=env.use_gpu_for_scoring)

    score_run_config = ParallelRunConfig(
        environment=environment,
        entry_script=env.batchscore_script_path,
        source_directory=env.source_train_directory,
        error_threshold=10,
        output_action='append_row',
        compute_target=compute_target,
        node_count=env.max_nodes_scoring,
        run_invocation_timeout=300)

    copy_run_config = RunConfiguration()
    copy_run_config.environment = get_environment(
        workspace=workspace,
        env_name=env.aml_env_scorecopy_name,
        conda_dependencies=env.aml_conda_scorecopy_file,
        create_new=env.rebuild_scoring_env,
        enable_docker=True,
        use_gpu=env.use_gpu_for_scoring)

    return score_run_config, copy_run_config
def build_parallel_run_config(source_directory, train_env, compute,
                              nodecount, workercount, timeout):
    parallel_run_config = ParallelRunConfig(
        source_directory=source_directory,
        entry_script='train_automl.py',
        mini_batch_size="1",  # do not modify this setting
        run_invocation_timeout=timeout,
        run_max_try=3,
        error_threshold=-1,
        output_action="append_row",
        environment=train_env,
        process_count_per_node=workercount,
        compute_target=compute,
        node_count=nodecount)
    validate_parallel_run_config(parallel_run_config)
    return parallel_run_config
def build_parallel_run_config(train_env, compute, nodecount, workercount, timeout):
    from azureml.pipeline.steps import ParallelRunConfig
    from common.scripts.helper import validate_parallel_run_config
    parallel_run_config = ParallelRunConfig(
        source_directory='./automl_train/scripts',
        entry_script='train_minibatch.py',
        mini_batch_size="1",  # do not modify this setting
        run_invocation_timeout=timeout,
        error_threshold=-1,
        output_action="append_row",
        environment=train_env,
        process_count_per_node=workercount,
        compute_target=compute,
        node_count=nodecount)
    validate_parallel_run_config(parallel_run_config)
    return parallel_run_config
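# A hypothetical invocation of the helper above, shown for illustration only;
# `train_env` and `compute` are assumed to already exist in the workspace.
parallel_run_config = build_parallel_run_config(
    train_env=train_env,
    compute=compute,
    nodecount=4,
    workercount=8,
    timeout=3700)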
def get_run_configs(
    ws: Workspace, computetarget: ComputeTarget, env: Env
) -> Tuple[ParallelRunConfig, RunConfiguration]:
    """
    Creates the necessary run configurations required by the pipeline
    to enable parallelized scoring.

    :param ws: AML Workspace
    :param computetarget: AML Compute target
    :param env: Environment Variables
    :returns: Tuple[Scoring Run configuration, Score copy run configuration]
    """
    # get a conda environment for scoring
    environment = get_environment(
        ws,
        env.aml_env_name_scoring,
        conda_dependencies_file=env.aml_env_score_conda_dep_file,
        enable_docker=True,
        use_gpu=env.use_gpu_for_scoring,
        create_new=env.rebuild_env_scoring,
    )

    score_run_config = ParallelRunConfig(
        entry_script=env.batchscore_script_path,
        source_directory=env.sources_directory_train,
        error_threshold=10,
        output_action="append_row",
        compute_target=computetarget,
        node_count=env.max_nodes_scoring,
        environment=environment,
        run_invocation_timeout=300,
    )

    copy_run_config = RunConfiguration()
    copy_run_config.environment = get_environment(
        ws,
        env.aml_env_name_score_copy,
        conda_dependencies_file=env.aml_env_scorecopy_conda_dep_file,
        enable_docker=True,
        use_gpu=env.use_gpu_for_scoring,
        create_new=env.rebuild_env_scoring,
    )

    return (score_run_config, copy_run_config)
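# Sketch of wiring the two configs into pipeline steps. The step names,
# `input_dataset`, `output_dir`, and "score_copy.py" are assumptions made
# for illustration; they are not part of the original code.
from azureml.pipeline.steps import ParallelRunStep, PythonScriptStep

score_run_config, copy_run_config = get_run_configs(ws, computetarget, env)

scoring_step = ParallelRunStep(
    name="batch-scoring",
    parallel_run_config=score_run_config,
    inputs=[input_dataset.as_named_input("scoring_input")],
    output=output_dir,
    allow_reuse=False,
)

copy_step = PythonScriptStep(
    name="copy-scores",
    script_name="score_copy.py",
    source_directory=env.sources_directory_train,
    inputs=[output_dir],
    runconfig=copy_run_config,
    compute_target=computetarget,
)
copy_step.run_after(scoring_step)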
print("SDK version:", azureml.core.VERSION) dataset_name = 'grib-dataset' ws = Workspace.from_config() print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n') datastore = ws.get_default_datastore() input_ds = Dataset.get_by_name(ws, dataset_name) batch_data = DatasetConsumptionConfig("batch_dataset", input_ds, mode='mount') output_dir = PipelineData(name='batch_output', datastore=datastore) parallel_run_config = ParallelRunConfig.load_yaml(workspace=ws, path='convert_parallel.yml') batch_step = ParallelRunStep(name="batch-conversion-step", parallel_run_config=parallel_run_config, arguments=['--data_output_path', output_dir], inputs=[batch_data], output=output_dir, allow_reuse=False) steps = [batch_step] pipeline = Pipeline(workspace=ws, steps=steps) pipeline.validate() pipeline_run = Experiment(ws, 'convert-batch-pipeline').submit(pipeline) pipeline_run.wait_for_completion()
def main():
    """Build pipeline."""
    # Environment variables
    env = Env()

    # Azure ML workspace
    aml_workspace = Workspace.get(
        name=env.workspace_name,
        subscription_id=env.subscription_id,
        resource_group=env.resource_group,
    )
    logger.info(f"Azure ML workspace: {aml_workspace}")

    # Azure ML compute cluster
    aml_compute = get_compute(aml_workspace, env.compute_name)
    logger.info(f"Azure ML compute cluster: {aml_compute}")

    # Azure ML environment
    environment = Environment(name=env.aml_env_name)
    conda_dep = CondaDependencies(
        conda_dependencies_file_path="./local_development/dev_dependencies.yml"
    )
    environment.python.conda_dependencies = conda_dep

    run_config = RunConfiguration()
    run_config.environment = environment

    # Pipeline Data
    preparation_pipelinedata = PipelineData(
        "preparation_pipelinedata", is_directory=True).as_dataset()
    extraction_pipelinedata = PipelineData(
        "extraction_pipelinedata", is_directory=True)
    training_pipelinedata = PipelineData(
        "training_pipelinedata", is_directory=True)

    # List of pipeline steps
    step_list = list()
    preparation_step = PythonScriptStep(
        name="preparation-step",
        compute_target=aml_compute,
        source_directory=env.sources_directory_train,
        script_name=env.preparation_step_script_path,
        outputs=[preparation_pipelinedata],
        arguments=[
            "--input_path", env.input_dir,
            "--output_path", preparation_pipelinedata,
            "--datastore_name", env.blob_datastore_name
        ],
        runconfig=run_config)
    step_list.append(preparation_step)

    parallel_run_config = ParallelRunConfig(
        source_directory=env.sources_directory_train,
        entry_script=env.extraction_step_script_path,
        mini_batch_size=env.mini_batch_size,
        error_threshold=env.error_threshold,
        output_action="append_row",
        environment=environment,
        compute_target=aml_compute,
        node_count=env.node_count,
        run_invocation_timeout=env.run_invocation_timeout,
        process_count_per_node=env.process_count_per_node,
        append_row_file_name="extraction_output.txt")

    extraction_step = ParallelRunStep(
        name="extraction-step",
        inputs=[preparation_pipelinedata],
        output=extraction_pipelinedata,
        arguments=["--output_dir", extraction_pipelinedata],
        parallel_run_config=parallel_run_config)
    step_list.append(extraction_step)

    training_step = PythonScriptStep(
        name="training-step",
        compute_target=aml_compute,
        source_directory=env.sources_directory_train,
        script_name=env.training_step_script_path,
        inputs=[extraction_pipelinedata],
        outputs=[training_pipelinedata],
        arguments=[
            "--input_dir", extraction_pipelinedata,
            "--output_dir", training_pipelinedata
        ],
        runconfig=run_config)
    step_list.append(training_step)

    # Build pipeline
    pipeline = Pipeline(workspace=aml_workspace, steps=step_list)
    pipeline.validate()
    logger.info(f"Built pipeline {pipeline}")

    # Publish pipeline
    published_pipeline = pipeline.publish(
        env.pipeline_name,
        description=env.pipeline_name,
        version=datetime.utcnow().isoformat())
    try:
        pipeline_endpoint = PipelineEndpoint.get(
            workspace=aml_workspace, name=env.pipeline_endpoint_name)
        pipeline_endpoint.add_default(published_pipeline)
    except ErrorResponseException:
        pipeline_endpoint = PipelineEndpoint.publish(
            workspace=aml_workspace,
            name=env.pipeline_endpoint_name,
            pipeline=published_pipeline,
            description=env.pipeline_endpoint_name)
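# Sketch of triggering the endpoint published by main(); assumes the same
# Env values as above, and the experiment name here is made up.
endpoint = PipelineEndpoint.get(workspace=aml_workspace,
                                name=env.pipeline_endpoint_name)
endpoint_run = endpoint.submit(experiment_name="prep-extract-train")
endpoint_run.wait_for_completion()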
parser.add_argument("--runconfig", type=str, help="Path to the parallel runconfig for pipeline", dest="runconfig", required=True) args = parser.parse_args() print(f'Arguments: {args}') print('Connecting to workspace') ws = Workspace.from_config() print( f'WS name: {ws.name}\nRegion: {ws.location}\nSubscription id: {ws.subscription_id}\nResource group: {ws.resource_group}' ) print('Loading parallel runconfig for pipeline') parallel_run_config = ParallelRunConfig.load_yaml(workspace=ws, path=args.runconfig) print('Loading default batch dataset') batch_dataset = Dataset.get_by_name(ws, args.dataset) # Parametrize dataset input and dataset output name (batch scoring result) to the pipeline batch_dataset_parameter = PipelineParameter(name="batch_dataset", default_value=batch_dataset) batch_dataset_consumption = DatasetConsumptionConfig( "batch_dataset", batch_dataset_parameter).as_mount() datastore = ws.get_default_datastore() output_dataset_name = "batch_scoring_results" # Existing, GA-code - does not allow to specify the path on the datastore # output_dataset = PipelineData(name='batch_output', datastore=datastore).as_dataset()
def get_pipeline(aml_compute: ComputeTarget, blob_ds: Datastore,
                 batch_env: Environment, tf_env: Environment) -> list:
    """
    Creates pipeline steps

    Parameters:
        aml_compute (ComputeTarget): a reference to a compute
        blob_ds (Datastore): a reference to a datastore
        batch_env (Environment): a reference to an environment object
        tf_env (Environment): a horovod/tf environment

    Returns:
        list: the pipeline steps
    """
    # We need something to generate data along the way
    pipeline_files = PipelineData("pipeline_files", datastore=blob_ds).as_dataset()

    # Pipeline parameters to use with every run
    is_debug = PipelineParameter("is_debug", default_value=False)
    relay_connection_name = PipelineParameter("debug_relay_connection_name",
                                              default_value="none")

    single_step_config = RunConfiguration()
    single_step_config.environment = batch_env
    single_step = PythonScriptStep(
        name="single-step",
        script_name="samples/azure_ml_advanced/steps/single_step.py",
        source_directory=".",
        runconfig=single_step_config,
        arguments=[
            "--pipeline-files", pipeline_files,
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5678,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        inputs=[],
        outputs=[pipeline_files],
        compute_target=aml_compute,
        allow_reuse=False)

    output_dir = PipelineData("output_dir")

    parallel_run_config = ParallelRunConfig(
        entry_script="samples/azure_ml_advanced/steps/parallel_step.py",
        source_directory=".",
        mini_batch_size="5",
        output_action="summary_only",
        environment=batch_env,
        compute_target=aml_compute,
        error_threshold=10,
        run_invocation_timeout=600,  # very important for debugging
        node_count=2,
        process_count_per_node=1)

    parallelrun_step = ParallelRunStep(
        name="parallel-run-step",
        parallel_run_config=parallel_run_config,
        inputs=[pipeline_files],
        output=output_dir,
        arguments=[
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5679,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        allow_reuse=False)

    parallelrun_step.run_after(single_step)

    distr_config = MpiConfiguration(process_count_per_node=1, node_count=2)

    src = ScriptRunConfig(
        source_directory=".",
        script="samples/azure_ml_advanced/steps/mpi/mpi_step_starter.py",
        arguments=[
            "--input-ds", pipeline_files,
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5680,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        compute_target=aml_compute,
        environment=tf_env,
        distributed_job_config=distr_config,
    )

    mpi_step = PythonScriptStep(
        name="mpi-step",
        script_name="samples/azure_ml_advanced/steps/mpi/mpi_step_starter.py",
        arguments=[
            "--input-ds", pipeline_files,
            "--is-debug", is_debug,
            "--debug-relay-connection-name", relay_connection_name,
            "--debug-port", 5680,
            "--debug-relay-connection-string-secret", debug_connection_string_secret_name
        ],
        compute_target=aml_compute,
        inputs=[pipeline_files],
        outputs=[],
        runconfig=src.run_config,
        source_directory=".")

    mpi_step.run_after(parallelrun_step)

    print("Pipeline Steps Created")
    steps = [single_step, parallelrun_step, mpi_step]
    print(f"Returning {len(steps)} steps")
    return steps
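# Sketch of assembling and submitting the returned steps; `ws`, the
# environments, and the experiment name are assumed for illustration.
steps = get_pipeline(aml_compute, blob_ds, batch_env, tf_env)
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline.validate()
run = Experiment(ws, "advanced-debugging-pipeline").submit(pipeline)
run.wait_for_completion(show_output=True)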
def get_backtest_pipeline(
    experiment: Experiment,
    dataset: TabularDataset,
    process_per_node: int,
    node_count: int,
    compute_target: ComputeTarget,
    automl_settings: Dict[str, Any],
    step_size: int,
    step_number: int,
    model_name: Optional[str] = None,
    model_uid: Optional[str] = None,
) -> Pipeline:
    """
    :param experiment: The experiment used to run the pipeline.
    :param dataset: Tabular data set to be used for model training.
    :param process_per_node: The number of processes per node. Generally it should be
                             the number of cores on the node divided by two.
    :param node_count: The number of nodes to be used.
    :param compute_target: The compute target to be used to run the pipeline.
    :param model_name: The name of a model to be back tested.
    :param automl_settings: The dictionary with automl settings.
    :param step_size: The number of periods to step back in backtesting.
    :param step_number: The number of backtesting iterations.
    :param model_uid: The uid to mark models from this run of the experiment.
    :return: The pipeline to be used for model retraining.
             **Note:** The output will be uploaded in the pipeline output
             called 'score'.
    """
    jasmine_client = JasmineClient(
        service_context=experiment.workspace.service_context,
        experiment_name=experiment.name,
        experiment_id=experiment.id,
    )
    env = jasmine_client.get_curated_environment(
        scenario=Scenarios.AUTOML,
        enable_dnn=False,
        enable_gpu=False,
        compute=compute_target,
        compute_sku=experiment.workspace.compute_targets.get(
            compute_target.name
        ).vm_size,
    )
    data_results = PipelineData(
        name="results", datastore=None, pipeline_output_name="results"
    )
    ############################################################
    # Split the data set using python script.
    ############################################################
    run_config = RunConfiguration()
    run_config.docker.use_docker = True
    run_config.environment = env
    utilities.set_environment_variables_for_run(run_config)

    split_data = PipelineData(name="split_data_output", datastore=None).as_dataset()
    split_step = PythonScriptStep(
        name="split_data_for_backtest",
        script_name="data_split.py",
        inputs=[dataset.as_named_input("training_data")],
        outputs=[split_data],
        source_directory=PROJECT_FOLDER,
        arguments=[
            "--step-size", step_size,
            "--step-number", step_number,
            "--time-column-name", automl_settings.get("time_column_name"),
            "--time-series-id-column-names", automl_settings.get("grain_column_names"),
            "--output-dir", split_data,
        ],
        runconfig=run_config,
        compute_target=compute_target,
        allow_reuse=False,
    )
    ############################################################
    # We will do the backtest in the parallel run step.
    ############################################################
    settings_path = os.path.join(PROJECT_FOLDER, SETTINGS_FILE)
    hru.dump_object_to_json(automl_settings, settings_path)
    mini_batch_size = PipelineParameter(name="batch_size_param", default_value=str(1))

    back_test_config = ParallelRunConfig(
        source_directory=PROJECT_FOLDER,
        entry_script="retrain_models.py",
        mini_batch_size=mini_batch_size,
        error_threshold=-1,
        output_action="append_row",
        append_row_file_name="outputs.txt",
        compute_target=compute_target,
        environment=env,
        process_count_per_node=process_per_node,
        run_invocation_timeout=3600,
        node_count=node_count,
    )
    utilities.set_environment_variables_for_run(back_test_config)
    forecasts = PipelineData(name="forecasts", datastore=None)
    if model_name:
        parallel_step_name = "{}-backtest".format(model_name.replace("_", "-"))
    else:
        parallel_step_name = "AutoML-backtest"

    prs_args = [
        "--target_column_name", automl_settings.get("label_column_name"),
        "--output-dir", forecasts,
    ]
    if model_name is not None:
        prs_args.append("--model-name")
        prs_args.append(model_name)
    if model_uid is not None:
        prs_args.append("--model-uid")
        prs_args.append(model_uid)
    backtest_prs = ParallelRunStep(
        name=parallel_step_name,
        parallel_run_config=back_test_config,
        arguments=prs_args,
        inputs=[split_data],
        output=forecasts,
        allow_reuse=False,
    )
    ############################################################
    # Then we collect the output and return it as scores output.
    ############################################################
    collection_step = PythonScriptStep(
        name="score",
        script_name="score.py",
        inputs=[forecasts.as_mount()],
        outputs=[data_results],
        source_directory=PROJECT_FOLDER,
        arguments=["--forecasts", forecasts, "--output-dir", data_results],
        runconfig=run_config,
        compute_target=compute_target,
        allow_reuse=False,
    )
    # Build and return the pipeline.
    return Pipeline(
        workspace=experiment.workspace,
        steps=[split_step, backtest_prs, collection_step],
    )
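# Sketch of running the backtest pipeline and downloading the 'results'
# pipeline output; the dataset, settings, and step values are assumptions.
pipeline = get_backtest_pipeline(
    experiment=experiment,
    dataset=train_dataset,
    process_per_node=2,
    node_count=2,
    compute_target=compute_target,
    automl_settings=automl_settings,
    step_size=7,
    step_number=4,
)
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=False)
pipeline_run.get_pipeline_output("results").download("./backtest_results")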
from azureml.core.runconfig import DEFAULT_GPU_IMAGE

cd = CondaDependencies.create(pip_packages=[
    "tensorflow-gpu==1.15.2",
    "azureml-core",
    "azureml-dataprep[fuse]"
])
env = Environment(name="parallelenv")
env.python.conda_dependencies = cd
env.docker.base_image = DEFAULT_GPU_IMAGE

from azureml.pipeline.steps import ParallelRunConfig

parallel_run_config = ParallelRunConfig(
    environment=env,
    entry_script="batch_scoring.py",
    source_directory="scripts",
    output_action="append_row",
    mini_batch_size="20",
    error_threshold=1,
    compute_target=compute_target,
    process_count_per_node=2,
    node_count=1)

from azureml.pipeline.steps import ParallelRunStep
from datetime import datetime

parallel_step_name = "batchscoring-" + datetime.now().strftime("%Y%m%d%H%M")

label_config = label_ds.as_named_input("labels_input")

batch_score_step = ParallelRunStep(
    name=parallel_step_name,
    inputs=[input_images.as_named_input("input_images")],
else:
    runId = run.parent.id

dataset = Dataset.File.from_files(
    path=[(mydatastore, f"rawdata/daystoprocess/{runId}/*.csv")])

# from_conda_specification is a static factory; assign its return value
env = Environment.from_conda_specification('parallelenv', './DataIngest/parallelenv.yml')

parallel_run_config = ParallelRunConfig(
    source_directory='.',
    entry_script='./DataIngest/parallelrunstep.py',
    mini_batch_size="1",
    error_threshold=30,
    output_action="append_row",
    environment=env,
    compute_target='cpu-cluster',
    append_row_file_name="my_outputs.txt",
    run_invocation_timeout=1200,
    node_count=1)

parallelrun_step = ParallelRunStep(
    name="parallelapicalls",
    parallel_run_config=parallel_run_config,
    arguments=["--arg1", string_pipeline_param],
    inputs=[dataset.as_named_input("inputds")],
    output=output_dir
    # models=[model]  # not needed, as it is only relevant in batch inferencing
    # arguments=[],
    # allow_reuse=True
)
# Get the batch dataset for input
batch_data_set = ws.datasets['batch-data']

# Set the output location
default_ds = ws.get_default_datastore()
output_dir = PipelineData(name='inferences',
                          datastore=default_ds,
                          output_path_on_compute='results')

# Define the parallel run step configuration
parallel_run_config = ParallelRunConfig(
    source_directory='batch_scripts',
    entry_script="batch_scoring_script.py",
    mini_batch_size="5",
    error_threshold=10,
    output_action="append_row",
    environment=batch_env,
    compute_target=aml_cluster,
    node_count=4)

# Create the parallel run step
parallelrun_step = ParallelRunStep(
    name='batch-score',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('batch_data')],
    output=output_dir,
    arguments=[],
    allow_reuse=True
)
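# Sketch of wrapping the step in a pipeline and running it; the experiment
# name is an assumption.
pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])
pipeline_run = Experiment(ws, 'batch-scoring-pipeline').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)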
batch_env = Environment(name="batch_environment") batch_env.python.conda_dependencies = batch_conda_deps batch_env.docker.enabled = True batch_env.docker.base_image = DEFAULT_GPU_IMAGE from azureml.pipeline.core import PipelineParameter from azureml.pipeline.steps import ParallelRunConfig parallel_run_config = ParallelRunConfig( source_directory='', entry_script="batchscore3.py", mini_batch_size=PipelineParameter(name="batch_size_param", default_value="5"), error_threshold=10, output_action="append_row", append_row_file_name="mnist_outputs.txt", environment=batch_env, compute_target=compute_target, process_count_per_node=PipelineParameter(name="process_count_param", default_value=2), node_count=2) from azureml.pipeline.steps import ParallelRunStep from datetime import datetime parallel_step_name = "batchscoring-" + datetime.now().strftime("%Y%m%d%H%M") from azureml.pipeline.steps import ParallelRunStep parallelrun_step = ParallelRunStep(name=parallel_step_name,
# Add dependencies
aml_run_config.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas', 'scikit-learn'],
    pip_packages=[
        'azureml-sdk', 'azureml-dataprep[fuse, pandas]',
        'azureml-dataset-runtime[pandas, fuse]', 'tensorflow', 'keras',
        'textblob', 'nltk', 'fuzzywuzzy', 'azureml-defaults', 'azureml-core'
    ],
    pin_sdk_version=False)

parallel_run_config = ParallelRunConfig(
    source_directory='./',
    entry_script='skill_recommender_AML.py',
    mini_batch_size='5KB',
    error_threshold=-1,
    output_action='append_row',
    environment=aml_run_config,
    compute_target=compute_target,
    process_count_per_node=PipelineParameter(name='process_count_param', default_value=2),
    node_count=2,
    run_invocation_timeout=600)

parallelrun_step = ParallelRunStep(name='skill-extractor-parallel',
                                   parallel_run_config=parallel_run_config,
                                   inputs=[named_emp_ds],
                                   output=output_dir,
                                   allow_reuse=True)

pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])
experiment = Experiment(ws, 'skill-extractor-parallel')
pipeline_run = experiment.submit(pipeline)
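# Sketch of collecting the appended results after the run completes. Since no
# append_row_file_name was set above, output defaults to 'parallel_run_step.txt'.
pipeline_run.wait_for_completion(show_output=True)
step_run = pipeline_run.find_step_run('skill-extractor-parallel')[0]
step_run.get_output_data(output_dir.name).download(local_path='./results')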