def hyperparameter_tuning(ws, experiment):
    """Create and submit a HyperDrive hyperparameter-tuning job.

    Parameters
    ----------
    ws : azureml.core.Workspace
        Workspace providing the compute target (looked up via
        ``AML.compute_name``) and the default datastore.
    experiment : azureml.core.Experiment
        Experiment under which the HyperDrive parent run is submitted.

    Returns
    -------
    The submitted HyperDrive run.
    """
    cluster = ws.compute_targets[AML.compute_name]

    # Mount the workspace's default datastore for the training script.
    script_params = {
        '--datastore-dir': ws.get_default_datastore().as_mount(),
    }
    tf_estimator = TensorFlow(source_directory='scripts',
                              compute_target=cluster,
                              entry_script='train.py',
                              script_params=script_params,
                              use_gpu=True)

    # Random search over the learning rate on a log scale.
    ps = RandomParameterSampling({
        '--learning-rate': loguniform(-15, -3)
    })

    # Kill runs whose metric trails the best run by more than the slack
    # factor, evaluated every 2 metric reports.
    early_termination_policy = BanditPolicy(slack_factor=0.15,
                                            evaluation_interval=2)

    hyperdrive_run_config = HyperDriveRunConfig(
        estimator=tf_estimator,
        hyperparameter_sampling=ps,
        policy=early_termination_policy,
        primary_metric_name="validation_accuracy",
        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=20,
        max_concurrent_runs=4)

    hd_run = experiment.submit(hyperdrive_run_config)

    # submit() already returns a Run — no need to re-fetch it by id as the
    # original `RunDetails(Run(experiment, hd_run.id))` did.
    RunDetails(hd_run).show()

    return hd_run
def get_parameter_distribution(distribution, **kwargs):
    """Translate a distribution name into a HyperDrive parameter expression.

    Parameters
    ----------
    distribution : str
        Name of the distribution (case-insensitive), one of: choice,
        randint, uniform, quniform, loguniform, qloguniform, normal,
        qnormal, lognormal, qlognormal.
    **kwargs
        Distribution-specific settings (options, upper, min_value,
        max_value, q, mu, sigma); missing values default to None.

    Returns
    -------
    The corresponding azureml.train.hyperdrive parameter expression.

    Raises
    ------
    RunConfigurationException
        If ``distribution`` is not one of the supported names.
    """
    # BUG FIX: the original used substring tests ("uniform" in ...) ordered
    # so that "quniform", "loguniform" and "qloguniform" were captured by the
    # plain "uniform" branch, and "qnormal"/"lognormal"/"qlognormal" by the
    # "normal" branch. Exact comparison removes that shadowing.
    dist = distribution.lower()
    if dist == "choice":
        parameter_distr = choice(kwargs.get("options", []))
    elif dist == "randint":
        parameter_distr = randint(upper=kwargs.get("upper", None))
    elif dist == "uniform":
        parameter_distr = uniform(min_value=kwargs.get("min_value", None),
                                  max_value=kwargs.get("max_value", None))
    elif dist == "quniform":
        parameter_distr = quniform(min_value=kwargs.get("min_value", None),
                                   max_value=kwargs.get("max_value", None),
                                   q=kwargs.get("q", None))
    elif dist == "loguniform":
        parameter_distr = loguniform(min_value=kwargs.get("min_value", None),
                                     max_value=kwargs.get("max_value", None))
    elif dist == "qloguniform":
        parameter_distr = qloguniform(min_value=kwargs.get("min_value", None),
                                      max_value=kwargs.get("max_value", None),
                                      q=kwargs.get("q", None))
    elif dist == "normal":
        parameter_distr = normal(mu=kwargs.get("mu", None),
                                 sigma=kwargs.get("sigma", None))
    elif dist == "qnormal":
        parameter_distr = qnormal(mu=kwargs.get("mu", None),
                                  sigma=kwargs.get("sigma", None),
                                  q=kwargs.get("q", None))
    elif dist == "lognormal":
        parameter_distr = lognormal(mu=kwargs.get("mu", None),
                                    sigma=kwargs.get("sigma", None))
    elif dist == "qlognormal":
        parameter_distr = qlognormal(mu=kwargs.get("mu", None),
                                     sigma=kwargs.get("sigma", None),
                                     q=kwargs.get("q", None))
    else:
        # Message kept identical to the original (dead `parameter_distr =
        # None` before the raise dropped).
        raise RunConfigurationException(
            f"Parameter distribution for parameter not defined in settings. "
            f"Please choose between \'choice\', \'randint\', \'uniform\', "
            f"\'quniform\', \'loguniform\', \'qloguniform\', \'normal\', "
            f"\'qnormal\', \'lognormal\' and \'qlognormal\'")
    return parameter_distr
def main(epochs, iterations, compute_target, concurrent_runs):
    """Submit a HyperDrive sweep for the food-image classifier and export
    the best run's id as an Azure DevOps pipeline variable."""
    auth = AzureCliAuthentication()
    experiment = Experiment.from_directory(".", auth=auth)
    workspace = experiment.workspace

    gpu_cluster = workspace.compute_targets[compute_target]
    datastore = workspace.datastores['food_images']

    # Mount the image datastore and forward the epoch budget.
    train_args = {"--data-dir": datastore.as_mount(), "--epochs": epochs}
    estimator = TensorFlow(source_directory=".",
                           entry_script='code/train/train.py',
                           script_params=train_args,
                           compute_target=gpu_cluster,
                           conda_packages=['pillow', 'pandas'],
                           pip_packages=['click', 'seaborn'],
                           use_docker=True,
                           use_gpu=True,
                           framework_version='1.13')

    # Run on subset of food categories
    estimator.run_config.arguments.extend(
        ['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio'])

    sampling = RandomParameterSampling({
        '--minibatch-size': choice(16, 32, 64),
        '--learning-rate': loguniform(-9, -6),
        '--optimizer': choice('rmsprop', 'adagrad', 'adam'),
    })

    # Early termination: bandit policy checked every 2 intervals.
    termination = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

    hd_config = HyperDriveConfig(estimator=estimator,
                                 hyperparameter_sampling=sampling,
                                 policy=termination,
                                 primary_metric_name='acc',
                                 primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                 max_total_runs=iterations,
                                 max_concurrent_runs=concurrent_runs)

    print("Submitting Hyperdrive Run")
    run = experiment.submit(hd_config)
    run.wait_for_completion(raise_on_error=True, show_output=True)
    print("Finishing Run")

    winner = run.get_best_run_by_primary_metric()
    # Azure DevOps logging command: set run_id for downstream tasks.
    print(f'##vso[task.setvariable variable=run_id]{winner.id}')
def get_parameter_distribution(parameter_name, parameter_setting):
    """Build a HyperDrive parameter expression from a settings entry.

    Parameters
    ----------
    parameter_name : str
        Name of the hyperparameter (used only in the error message).
    parameter_setting : dict
        Mapping with a "distribution" name and a "parameters" dict holding
        the distribution-specific values (options / upper / min_value /
        max_value / q / mu / sigma).

    Returns
    -------
    The corresponding azureml.train.hyperdrive parameter expression.

    Raises
    ------
    RunConfigurationException
        If the distribution name is not one of the supported names.
    """
    dist = parameter_setting["distribution"]
    # Hoist the repeated nested lookup.
    params = parameter_setting["parameters"]
    # BUG FIX: the original used substring tests ("uniform" in dist) ordered
    # so that "quniform"/"loguniform"/"qloguniform" fell into the plain
    # "uniform" branch and "qnormal"/"lognormal"/"qlognormal" into "normal".
    # Exact comparison removes the shadowing. The dangling `raise` split from
    # its exception expression is also reassembled.
    if dist == "choice":
        parameter_distr = choice(params["options"])
    elif dist == "randint":
        parameter_distr = randint(upper=params["upper"])
    elif dist == "uniform":
        parameter_distr = uniform(min_value=params["min_value"],
                                  max_value=params["max_value"])
    elif dist == "quniform":
        parameter_distr = quniform(min_value=params["min_value"],
                                   max_value=params["max_value"],
                                   q=params["q"])
    elif dist == "loguniform":
        parameter_distr = loguniform(min_value=params["min_value"],
                                     max_value=params["max_value"])
    elif dist == "qloguniform":
        parameter_distr = qloguniform(min_value=params["min_value"],
                                      max_value=params["max_value"],
                                      q=params["q"])
    elif dist == "normal":
        parameter_distr = normal(mu=params["mu"], sigma=params["sigma"])
    elif dist == "qnormal":
        parameter_distr = qnormal(mu=params["mu"],
                                  sigma=params["sigma"],
                                  q=params["q"])
    elif dist == "lognormal":
        parameter_distr = lognormal(mu=params["mu"], sigma=params["sigma"])
    elif dist == "qlognormal":
        parameter_distr = qlognormal(mu=params["mu"],
                                     sigma=params["sigma"],
                                     q=params["q"])
    else:
        raise RunConfigurationException(
            "Parameter distribution for parameter {} not defined in settings. "
            "Please choose between \'choice\', \'randint\', \'uniform\', "
            "\'quniform\', \'loguniform\', \'qloguniform\', \'normal\', "
            "\'qnormal\', \'lognormal\' and \'qlognormal\'"
            .format(parameter_name))
    return parameter_distr
5, "--weight_decay", 0.01, # will be overridden by HyperDrive "--disable_tqdm", True, ], compute_target=target, environment=env, ) # set up hyperdrive search space convert_base = lambda x: float(np.log(x)) search_space = { "--learning_rate": hyperdrive.loguniform( convert_base(1e-6), convert_base(5e-2)), # NB. loguniform on [exp(min), exp(max)] "--weight_decay": hyperdrive.uniform(5e-3, 15e-2), "--per_device_train_batch_size": hyperdrive.choice([16, 32]), } hyperparameter_sampling = RandomParameterSampling(search_space) policy = TruncationSelectionPolicy(truncation_percentage=50, evaluation_interval=2, delay_evaluation=0) hyperdrive_config = HyperDriveConfig( run_config=config,
def build_pipeline(dataset, ws, config):
    """Build, validate, publish and schedule the prednet training pipeline.

    Steps: decode videos -> prepare data -> HyperDrive training -> model
    registration. A datastore-polling Schedule is attached to the published
    pipeline.

    Parameters
    ----------
    dataset : str
        Dataset folder name under prednet/data/video on the default datastore.
    ws : azureml.core.Workspace
        Target workspace.
    config : dict
        Must provide 'cpu_compute', 'gpu_compute', 'acr_address',
        'acr_username' and 'acr_password'.

    Returns
    -------
    str
        Name of the published pipeline ('prednet_' + dataset).
    """
    print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name))

    base_dir = '.'
    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)
    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_create.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)

    # ----- CPU compute: reuse if present, otherwise provision -----
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:  # was a bare `except:` — narrowed
        print("creating new compute target")
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            max_nodes=4,
            idle_seconds_before_scaledown=1800)
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name,
                                                  provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)
    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # ----- GPU compute: reuse if present, otherwise provision -----
    gpu_compute_name = config['gpu_compute']
    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:  # was a bare `except:` — narrowed
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=10,
            idle_seconds_before_scaledown=1800)
        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name,
                                                  provisioning_config)
        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it uses the scale settings for
        # the cluster
        gpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # Best-effort status dump; failure here must not abort the build.
    try:
        print(gpu_compute_target.get_status().serialize())
    except BaseException as e:
        print("Could not get status of compute target.")
        print(e)

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(
        conda_packages=["py-opencv=3.4.2"],
        pip_indexurl='https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D',
        pip_packages=["azure-storage-blob==1.5.0", "hickle==3.4.3",
                      "requests==2.21.0", "sklearn", "pandas==0.24.2",
                      "azureml-sdk", "numpy==1.16.2", "pillow==6.0.0"])

    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False
    print("PipelineData object created")

    # DataReference to where video data is stored.
    video_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="video_data",
        path_on_datastore=os.path.join("prednet", "data", "video", dataset))
    print("DataReference object created")

    # Intermediate pipeline data between steps.
    # NOTE(review): "raw_video_fames" looks like a typo for "frames", but
    # the name is part of the pipeline-data contract — left unchanged.
    raw_data = PipelineData("raw_video_fames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py",
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    print("video_decode step created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name='prepare_data',
        script_name="data_preparation.py",
        arguments=["--input_data", raw_data,
                   "--output_data", preprocessed_data],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    data_prep.run_after(video_decoding)
    print("data_prep step created")

    # configure access to ACR for pulling our custom docker image
    acr = ContainerRegistry()
    acr.address = config['acr_address']
    acr.username = config['acr_username']
    acr.password = config['acr_password']

    est = Estimator(source_directory=script_folder,
                    compute_target=gpu_compute_target,
                    entry_script='train.py',
                    use_gpu=True,
                    node_count=1,
                    custom_docker_image="wopauli_1.8-gpu:1",
                    image_registry_details=acr,
                    user_managed=True)

    # Random search space over prednet training hyperparameters.
    ps = RandomParameterSampling({
        '--batch_size': choice(1, 2, 4, 8),
        '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),
        '--learning_rate': loguniform(-6, -1),
        '--lr_decay': loguniform(-9, -1),
        '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2",
                                  "2, 3", "0", "3"),
        '--transfer_learning': choice("True", "False")
    })

    policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1,
                          delay_evaluation=10)

    hdc = HyperDriveConfig(estimator=est,
                           hyperparameter_sampling=ps,
                           policy=policy,
                           primary_metric_name='val_loss',
                           primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                           max_total_runs=10,
                           max_concurrent_runs=5,
                           max_duration_minutes=60 * 6)

    hd_step = HyperDriveStep(
        name="train_w_hyperdrive",
        hyperdrive_run_config=hdc,
        estimator_entry_script_arguments=[
            '--data-folder', preprocessed_data,
            '--remote_execution',
            '--dataset', dataset
        ],
        inputs=[preprocessed_data],
        metrics_output=data_metrics,
        allow_reuse=True
    )
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=cpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.']
    )
    registration_step.run_after(hd_step)

    pipeline = Pipeline(workspace=ws,
                        steps=[video_decoding, data_prep, hd_step,
                               registration_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete")

    pipeline_name = 'prednet_' + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)

    # Poll the datastore path and trigger the pipeline on new data.
    schedule = Schedule.create(workspace=ws,
                               name=pipeline_name + "_sch",
                               pipeline_id=published_pipeline.id,
                               experiment_name=pipeline_name,
                               datastore=def_blob_store,
                               wait_for_provisioning=True,
                               description="Datastore scheduler for Pipeline" + pipeline_name,
                               path_on_datastore=os.path.join('prednet/data/video', dataset, 'Train'),
                               polling_interval=1)

    return pipeline_name
'keras', 'tensorflow', 'tensorflow-gpu', 'matplotlib', 'pillow', 'six', 'numpy', 'azureml-sdk', 'tqdm' ], conda_packages=['cudatoolkit=10.0.130'], entry_script='kd_squeezenet.py', use_gpu=True, node_count=1) from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal from azureml.pipeline.steps import HyperDriveStep from azureml.train.hyperdrive import choice, loguniform, uniform ps = RandomParameterSampling({ '--learning_rate': uniform(1e-3, 2e-2), '--momentum': uniform(.1, .95), '--weight_decay': loguniform(-5, -3), '--temperature': uniform(1, 9), # '--lambda_const': uniform(.1, .3), '--transfer_learning': choice("True", "False") }) policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10) hdc = HyperDriveConfig( estimator=est, hyperparameter_sampling=ps, policy=policy, primary_metric_name='val_loss', primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
# run = exp.submit(est) # print(run) # run.wait_for_completion(show_output=True) ps = RandomParameterSampling({ '--batch_size': choice(2, 4, 8, 16), '--filter_sizes': choice("3 3 3", "4 4 4", "5 5 5"), '--stack_sizes': choice("48 96 192", "36 72 144", "12 24 48"), '--learning_rate': loguniform(-6, -1), '--lr_decay': loguniform(-9, -1) }) policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1) #, delay_evaluation=20) hdc = HyperDriveRunConfig(estimator=est, hyperparameter_sampling=ps, policy=policy, primary_metric_name='val_loss', primary_metric_goal=PrimaryMetricGoal.MINIMIZE, max_total_runs=5, max_concurrent_runs=50)
def build_pipeline(dataset, ws, config):
    """Build, validate and publish the prednet training pipeline.

    Steps: decode videos -> prepare data -> HyperDrive training -> model
    registration.

    Parameters
    ----------
    dataset : str
        Dataset folder name under prednet/data/video on the default datastore.
    ws : azureml.core.Workspace
        Target workspace.
    config : dict
        Must provide 'cpu_compute' and 'gpu_compute' cluster names.

    Returns
    -------
    str
        Name of the published pipeline ('prednet_' + dataset).
    """
    print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name))

    # BUG FIX: the original branched on socket.gethostname() but assigned
    # base_dir = '.' in BOTH branches — the dead conditional (and the
    # hostname lookup) are removed.
    base_dir = '.'

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)
    shutil.copy(os.path.join(base_dir, 'video_decoding.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_submit.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'pipelines_build.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'train.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'prednet.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'keras_utils.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'data_preparation.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'model_registration.py'), script_folder)
    shutil.copy(os.path.join(base_dir, 'config.json'), script_folder)

    # ----- CPU compute: reuse if present, otherwise provision -----
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:
        print("creating new compute target")
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            max_nodes=4,
            idle_seconds_before_scaledown=1800)
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name,
                                                  provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)
    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # ----- GPU compute: reuse if present, otherwise provision -----
    gpu_compute_name = config['gpu_compute']
    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=5,
            idle_seconds_before_scaledown=1800)
        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name,
                                                  provisioning_config)
        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it uses the scale settings for
        # the cluster
        gpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)
    # use get_status() to get a detailed status for the current cluster.
    print(gpu_compute_target.get_status().serialize())

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(
        conda_packages=["py-opencv=3.4.2"],
        pip_packages=["azure-storage-blob==1.5.0", "hickle==3.4.3",
                      "requests==2.21.0", "sklearn", "pandas==0.24.2",
                      "azureml-sdk==1.0.21", "numpy==1.16.2",
                      "pillow==6.0.0"])
    gpu_cd = CondaDependencies.create(
        pip_packages=["keras==2.0.8", "theano==1.0.4", "tensorflow==1.8.0",
                      "tensorflow-gpu==1.8.0", "hickle==3.4.3",
                      "matplotlib==3.0.3", "seaborn==0.9.0",
                      "requests==2.21.0", "bs4==0.0.1", "imageio==2.5.0",
                      "sklearn", "pandas==0.24.2", "azureml-sdk==1.0.21",
                      "numpy==1.16.2"])

    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    gpu_compute_run_config = RunConfiguration(conda_dependencies=gpu_cd)
    gpu_compute_run_config.environment.docker.enabled = True
    gpu_compute_run_config.environment.docker.gpu_support = True
    gpu_compute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    gpu_compute_run_config.environment.spark.precache_packages = False
    print("PipelineData object created")

    # DataReference to where video data is stored.
    video_data = DataReference(datastore=def_blob_store,
                               data_reference_name="video_data",
                               path_on_datastore=os.path.join(
                                   "prednet", "data", "video", dataset))

    # Intermediate pipeline data between steps.
    # NOTE(review): "raw_video_fames" looks like a typo for "frames", but
    # the name is part of the pipeline-data contract — left unchanged.
    raw_data = PipelineData("raw_video_fames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)
    print("DataReference object created")

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py",
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.'])
    print("video_decode created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name='prepare_data',
        script_name="data_preparation.py",
        arguments=["--input_data", raw_data,
                   "--output_data", preprocessed_data],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.'])
    data_prep.run_after(video_decoding)
    print("data_prep created")

    est = TensorFlow(source_directory=script_folder,
                     compute_target=gpu_compute_target,
                     pip_packages=['keras==2.0.8', 'theano',
                                   'tensorflow==1.8.0',
                                   'tensorflow-gpu==1.8.0', 'matplotlib',
                                   'horovod', 'hickle'],
                     entry_script='train.py',
                     use_gpu=True,
                     node_count=1)

    # Random search space over prednet training hyperparameters.
    ps = RandomParameterSampling({
        '--batch_size': choice(2, 4, 8, 16),
        '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),  #, "48, 96"),
        '--learning_rate': loguniform(-6, -1),
        '--lr_decay': loguniform(-9, -1),
        '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2",
                                  "2, 3", "0", "1", "2", "3"),
        '--transfer_learning': choice("True", "False")
    })

    policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1,
                          delay_evaluation=20)

    hdc = HyperDriveRunConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        policy=policy,
        primary_metric_name='val_loss',
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=5,  #100,
        max_concurrent_runs=5,  #10,
        max_duration_minutes=60 * 6)

    hd_step = HyperDriveStep(
        name="train_w_hyperdrive",
        hyperdrive_run_config=hdc,
        estimator_entry_script_arguments=['--data-folder', preprocessed_data,
                                          '--remote_execution'],
        inputs=[preprocessed_data],
        metrics_output=data_metrics,
        allow_reuse=True)
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=gpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.'])
    registration_step.run_after(hd_step)

    pipeline = Pipeline(workspace=ws,
                        steps=[video_decoding, data_prep, hd_step,
                               registration_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete")

    pipeline_name = 'prednet_' + dataset
    pipeline.publish(name=pipeline_name)
    return pipeline_name
# Sanity check of the trained model's predictions before launching the sweep.
print('predictions:\t', y_hat[:30])
print("Accuracy on the test set:", np.average(y_hat == y_test))

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, loguniform

# Random search space over the MNIST network's architecture and optimizer.
ps = RandomParameterSampling({
    '--batch-size': choice(32, 64, 128),
    '--first-layer-neurons': choice(16, 64, 128, 256, 512),
    '--second-layer-neurons': choice(16, 64, 256, 512),
    '--learning-rate': loguniform(-6, -1)
})

# TF 2.0 estimator; the MNIST dataset is mounted as a named input.
est = TensorFlow(source_directory=script_folder,
                 script_params={
                     '--data-folder': dataset.as_named_input('mnist').as_mount()
                 },
                 compute_target=compute_target,
                 entry_script='tf_mnist2.py',
                 framework_version='2.0',
                 use_gpu=True,
                 pip_packages=['azureml-dataprep[pandas,fuse]'])

# Bandit early termination: drop runs >10% behind the best, every 2 intervals.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)
'keras==2.2.4', 'tensorflow==1.12.0', 'tensorflow-gpu==1.12.0', 'matplotlib', 'horovod', 'hickle', 'pillow==5.1.0', 'six==1.11.0', 'numpy==1.14.5' ], entry_script='vanilla_squeezenet.py', use_gpu=True, node_count=1) # run = exp.submit(est) # print(run) # run.wait_for_completion(show_output=True) ps = RandomParameterSampling({ '--learning_rate': loguniform(-4, -2), '--momentum': loguniform(-9, -1), '--weight_decay': loguniform(-3, -1), '--transfer_learning': choice("True", "False") }) policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10) hdc = HyperDriveRunConfig(estimator=est, hyperparameter_sampling=ps, policy=policy, primary_metric_name='val_loss', primary_metric_goal=PrimaryMetricGoal.MINIMIZE, max_total_runs=100,