def test_dnn():
    """End-to-end test: download the diamonds dataset, train the dnn-regression
    app via ``mlflow run``, then load the exported model as a pyfunc and check
    that it produces float32 predictions for the test set."""
    saved_uri = tracking.get_tracking_uri()
    try:
        with TempDir(chdr=False, remove_on_exit=True) as tmp:
            diamonds = tmp.path("diamonds")
            estimator = tmp.path("estimator")
            artifacts = tmp.path("artifacts")
            for directory in (diamonds, estimator, artifacts):
                os.mkdir(directory)
            tracking.set_tracking_uri(artifacts)
            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds},
                experiment_id=tracking._get_experiment_id(),
                mode="local", cluster_spec=None, git_username=None,
                git_password=None, use_conda=True, storage_dir=None)
            # Run the main dnn app via mlflow
            run("apps/dnn-regression", entry_point="main", version=None,
                parameters={
                    "model-dir": estimator,
                    "train": os.path.join(diamonds, "train_diamonds.parquet"),
                    "test": os.path.join(diamonds, "test_diamonds.parquet"),
                    "hidden-units": "30,30",
                    "label-col": "price",
                    "steps": 5000,
                    "batch-size": 128,
                },
                experiment_id=tracking._get_experiment_id(),
                mode="local", cluster_spec=None, git_username=None,
                git_password=None, use_conda=True, storage_dir=None)
            # Loading the saved model as a pyfunc.
            saved_model_path = os.path.join(estimator, os.listdir(estimator)[0])
            pyfunc = tensorflow.load_pyfunc(saved_model_path)
            test_df = pandas.read_parquet(
                os.path.join(diamonds, "test_diamonds.parquet"))
            predict_df = pyfunc.predict(test_df)
            assert 'predictions' in predict_df
            assert isinstance(predict_df['predictions'][0][0], numpy.float32)
    finally:
        # Always restore the tracking URI the test found on entry.
        tracking.set_tracking_uri(saved_uri)
def _run_project(project, entry_point, work_dir, parameters, use_conda, storage_dir,
                 experiment_id):
    """Locally run a project that has been checked out in `work_dir`."""
    storage_dir_for_run = _get_storage_dir(storage_dir)
    eprint(
        "=== Created directory %s for downloading remote URIs passed to arguments of "
        "type 'path' ===" % storage_dir_for_run)
    # Compute the entry-point command up front so badly specified parameters
    # fail before we touch conda or the tracking server.
    entry_point_command = project.get_entry_point(entry_point).compute_command(
        parameters, storage_dir_for_run)

    command_chain = []
    if use_conda:
        conda_env_path = os.path.abspath(os.path.join(work_dir, project.conda_env))
        _maybe_create_conda_env(conda_env_path)
        command_chain.append("source activate %s" % _get_conda_env_name(conda_env_path))

    # Register a new run with the tracking server and record every parameter.
    active_run = tracking.start_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    for key, value in (parameters or {}).items():
        active_run.log_param(Param(key, value))

    # Expose the run id (plus tracking URI and experiment id) through magic
    # environment variables so the subprocess reuses this run.
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(
            experiment_id or tracking._get_experiment_id()),
    }

    command_chain.append(entry_point_command)
    shell_command = " && ".join(command_chain)
    eprint("=== Running command: %s ===" % shell_command)
    try:
        process.exec_cmd([os.environ.get("SHELL", "bash"), "-c", shell_command],
                         cwd=work_dir, stream_output=True, env=env_map)
        tracking.end_run()
        eprint("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        eprint("=== Run failed ===")
def _run_project(project, entry_point, work_dir, parameters, use_conda, storage_dir,
                 experiment_id):
    """Locally run a project that has been checked out in `work_dir`.

    Builds the entry-point command from the project spec, records the run and
    its parameters with the tracking server, then executes the command in a
    subprocess, marking the run FAILED if the subprocess errors out.

    Fixes applied in review:
    - Removed the hardcoded ``mlflow.set_tracking_uri('..\\\\')`` debug override,
      which clobbered the caller's tracking configuration.
    - Removed the hardcoded ``"python my_train.py 0.4 0.1"`` debug command that
      silently ignored ``entry_point`` and ``parameters``; the computed
      entry-point command is executed (through a shell, so chaining works).
    - Guarded against ``parameters`` being None, consistent with the sibling
      implementation.
    """
    # Ensure the storage directory exists before creating a scratch dir in it.
    if storage_dir is not None and not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    storage_dir_for_run = tempfile.mkdtemp(dir=storage_dir)
    print(
        "=== Created directory %s for downloading remote URIs passed to arguments of "
        "type 'path' ===" % storage_dir_for_run)
    # Try to build the command first in case the user mis-specified parameters
    run_project_command = project.get_entry_point(entry_point).compute_command(
        parameters, storage_dir_for_run)
    commands = []
    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id, source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point, source_type=SourceType.PROJECT)
    if parameters is not None:
        for key, value in parameters.items():
            active_run.log_param(Param(key, value))
    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(exp_id),
    }
    commands.append(run_project_command)
    command = " && ".join(commands)
    print("=== Running command: %s ===" % command)
    try:
        # Run through a shell so the " && "-joined command chain works.
        process.exec_cmd([os.environ.get("SHELL", "bash"), "-c", command],
                         cwd=work_dir, stream_output=True, env=env_map)
        tracking.end_run()
        print("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        print("=== Run failed ===")
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None,
         use_conda=True, use_temp_cwd=False, storage_dir=None, block=True):
    """Dispatch the project run to the execution backend named by ``mode``.

    ``mode`` of None defaults to local execution; "databricks" delegates to the
    Databricks backend. Any other value raises ``ExecutionException``.
    """
    exp_id = experiment_id or tracking._get_experiment_id()
    if mode in (None, "local"):
        return _run_local(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, use_temp_cwd=use_temp_cwd,
            storage_dir=storage_dir, git_username=git_username,
            git_password=git_password, block=block)
    if mode == "databricks":
        # Imported lazily so local runs do not require the Databricks backend.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec,
            git_username=git_username, git_password=git_password)
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, ["local", "databricks"]))
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None,
         use_conda=True, use_temp_cwd=False, storage_dir=None, block=True,
         run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    exp_id = experiment_id or tracking._get_experiment_id()
    parameters = parameters or {}
    if mode == "databricks":
        # Imported lazily so local runs do not require the Databricks backend.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec,
            git_username=git_username, git_password=git_password)
    if mode == "local" or mode is None:
        work_dir = _fetch_project(uri, use_temp_cwd, version, git_username,
                                  git_password)
        project = _load_project(project_dir=work_dir)
        project.get_entry_point(entry_point)._validate_parameters(parameters)
        # Synchronously create a conda environment (even though this may take
        # some time) to avoid failures due to multiple concurrent attempts to
        # create the same conda env.
        if use_conda:
            _maybe_create_conda_env(
                conda_env_path=os.path.join(work_dir, project.conda_env))
        active_run = (tracking._get_existing_run(run_id) if run_id
                      else _create_run(uri, exp_id, work_dir, entry_point, parameters))
        if block:
            # Blocking mode: run the entry-point command here, sending status
            # updates to the tracking server when finished. Note that the run
            # state may not be persisted to the tracking server if interrupted.
            command = _get_entry_point_command(
                work_dir, entry_point, use_conda, parameters, storage_dir)
            return _run_entry_point(command, work_dir, exp_id,
                                    run_id=active_run.run_info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.run_info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
def _prepare_conda_env(project, work_dir):
    """Ensure a conda env matching the project's conda spec exists; return its name.

    The env name embeds a SHA1 of the environment file contents, so identical
    specs reuse the same cached environment. Exits the process if the ``conda``
    binary is not usable.
    """
    with open(os.path.join(work_dir, project.conda_env)) as conda_env_file:
        conda_env_sha = hashlib.sha1(
            conda_env_file.read().encode("utf-8")).hexdigest()
    conda_env = "mlflow-%s" % conda_env_sha
    # Sanity-check that conda is installed and runnable before using it.
    (exit_code, _, stderr) = process.exec_cmd(["conda", "--help"],
                                              throw_on_error=False)
    if exit_code != 0:
        eprint('conda is not installed properly. Please follow the instructions on '
               'https://conda.io/docs/user-guide/install/index.html')
        eprint(stderr)
        sys.exit(1)
    (_, stdout, stderr) = process.exec_cmd(["conda", "env", "list", "--json"])
    env_names = [os.path.basename(env) for env in json.loads(stdout)['envs']]
    if conda_env not in env_names:
        eprint('=== Creating conda environment %s ===' % conda_env)
        process.exec_cmd(
            ["conda", "env", "create", "-n", conda_env, "--file", project.conda_env],
            cwd=work_dir, stream_output=True)
    return conda_env


def _run_project(project, entry_point, work_dir, parameters, use_conda, storage_dir,
                 experiment_id):
    """Locally run a project that has been checked out in `work_dir`.

    Builds the entry-point command, optionally activates a per-spec conda env,
    registers the run (and its parameters) with the tracking server, then
    executes the command in a shell subprocess, marking the run FAILED if the
    subprocess errors out.
    """
    if storage_dir is not None and not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    storage_dir_for_run = tempfile.mkdtemp(dir=storage_dir)
    eprint(
        "=== Created directory %s for downloading remote URIs passed to arguments of "
        "type 'path' ===" % storage_dir_for_run)
    # Try to build the command first in case the user mis-specified parameters
    run_project_command = project.get_entry_point(entry_point)\
        .compute_command(parameters, storage_dir_for_run)
    commands = []
    if use_conda:
        commands.append("source activate %s" % _prepare_conda_env(project, work_dir))
    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id, source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point, source_type=SourceType.PROJECT)
    if parameters is not None:  # tolerate callers passing parameters=None
        for key, value in parameters.items():
            active_run.log_param(Param(key, value))
    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(exp_id),
    }
    commands.append(run_project_command)
    command = " && ".join(commands)
    eprint("=== Running command: %s ===" % command)
    try:
        process.exec_cmd([os.environ.get("SHELL", "bash"), "-c", command],
                         cwd=work_dir, stream_output=True, env=env_map)
        tracking.end_run()
        eprint("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        eprint("=== Run failed ===")