Example #1
# Imports added so the snippet is self-contained (module paths as in the MLflow codebase).
import os

import numpy
import pandas

from mlflow import tensorflow, tracking
from mlflow.projects import run
from mlflow.utils.file_utils import TempDir


def test_dnn():
    old_uri = tracking.get_tracking_uri()
    try:
        with TempDir(chdr=False, remove_on_exit=True) as tmp:
            diamonds = tmp.path("diamonds")
            estimator = tmp.path("estimator")
            artifacts = tmp.path("artifacts")
            os.mkdir(diamonds)
            os.mkdir(estimator)
            os.mkdir(artifacts)
            tracking.set_tracking_uri(artifacts)
            # Download the diamonds dataset via mlflow run
            run(".",
                entry_point="main",
                version=None,
                parameters={"dest-dir": diamonds},
                experiment_id=tracking._get_experiment_id(),
                mode="local",
                cluster_spec=None,
                git_username=None,
                git_password=None,
                use_conda=True,
                storage_dir=None)

            # Run the main dnn app via mlflow
            run("apps/dnn-regression",
                entry_point="main",
                version=None,
                parameters={
                    "model-dir": estimator,
                    "train": os.path.join(diamonds, "train_diamonds.parquet"),
                    "test": os.path.join(diamonds, "test_diamonds.parquet"),
                    "hidden-units": "30,30",
                    "label-col": "price",
                    "steps": 5000,
                    "batch-size": 128
                },
                experiment_id=tracking._get_experiment_id(),
                mode="local",
                cluster_spec=None,
                git_username=None,
                git_password=None,
                use_conda=True,
                storage_dir=None)

            # Load the saved model as a pyfunc
            pyfunc = tensorflow.load_pyfunc(
                os.path.join(estimator,
                             os.listdir(estimator)[0]))

            df = pandas.read_parquet(
                os.path.join(diamonds, "test_diamonds.parquet"))

            predict_df = pyfunc.predict(df)
            assert 'predictions' in predict_df
            assert isinstance(predict_df['predictions'][0][0], numpy.float32)
    finally:
        tracking.set_tracking_uri(old_uri)
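
A minimal standalone sketch of the model-loading step at the end of this test. The paths here are hypothetical placeholders; the `tensorflow.load_pyfunc` and `predict` calls are taken directly from the test above.

import os
import pandas

from mlflow import tensorflow

estimator_dir = "/tmp/estimator"  # hypothetical path to the exported model
saved_model_path = os.path.join(estimator_dir, os.listdir(estimator_dir)[0])
pyfunc_model = tensorflow.load_pyfunc(saved_model_path)

# Score a held-out parquet file, as the assertions in the test do.
test_df = pandas.read_parquet("/tmp/diamonds/test_diamonds.parquet")  # hypothetical path
predictions = pyfunc_model.predict(test_df)
print(predictions['predictions'][:5])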
Example #2
def _run_project(project, entry_point, work_dir, parameters, use_conda,
                 storage_dir, experiment_id):
    """Locally run a project that has been checked out in `work_dir`."""
    storage_dir_for_run = _get_storage_dir(storage_dir)
    eprint(
        "=== Created directory %s for downloading remote URIs passed to arguments of "
        "type 'path' ===" % storage_dir_for_run)
    # Try to build the command first in case the user mis-specified parameters
    run_project_command = project.get_entry_point(entry_point).compute_command(
        parameters, storage_dir_for_run)
    commands = []
    if use_conda:
        conda_env_path = os.path.abspath(
            os.path.join(work_dir, project.conda_env))
        _maybe_create_conda_env(conda_env_path)
        commands.append("source activate %s" %
                        _get_conda_env_name(conda_env_path))

    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    if parameters is not None:
        for key, value in parameters.items():
            active_run.log_param(Param(key, value))
    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(exp_id),
    }

    commands.append(run_project_command)
    command = " && ".join(commands)
    eprint("=== Running command: %s ===" % command)
    try:
        process.exec_cmd([os.environ.get("SHELL", "bash"), "-c", command],
                         cwd=work_dir,
                         stream_output=True,
                         env=env_map)
        tracking.end_run()
        eprint("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        eprint("=== Run failed ===")
Example #3
def _run_project(project, entry_point, work_dir, parameters, use_conda,
                 storage_dir, experiment_id):
    """Locally run a project that has been checked out in `work_dir`."""
    mlflow.set_tracking_uri('..\\')  # debug override (added by cliicy): track into the parent directory
    if storage_dir is not None and not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    storage_dir_for_run = tempfile.mkdtemp(dir=storage_dir)
    print(
        "=== Created directory %s for downloading remote URIs passed to arguments of "
        "type 'path' ===" % storage_dir_for_run)
    # Try to build the command first in case the user mis-specified parameters
    run_project_command = project.get_entry_point(entry_point).compute_command(
        parameters, storage_dir_for_run)
    commands = []

    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    for key, value in parameters.items():
        active_run.log_param(Param(key, value))
    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(exp_id),
    }

    commands.append(run_project_command)
    command = " && ".join(commands)
    print("=== Running command: %s ===" % command)
    try:
        # Debug override: run a fixed training command instead of the computed
        # entry-point command.
        command = "python my_train.py 0.4 0.1"
        print("=== Running debug command: %s (cwd: %s) ===" % (command, work_dir))
        # Split into an argv list: Popen only accepts a plain command string on
        # Windows, so this keeps the call portable.
        process.exec_cmd(command.split(" "),
                         cwd=work_dir,
                         stream_output=True,
                         env=env_map)
        # Original invocation, kept for reference:
        # process.exec_cmd([os.environ.get("SHELL", "bash"), "-c", command],
        #                  cwd=work_dir, stream_output=True, env=env_map)
        tracking.end_run()
        print("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        print("=== Run failed ===")
Example #4
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None, use_conda=True,
         use_temp_cwd=False, storage_dir=None, block=True):
    exp_id = experiment_id or tracking._get_experiment_id()
    if mode is None or mode == "local":
        return _run_local(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, use_temp_cwd=use_temp_cwd,
            storage_dir=storage_dir, git_username=git_username, git_password=git_password,
            block=block)
    if mode == "databricks":
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec, git_username=git_username,
            git_password=git_password)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
Example #5
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None, use_conda=True,
         use_temp_cwd=False, storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    exp_id = experiment_id or tracking._get_experiment_id()
    parameters = parameters or {}
    if mode == "databricks":
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec, git_username=git_username,
            git_password=git_password)
    elif mode == "local" or mode is None:
        work_dir = _fetch_project(uri, use_temp_cwd, version, git_username, git_password)
        project = _load_project(project_dir=work_dir)
        project.get_entry_point(entry_point)._validate_parameters(parameters)
        # Synchronously create a conda environment (even though this may take some time) to avoid
        # failures due to multiple concurrent attempts to create the same conda env.
        if use_conda:
            _maybe_create_conda_env(conda_env_path=os.path.join(work_dir, project.conda_env))
        if run_id:
            active_run = tracking._get_existing_run(run_id)
        else:
            active_run = _create_run(uri, exp_id, work_dir, entry_point, parameters)
        # In blocking mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted.
        if block:
            command = _get_entry_point_command(
                work_dir, entry_point, use_conda, parameters, storage_dir)
            return _run_entry_point(command, work_dir, exp_id, run_id=active_run.run_info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters, experiment_id=exp_id,
            use_conda=use_conda, storage_dir=storage_dir, run_id=active_run.run_info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
Example #6
def _run_project(project, entry_point, work_dir, parameters, use_conda,
                 storage_dir, experiment_id):
    """Locally run a project that has been checked out in `work_dir`."""
    if storage_dir is not None and not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    storage_dir_for_run = tempfile.mkdtemp(dir=storage_dir)
    eprint(
        "=== Created directory %s for downloading remote URIs passed to arguments of "
        "type 'path' ===" % storage_dir_for_run)
    # Try to build the command first in case the user mis-specified parameters
    run_project_command = project.get_entry_point(entry_point).compute_command(
        parameters, storage_dir_for_run)
    commands = []
    if use_conda:
        with open(os.path.join(work_dir, project.conda_env)) as conda_env_file:
            conda_env_sha = hashlib.sha1(
                conda_env_file.read().encode("utf-8")).hexdigest()
        conda_env = "mlflow-%s" % conda_env_sha
        (exit_code, _, stderr) = process.exec_cmd(["conda", "--help"],
                                                  throw_on_error=False)
        if exit_code != 0:
            eprint(
                'conda is not installed properly. Please follow the instructions on '
                'https://conda.io/docs/user-guide/install/index.html')
            eprint(stderr)
            sys.exit(1)
        (_, stdout, stderr) = process.exec_cmd(["conda", "env", "list", "--json"])
        env_names = [os.path.basename(env) for env in json.loads(stdout)['envs']]

        if conda_env not in env_names:
            eprint('=== Creating conda environment %s ===' % conda_env)
            process.exec_cmd(
                ["conda", "env", "create", "-n", conda_env,
                 "--file", project.conda_env],
                cwd=work_dir, stream_output=True)
        commands.append("source activate %s" % conda_env)

    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    for key, value in parameters.items():
        active_run.log_param(Param(key, value))
    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(exp_id),
    }

    commands.append(run_project_command)
    command = " && ".join(commands)
    eprint("=== Running command: %s ===" % command)
    try:
        process.exec_cmd([os.environ.get("SHELL", "bash"), "-c", command],
                         cwd=work_dir,
                         stream_output=True,
                         env=env_map)
        tracking.end_run()
        eprint("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        eprint("=== Run failed ===")