Exemplo n.º 1
0
def get_step_uuid(pipeline: Pipeline) -> str:
    """Resolves the UUID of the step the calling script belongs to.

    The environment variable ``ORCHEST_STEP_UUID`` takes precedence.
    When it is absent, the UUID is derived by matching the running
    Jupyter kernel (``KERNEL_ID``) to a Jupyter session and then
    matching that session's notebook to a step of the pipeline.

    Args:
        pipeline: Pipeline object describing the pipeline and its steps.

    Returns:
        The UUID of the step that is currently being executed, either
        as part of a partial run or from within an active Jupyter
        kernel.

    Raises:
        StepUUIDResolveError: ``KERNEL_ID`` is not set, no Jupyter
            session uses that kernel, or no step of the pipeline points
            to the session's notebook.
    """
    # Partial runs expose the step UUID directly via the environment.
    env_step_uuid = os.environ.get("ORCHEST_STEP_UUID")
    if env_step_uuid is not None:
        return env_step_uuid

    # Interactive case: the Jupyter Enterprise Gateway sets KERNEL_ID.
    if "KERNEL_ID" not in os.environ:
        raise StepUUIDResolveError(
            'Environment variable "KERNEL_ID" not present.')
    kernel_id = os.environ["KERNEL_ID"]

    # The Jupyter server host name is built from the first 18
    # characters of the project UUID and of the pipeline UUID.
    session_uuid = Config.PROJECT_UUID[:18] + pipeline.properties["uuid"][:18]
    sessions_url = (
        f"http://jupyter-server-{session_uuid}/jupyter-server-{session_uuid}/"
        "api/sessions")
    jupyter_sessions = _request_json(sessions_url)

    # Lazily look for the first session that is backed by our kernel;
    # the sentinel tells "no match" apart from any real path value.
    no_match = object()
    matching_paths = (
        candidate["notebook"]["path"]
        for candidate in jupyter_sessions
        if candidate["kernel"]["id"] == kernel_id
    )
    notebook_path = next(matching_paths, no_match)
    if notebook_path is no_match:
        raise StepUUIDResolveError(
            'Jupyter session data has no "kernel" with "id" equal to the '
            f'"KERNEL_ID" of this step: {kernel_id}.')

    # Steps are matched on file basename only, which relies on a
    # pipeline not containing two notebooks with the same name.
    for candidate_step in pipeline.steps:
        step_file = os.path.basename(candidate_step.properties["file_path"])
        if step_file == os.path.basename(notebook_path):
            # NOTE: do not cache the UUID (e.g. in the environment).
            # If the notebook gets assigned to a different step, the
            # cached value would not change and the notebook would
            # keep identifying itself as the old step.
            return candidate_step.properties["uuid"]

    raise StepUUIDResolveError(
        f'No step with "notebook_path": {notebook_path}.')
Exemplo n.º 2
0
def update_params(params: Dict[str, Any]) -> None:
    """Updates the parameters of the current step.

    Additionally, you can set new parameters by giving parameters that
    do not yet exist in the current parameters of the pipeline step.

    Internally the updating is done by calling the ``dict.update``
    method. This further explains the behavior of this method.

    The updated pipeline definition is written back to
    ``Config.PIPELINE_DEFINITION_PATH``.

    Args:
        params: The parameters to update. Either updating their values
            or adding new parameter keys.

    Raises:
        StepUUIDResolveError: The current step could not be identified,
            so it is unknown which parameters to update.
    """
    with open(Config.PIPELINE_DEFINITION_PATH, "r") as f:
        pipeline_definition = json.load(f)

    pipeline = Pipeline.from_json(pipeline_definition)
    try:
        step_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError as err:
        # Chain explicitly so the underlying reason stays visible.
        raise StepUUIDResolveError("Parameters could not be identified.") from err

    step = pipeline.get_step_by_uuid(step_uuid)
    step.update_params(params)

    # Serialize before opening the file for writing: opening with "w"
    # truncates it, so an encoding failure (e.g. a parameter value that
    # is not JSON serializable) must happen while the existing
    # definition on disk is still intact.
    serialized = json.dumps(pipeline.to_dict())

    with open(Config.PIPELINE_DEFINITION_PATH, "w") as f:
        f.write(serialized)
Exemplo n.º 3
0
def update_params(step_params: Optional[dict] = None,
                  pipeline_params: Optional[dict] = None) -> None:
    """Updates the parameters of the current step and of the pipeline.

    Additionally, you can set new parameters by giving parameters that
    do not yet exist in the current parameters, either of the step or of
    the pipeline.

    Internally the updating is done by calling the ``dict.update``
    method. This further explains the behavior of this method.

    The pipeline definition is always written back to
    ``Config.PIPELINE_DEFINITION_PATH``, even when both arguments are
    ``None``.

    Args:
        step_params: The step parameters to update. Either updating
            their values or adding new parameter keys.
        pipeline_params: The pipeline parameters to update. Either
            updating their values or adding new parameter keys.

    Raises:
        StepUUIDResolveError: ``step_params`` was given but the current
            step could not be identified.

    Warning:
        Updating the parameters of a pipeline can lead to race
        conditions, since different steps could be updating them at
        the same time. Making sure that the correct behaviour takes
        place, when it comes to pipeline parameters, is responsibility
        of the user, e.g. by making a pipeline where no steps that
        modify the pipeline parameters can run in parallel, or by using
        external forms of locking. Updating the parameters of a single
        step is perfectly safe.
    """
    with open(Config.PIPELINE_DEFINITION_PATH, "r") as f:
        pipeline_definition = json.load(f)

    pipeline = Pipeline.from_json(pipeline_definition)

    if pipeline_params is not None:
        pipeline.update_params(pipeline_params)

    # The step only needs to be resolved when step parameters are
    # actually being updated.
    if step_params is not None:
        try:
            step_uuid = get_step_uuid(pipeline)
        except StepUUIDResolveError as err:
            # Chain explicitly so the underlying reason stays visible.
            raise StepUUIDResolveError(
                "Parameters could not be identified.") from err

        step = pipeline.get_step_by_uuid(step_uuid)
        step.update_params(step_params)

    # Serialize before opening the file for writing: opening with "w"
    # truncates it, so an encoding failure (non-serializable value, or
    # keys that cannot be sorted) must happen while the existing
    # definition on disk is still intact.
    serialized = json.dumps(pipeline.to_dict(), indent=4, sort_keys=True)

    with open(Config.PIPELINE_DEFINITION_PATH, "w") as f:
        f.write(serialized)
Exemplo n.º 4
0
def get_params() -> Dict[str, Any]:
    """Returns the parameters of the step that is currently running.

    The pipeline definition is loaded from
    ``Config.PIPELINE_DEFINITION_PATH`` and the current step is looked
    up in it through ``get_step_uuid``.

    Returns:
        The parameters of the current step.

    Raises:
        StepUUIDResolveError: The current step could not be identified.
    """
    with open(Config.PIPELINE_DEFINITION_PATH, "r") as definition_file:
        definition = json.load(definition_file)
    pipeline = Pipeline.from_json(definition)

    try:
        current_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError:
        raise StepUUIDResolveError("Parameters could not be identified.")

    return pipeline.get_step_by_uuid(current_uuid).get_params()
Exemplo n.º 5
0
def update_params(
    step_params: Optional[dict] = None, pipeline_params: Optional[dict] = None
) -> None:
    """Updates the parameters of the current step and of the pipeline.

    Additionally, you can set new parameters by giving parameters that
    do not yet exist in the current parameters, either of the step or of
    the pipeline.

    Internally the updating is done by calling the ``dict.update``
    method. This further explains the behavior of this method.

    The pipeline definition is always written back to
    ``Config.PIPELINE_DEFINITION_PATH``, even when both arguments are
    ``None``.

    Args:
        step_params: The step parameters to update. Either updating
            their values or adding new parameter keys.
        pipeline_params: The pipeline parameters to update. Either
            updating their values or adding new parameter keys.

    Raises:
        StepUUIDResolveError: ``step_params`` was given but the current
            step could not be identified.

    Warning:
        Updating the `pipeline_params` can lead to race conditions,
        since different steps could be updating them at the same time.

    """
    with open(Config.PIPELINE_DEFINITION_PATH, "r") as f:
        pipeline_definition = json.load(f)

    pipeline = Pipeline.from_json(pipeline_definition)

    if pipeline_params is not None:
        pipeline.update_params(pipeline_params)

    # The step only needs to be resolved when step parameters are
    # actually being updated.
    if step_params is not None:
        try:
            step_uuid = get_step_uuid(pipeline)
        except StepUUIDResolveError as err:
            # Chain explicitly so the underlying reason stays visible.
            raise StepUUIDResolveError(
                "Parameters could not be identified."
            ) from err

        step = pipeline.get_step_by_uuid(step_uuid)
        step.update_params(step_params)

    # Serialize before opening the file for writing: opening with "w"
    # truncates it, so an encoding failure (non-serializable value, or
    # keys that cannot be sorted) must happen while the existing
    # definition on disk is still intact.
    serialized = json.dumps(pipeline.to_dict(), indent=4, sort_keys=True)

    with open(Config.PIPELINE_DEFINITION_PATH, "w") as f:
        f.write(serialized)
Exemplo n.º 6
0
def get_params() -> Tuple[dict, dict]:
    """Returns the parameters of the current step and of the pipeline.

    The pipeline definition is loaded from
    ``Config.PIPELINE_DEFINITION_PATH`` and the current step is looked
    up in it through ``get_step_uuid``.

    Returns:
        A ``(step_params, pipeline_params)`` tuple: first the
        parameters of the current step, then those of the pipeline.

    Raises:
        StepUUIDResolveError: The current step could not be identified.
    """
    with open(Config.PIPELINE_DEFINITION_PATH, "r") as definition_file:
        definition = json.load(definition_file)
    pipeline = Pipeline.from_json(definition)

    try:
        current_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError:
        raise StepUUIDResolveError("Parameters could not be identified.")

    current_step = pipeline.get_step_by_uuid(current_uuid)
    # Step parameters are fetched before the pipeline parameters.
    return current_step.get_params(), pipeline.get_params()
Exemplo n.º 7
0
def _get_current_step(pipeline: Pipeline) -> PipelineStep:
    """Returns the step of ``pipeline`` that is currently running.

    Args:
        pipeline: Pipeline object in which to look up the current step.

    Returns:
        The step whose UUID is reported by ``get_step_uuid``.

    Raises:
        StepUUIDResolveError: The current step's UUID could not be
            resolved.
    """
    try:
        current_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError:
        raise StepUUIDResolveError("Parameters could not be identified.")
    else:
        return pipeline.get_step_by_uuid(current_uuid)
Exemplo n.º 8
0
def get_step_uuid(pipeline: Pipeline) -> str:
    """Gets the currently running script's step UUID.

    The environment variable ``ORCHEST_STEP_UUID`` takes precedence.
    When it is absent, the UUID is derived by asking the Orchest API
    where the Jupyter server of this session lives, matching the
    running kernel (``KERNEL_ID``) to one of its sessions and then
    matching that session's notebook to a step of the pipeline.

    Args:
        pipeline: Pipeline object describing the pipeline and its steps.

    Returns:
        The UUID of the currently running step. May it be through an
        active Jupyter kernel or as part of a partial run.

    Raises:
        StepUUIDResolveError: The step's UUID cannot be resolved:
            ``KERNEL_ID`` is not set, no Jupyter session uses that
            kernel, or no step points to the session's notebook.
    """
    # In case of partial runs, the step UUID can be obtained via the
    # environment.
    if "ORCHEST_STEP_UUID" in os.environ:
        return os.environ["ORCHEST_STEP_UUID"]

    # The KERNEL_ID environment variable is set by the Jupyter
    # Enterprise Gateway.
    kernel_id = os.environ.get("KERNEL_ID")
    if kernel_id is None:
        raise StepUUIDResolveError('Environment variable "KERNEL_ID" not present.')

    # Get JupyterLab sessions to resolve the step's UUID via the id of
    # the running kernel and the step's associated file path.
    # Orchest API --jupyter_server_ip/port--> Jupyter sessions --notebook path--> UUID.
    launches_url = (
        "http://orchest-api/api/sessions/"
        f'{Config.PROJECT_UUID}/{pipeline.properties["uuid"]}'
    )
    launch_data = _request_json(launches_url)

    # NOTE: the `proxy_prefix` already includes the "/" at the start
    jupyter_api_url = "http://{ip}:{port}{proxy_prefix}/api/sessions"
    jupyter_api_url = jupyter_api_url.format(
        ip=launch_data["jupyter_server_ip"],
        port=launch_data["notebook_server_info"]["port"],
        proxy_prefix=launch_data["notebook_server_info"]["base_url"],
    )
    jupyter_sessions = _request_json(jupyter_api_url)

    for session in jupyter_sessions:
        if session["kernel"]["id"] == kernel_id:
            notebook_path = session["notebook"]["path"]
            break
    else:
        # The `f` prefix has to be on the literal that contains the
        # placeholder: adjacent literals are concatenated, but each one
        # is formatted (or not) on its own.
        raise StepUUIDResolveError(
            'Jupyter session data has no "kernel" with "id" equal to the '
            f'"KERNEL_ID" of this step: {kernel_id}.'
        )

    for step in pipeline.steps:
        # Compare basenames, one pipeline can not have duplicate notebook names,
        # so this should work
        if os.path.basename(step.properties["file_path"]) == os.path.basename(
            notebook_path
        ):
            # NOTE: the UUID cannot be cached here. Because if the
            # notebook is assigned to a different step, then the env
            # variable does not change and thus the notebooks wrongly
            # thinks it is a different step.
            return step.properties["uuid"]

    raise StepUUIDResolveError(f'No step with "notebook_path": {notebook_path}.')