def test_import_keras_with_fluent_autolog_enables_tensorflow_autologging():
    """
    After `mlflow.autolog()` followed by `import keras`, the TensorFlow flavor must be
    reported as enabled and the Keras flavor as disabled.
    """
    mlflow.autolog()

    import keras  # pylint: disable=unused-variable,unused-import

    tensorflow_disabled = autologging_is_disabled(mlflow.tensorflow.FLAVOR_NAME)
    assert not tensorflow_disabled

    keras_disabled = autologging_is_disabled(mlflow.keras.FLAVOR_NAME)
    assert keras_disabled
def test_autologging_integrations_expose_configs_and_support_disablement(integration):
    """
    Verify that an integration's `autolog(disable=...)` call is reflected both by
    `autologging_is_disabled` and by the stored "disable" configuration value.

    :param integration: The autologging integration module under test; it must expose
                        `autolog(disable=...)` and `FLAVOR_NAME`.
    """
    # Enable every integration first. The loop variable deliberately has its own name:
    # reusing `integration` here would rebind the parameter, so the assertions below
    # would always run against the last integration in the collection instead of the
    # one this test invocation was given.
    for candidate in AUTOLOGGING_INTEGRATIONS_TO_TEST:
        candidate.autolog(disable=False)

    assert not autologging_is_disabled(integration.FLAVOR_NAME)
    # The fallback passed to `get_autologging_config` is the opposite of the expected
    # value, so a missing config entry makes the assertion fail rather than pass
    assert not get_autologging_config(integration.FLAVOR_NAME, "disable", True)

    integration.autolog(disable=True)

    assert autologging_is_disabled(integration.FLAVOR_NAME)
    assert get_autologging_config(integration.FLAVOR_NAME, "disable", False)
def test_import_tf_keras_with_fluent_autolog_enables_tf_autologging():
    """
    After `mlflow.autolog()` followed by `import tensorflow.keras`, TensorFlow autologging
    must be enabled; Keras autologging must be disabled on every TensorFlow version other
    than 2.5.1.
    """
    mlflow.autolog()

    import tensorflow.keras  # pylint: disable=unused-variable,unused-import

    tensorflow_disabled = autologging_is_disabled(mlflow.tensorflow.FLAVOR_NAME)
    assert not tensorflow_disabled

    # NB: For backwards compatibility, fluent autologging enables TensorFlow and
    # Keras autologging upon tf.keras import in TensorFlow 2.5.1, so the Keras check
    # is skipped for exactly that version
    if Version(tf.__version__) == Version("2.5.1"):
        return

    assert autologging_is_disabled(mlflow.keras.FLAVOR_NAME)
def test_import_tf_keras_with_fluent_autolog_enables_tf_autologging():
    """
    After `mlflow.autolog()` followed by `import tensorflow.keras`, TensorFlow autologging
    must be enabled; on TensorFlow >= 2.6 the Keras flavor must additionally be disabled.
    """
    mlflow.autolog()

    import tensorflow.keras  # pylint: disable=unused-variable,unused-import

    tensorflow_disabled = autologging_is_disabled(mlflow.tensorflow.FLAVOR_NAME)
    assert not tensorflow_disabled

    # NB: In Tensorflow >= 2.6, we redirect keras autologging to tensorflow autologging
    # so the original keras autologging is disabled
    keras_is_redirected = Version(tf.__version__) >= Version("2.6")
    if keras_is_redirected:
        # NB: For TF >= 2.6, import tensorflow.keras will trigger importing keras
        keras_disabled = autologging_is_disabled(mlflow.keras.FLAVOR_NAME)
        assert keras_disabled
def test_universal_autolog_calls_pyspark_immediately():
    """
    `mlflow.autolog()` must take effect for the Spark flavor right away, must follow
    subsequent `disable=True` / `disable=False` toggles, and must not call
    `mlflow.spark.autolog` again when the pyspark module-loaded notification fires.
    """
    mlflow.autolog()
    assert not autologging_is_disabled(mlflow.spark.FLAVOR_NAME)

    # Toggle autologging off and back on, checking the Spark flavor after each call
    for disable_flag in (True, False):
        mlflow.autolog(disable=disable_flag)
        assert bool(autologging_is_disabled(mlflow.spark.FLAVOR_NAME)) == disable_flag

    spark_autolog_patch = mock.patch("mlflow.spark.autolog", wraps=mlflow.spark.autolog)
    with spark_autolog_patch as spark_autolog_spy:
        # there should be no import hook on pyspark since autologging was already
        # applied to an active spark session
        mlflow.utils.import_hooks.notify_module_loaded(pyspark)
        spark_autolog_spy.assert_not_called()
示例#6
0
def test_autologging_is_disabled_returns_expected_values():
    """
    `autologging_is_disabled` must return True for an unknown integration, True for a
    registered integration whose `autolog()` has not been called yet or was called with
    `disable=True`, and False once `autolog(disable=False)` has been called.
    """
    integration_name = "test_integration_for_disable_check"

    assert autologging_is_disabled("nonexistent_integration") is True

    @autologging_integration(integration_name)
    def autolog(disable=False, silent=False):
        pass

    # Until `autolog()` has been invoked, the integration is still reported as disabled
    assert autologging_is_disabled(integration_name) is True

    # Explicitly disabling keeps it disabled
    autolog(disable=True)
    assert autologging_is_disabled(integration_name) is True

    # Explicitly enabling flips the reported state
    autolog(disable=False)
    assert autologging_is_disabled(integration_name) is False
示例#7
0
 def _notify(self, path, version, data_format):
     """
     Callback invoked by the Scala SparkListener to hand a datasource read event over to the
     current Python process.

     Does nothing when autologging for `FLAVOR_NAME` is disabled. Otherwise the table info is
     tagged onto the active run if there is one, or handed to the context provider if not.
     """
     if autologging_is_disabled(FLAVOR_NAME):
         return

     # Note that there's a TOCTOU race condition here - active_run() here can actually throw
     # if the main thread happens to end the run & pop from the active run stack after we check
     # the stack size but before we peek
     current_run = mlflow.active_run()
     if not current_run:
         # No active run: hand the table info to the context provider instead
         add_table_info_to_context_provider(path, version, data_format)
         return

     # There's an active run, so simply set the tag on it
     _set_run_tag_async(current_run.info.run_id, path, version, data_format)
示例#8
0
 def tags(self):
     """
     Build the tag dict for the table infos accumulated in `_table_infos`.

     Returns an empty dict when autologging for `FLAVOR_NAME` is disabled or when no table
     infos have been recorded. Otherwise returns a single-entry dict keyed by
     `_SPARK_TABLE_INFO_TAG_NAME` whose value holds one formatted line per distinct table
     info, in first-seen order.
     """
     global _table_infos
     # Short circuit when autologging is disabled
     if autologging_is_disabled(FLAVOR_NAME):
         return {}

     with _lock:
         # `dict.fromkeys` drops repeated entries while preserving first-seen order
         distinct_infos = list(dict.fromkeys(_table_infos))
         if not distinct_infos:
             return {}
         formatted_lines = [_get_table_info_string(*entry) for entry in distinct_infos]
         return {_SPARK_TABLE_INFO_TAG_NAME: "\n".join(formatted_lines)}
示例#9
0
    def safe_patch_function(*args, **kwargs):
        """
        A safe wrapper around the specified `patch_function` implementation designed to
        handle exceptions thrown during the execution of `patch_function`. This wrapper
        distinguishes exceptions thrown from the underlying / original function
        (`<destination>.<function_name>`) from exceptions thrown from other parts of
        `patch_function`. This distinction is made by passing an augmented version of the
        underlying / original function to `patch_function` that uses nonlocal state to track
        whether or not it has been executed and whether or not it threw an exception.
        Exceptions thrown from the underlying / original function are propagated to the caller,
        while exceptions thrown from other parts of `patch_function` are caught and logged as
        warnings.

        :param args: Positional arguments of the patched call; forwarded to `patch_function`
                     and, on the bypass / fallback paths, directly to `original`.
        :param kwargs: Keyword arguments of the patched call; forwarded the same way as `args`.
        :return: The value returned by `original`. This is either the result captured when the
                 patch implementation invoked `call_original`, or the result of calling
                 `original` directly when autologging is skipped or the patch implementation
                 never invoked it.
        :raises Exception: Any exception raised by `original` is re-raised. When `is_testing()`
                           is true, exceptions raised by the patch implementation itself are
                           re-raised as well instead of being logged as warnings.
        """
        # Reroute warnings encountered during the patch function implementation to an MLflow event
        # logger, and enforce silent mode if applicable (i.e. if the corresponding autologging
        # integration was called with `silent=True`), hiding MLflow event logging statements and
        # hiding all warnings in the autologging preamble and postamble (i.e. the code surrounding
        # the user's original / underlying ML function). Non-MLflow warnings are enabled during the
        # execution of the original / underlying ML function
        #
        # Note that we've opted *not* to apply this context manager as a decorator on
        # `safe_patch_function` because the context-manager-as-decorator pattern uses
        # `contextlib.ContextDecorator`, which creates generator expressions that cannot be pickled
        # during model serialization by ML frameworks such as scikit-learn
        is_silent_mode = get_autologging_config(autologging_integration, "silent", False)
        with set_mlflow_events_and_warnings_behavior_globally(
            # MLflow warnings emitted during autologging training sessions are likely not
            # actionable and result from the autologging implementation invoking another MLflow
            # API. Accordingly, we reroute these warnings to the MLflow event logger with level
            # WARNING. For reference, see recommended warning and event logging behaviors from
            # https://docs.python.org/3/howto/logging.html#when-to-use-logging
            reroute_warnings=True,
            disable_event_logs=is_silent_mode,
            disable_warnings=is_silent_mode,
        ), set_non_mlflow_warnings_behavior_for_current_thread(
            # non-MLflow Warnings emitted during the autologging preamble (before the original /
            # underlying ML function is called) and postamble (after the original / underlying ML
            # function is called) are likely not actionable and result from the autologging
            # implementation invoking an API from a dependent library. Accordingly, we reroute
            # these warnings to the MLflow event logger with level WARNING. For reference, see
            # recommended warning and event logging behaviors from
            # https://docs.python.org/3/howto/logging.html#when-to-use-logging
            reroute_warnings=True,
            disable_warnings=is_silent_mode,
        ):

            # In test mode, remember whether a run was already active before any patch code
            # executed. NOTE: this name is only bound when `is_testing()` is true; its only
            # other use (the validation near the end of this function) is guarded by the same
            # `is_testing()` check
            if is_testing():
                preexisting_run_for_testing = mlflow.active_run()

            # Whether or not to exclude autologged content from user-created fluent runs
            # (i.e. runs created manually via `mlflow.start_run()`)
            exclusive = get_autologging_config(autologging_integration, "exclusive", False)
            # A run is treated as user-created when it is active while no autologging session
            # is active
            user_created_fluent_run_is_active = (
                mlflow.active_run() and not _AutologgingSessionManager.active_session()
            )
            # True when there is an active autologging session whose state is already "failed"
            active_session_failed = (
                _AutologgingSessionManager.active_session() is not None
                and _AutologgingSessionManager.active_session().state == "failed"
            )

            if (
                active_session_failed
                or autologging_is_disabled(autologging_integration)
                or (user_created_fluent_run_is_active and exclusive)
                or mlflow.utils.autologging_utils._AUTOLOGGING_GLOBALLY_DISABLED
            ):
                # If the active autologging session has already failed, if the autologging
                # integration associated with this patch is disabled, if autologging is globally
                # disabled, or if the current autologging integration is in exclusive mode and a
                # user-created fluent run is active, call the original function and return.
                # Restore the original warning behavior during original function execution,
                # since autologging is being skipped
                with set_non_mlflow_warnings_behavior_for_current_thread(
                    disable_warnings=False, reroute_warnings=False,
                ):
                    return original(*args, **kwargs)

            # Whether or not the original / underlying function has been called during the
            # execution of patched code
            original_has_been_called = False
            # The value returned by the call to the original / underlying function during
            # the execution of patched code
            original_result = None
            # Whether or not an exception was raised from within the original / underlying function
            # during the execution of patched code
            failed_during_original = False
            # The active MLflow run (if any) associated with patch code execution
            patch_function_run_for_testing = None

            # Best-effort event logging helper: invokes `log_fn(*args)` and downgrades any
            # exception it raises to a debug-level log message, so that a failing event logger
            # never interrupts the patched call. Note that `args` here is this helper's own
            # parameter, not the `args` of `safe_patch_function`
            def try_log_autologging_event(log_fn, *args):
                try:
                    log_fn(*args)
                except Exception as e:
                    _logger.debug(
                        "Failed to log autologging event via '%s'. Exception: %s", log_fn, e,
                    )

            with _AutologgingSessionManager.start_session(autologging_integration) as session:
                try:

                    # Augmented version of `original` that is handed to the patch implementation.
                    # It records (via the nonlocal variables declared above) that the original
                    # function was called, what it returned, and whether it raised, and it emits
                    # start / success / error autologging events around the call
                    def call_original(*og_args, **og_kwargs):
                        try:
                            try_log_autologging_event(
                                AutologgingEventLogger.get_logger().log_original_function_start,
                                session,
                                destination,
                                function_name,
                                og_args,
                                og_kwargs,
                            )

                            if is_testing():
                                _validate_args(args, kwargs, og_args, og_kwargs)
                                # By the time `original` is called by the patch implementation, we
                                # assume that either: 1. the patch implementation has already
                                # created an MLflow run or 2. the patch code will not create an
                                # MLflow run during the current execution. Here, we capture a
                                # reference to the active run, which we will use later on to
                                # determine whether or not the patch implementation created
                                # a run and perform validation if necessary
                                nonlocal patch_function_run_for_testing
                                patch_function_run_for_testing = mlflow.active_run()

                            # The flag is set before `original` runs, so it is true even when
                            # `original` raises
                            nonlocal original_has_been_called
                            original_has_been_called = True

                            nonlocal original_result
                            # Show all non-MLflow warnings as normal (i.e. not as event logs)
                            # during original function execution, even if silent mode is enabled
                            # (`silent=True`), since these warnings originate from the ML framework
                            # or one of its dependencies and are likely relevant to the caller
                            with set_non_mlflow_warnings_behavior_for_current_thread(
                                disable_warnings=False, reroute_warnings=False,
                            ):
                                original_result = original(*og_args, **og_kwargs)

                            try_log_autologging_event(
                                AutologgingEventLogger.get_logger().log_original_function_success,
                                session,
                                destination,
                                function_name,
                                og_args,
                                og_kwargs,
                            )

                            return original_result
                        except Exception as e:
                            try_log_autologging_event(
                                AutologgingEventLogger.get_logger().log_original_function_error,
                                session,
                                destination,
                                function_name,
                                og_args,
                                og_kwargs,
                                e,
                            )

                            # Mark the failure as originating from `call_original` so that the
                            # outer handler re-raises it instead of logging it as a warning
                            nonlocal failed_during_original
                            failed_during_original = True
                            raise

                    # Apply the name, docstring, and signature of `original` to `call_original`.
                    # This is important because several autologging patch implementations inspect
                    # the signature of the `original` argument during execution
                    call_original = update_wrapper_extended(call_original, original)

                    try_log_autologging_event(
                        AutologgingEventLogger.get_logger().log_patch_function_start,
                        session,
                        destination,
                        function_name,
                        args,
                        kwargs,
                    )

                    # Run the patch implementation. Its return value is intentionally ignored;
                    # the value returned to the caller is the one captured by `call_original`
                    if patch_is_class:
                        patch_function.call(call_original, *args, **kwargs)
                    else:
                        patch_function(call_original, *args, **kwargs)

                    session.state = "succeeded"

                    try_log_autologging_event(
                        AutologgingEventLogger.get_logger().log_patch_function_success,
                        session,
                        destination,
                        function_name,
                        args,
                        kwargs,
                    )
                except Exception as e:
                    session.state = "failed"

                    # Exceptions thrown during execution of the original function should be
                    # propagated to the caller. Additionally, exceptions encountered during test
                    # mode should be reraised to detect bugs in autologging implementations
                    if failed_during_original or is_testing():
                        raise

                    try_log_autologging_event(
                        AutologgingEventLogger.get_logger().log_patch_function_error,
                        session,
                        destination,
                        function_name,
                        args,
                        kwargs,
                        e,
                    )

                    _logger.warning(
                        "Encountered unexpected error during %s autologging: %s",
                        autologging_integration,
                        e,
                    )

                if is_testing() and not preexisting_run_for_testing:
                    # If an MLflow run was created during the execution of patch code, verify that
                    # it is no longer active and that it contains expected autologging tags
                    assert not mlflow.active_run(), (
                        "Autologging integration %s leaked an active run" % autologging_integration
                    )
                    if patch_function_run_for_testing:
                        _validate_autologging_run(
                            autologging_integration, patch_function_run_for_testing.info.run_id
                        )

                # If the patch implementation invoked `call_original`, hand back the captured
                # result. Otherwise `original` has not run during this call, so invoke it
                # directly with the caller's arguments
                if original_has_been_called:
                    return original_result
                else:
                    return original(*args, **kwargs)