Пример #1
0
def log_metrics(metrics_dict, source="user", timestamp=None):
    # type: (Dict[str, Any], str, Optional[datetime]) -> None
    """
    Report a batch of key-value metrics to dbnd in a single call.

    When a tracker is available the whole dictionary is handed over to it.
    Otherwise the missing-tracker warning is sent through the one-time logger
    and the metrics are written to the module logger at INFO level instead.

    Args:
        metrics_dict: Key-value pairs of metrics to log.
        source: Optional name of the metrics source, default is user.
        timestamp: Optional timestamp of the metrics; forwarded to the tracker as is.

    Example::

        @task
        def log_lowercase_letters():
            # all lower alphabet chars -> {"a": 97,..., "z": 122}
            log_metrics({chr(i): i for i in range(97, 123)})
    """
    active_tracker = _get_tracker()
    if not active_tracker:
        # No tracker to report to: warn through the one-time logger, then fall
        # back to plain logging so the values are not lost silently.
        warning_text = TRACKER_MISSING_MESSAGE % ("log_metrics",)
        get_one_time_logger().log_once(warning_text, "log_metrics", logging.WARNING)
        fallback_text = "Log multiple metrics from {source}: {metrics}".format(
            source=source.capitalize(), metrics=metrics_dict
        )
        logger.info(fallback_text)
        return

    active_tracker.log_metrics(metrics_dict, source=source, timestamp=timestamp)
Пример #2
0
def log_metric(key, value, source="user"):
    # type: (str, Any, Optional[str]) -> None
    """
    Log key-value pair as a metric to dbnd.

    When a tracker is available the metric is forwarded to it. Otherwise the
    missing-tracker warning is sent through the one-time logger and the metric
    is written to the module logger at INFO level.

    Args:
        key: Name of the metric.
        value: Value of the metric.
        source: The source of the metric, default is user. May be None.

    Example::

        def calculate_alpha(alpha):
            alpha *= 1.1
            log_metric("alpha", alpha)
    """
    tracker = _get_tracker()
    if tracker:
        tracker.log_metric(key, value, source=source)
        return

    message = TRACKER_MISSING_MESSAGE % ("log_metric",)
    get_one_time_logger().log_once(message, "log_metric", logging.WARNING)
    # `source` is declared Optional: str() keeps this fallback from raising
    # AttributeError on None while leaving real strings unchanged.
    logger.info(
        "Log {} Metric '{}'='{}'".format(str(source).capitalize(), key, value)
    )
Пример #3
0
def _report_operation(operation_report):
    # type: (DatasetOperationReport) -> None
    """
    Hand a dataset operation report over to the current tracker.

    If no tracker is available, the missing-tracker warning is sent through the
    one-time logger and the report is dropped.

    Args:
        operation_report: The report passed to the tracker as ``op_report``.
    """
    active_tracker = _get_tracker()
    if active_tracker:
        active_tracker.log_dataset(op_report=operation_report)
        return

    warning_text = TRACKER_MISSING_MESSAGE % ("report_operation",)
    get_one_time_logger().log_once(
        warning_text, "report_operation", logging.WARNING
    )
Пример #4
0
def _report_dbt_metadata(dbt_metadata, tracker=None):
    """
    Send dbt metadata to a tracker.

    Args:
        dbt_metadata: Metadata passed to the tracker's ``log_dbt_metadata``.
        tracker: Tracker to report to. When None, the current tracker is
            looked up via ``_get_tracker()``.

    If the resulting tracker is falsy, the missing-tracker warning is sent
    through the one-time logger and nothing is reported.
    """
    # Only an explicit None triggers the lookup; any other value is used as given.
    active_tracker = _get_tracker() if tracker is None else tracker

    if active_tracker:
        active_tracker.log_dbt_metadata(dbt_metadata=dbt_metadata)
        return

    warning_text = TRACKER_MISSING_MESSAGE % ("report_dbt_metadata",)
    get_one_time_logger().log_once(
        warning_text, "report_dbt_metadata", logging.WARNING
    )
Пример #5
0
def log_data(
    key,  # type: str
    value=None,  # type: Union[pd.DataFrame, spark.DataFrame, PostgresTable, SnowflakeTable]
    path=None,  # type: Optional[str]
    operation_type=DbndTargetOperationType.read,  # type: DbndTargetOperationType
    with_preview=None,  # type: Optional[bool]
    with_size=None,  # type: Optional[bool]
    with_schema=None,  # type: Optional[bool]
    with_stats=None,  # type: Optional[Union[bool, str, List[str], LogDataRequest]]
    with_histograms=None,  # type: Optional[Union[bool, str, List[str], LogDataRequest]]
    raise_on_error=False,  # type: bool
):  # type: (...) -> None
    """
    Report information about a piece of data to dbnd.

    The ``with_*`` flags are bundled into a ``ValueMetaConf`` and passed to the
    tracker together with the data. If no tracker is available, the
    missing-tracker warning is sent through the one-time logger and nothing is
    reported.

    @param key: Name of the data.
    @param value: Value of the data, currently supporting only dataframes and tables view.
    @param path: Optional target or path representing a target to connect the data to.
    @param operation_type: Type of the operation performed on the target - reading or writing the data.
    @param with_preview: True if a preview of the data should be logged.
    @param with_size: True if the size of the data should be logged.
    @param with_schema: True if the schema of the data should be logged.
    @param with_stats: True if stats of the data should be calculated and logged.
    @param with_histograms: True if histograms of the data should be calculated and logged.
    @param raise_on_error: Forwarded to the tracker; raise if an error occurs.
    """
    active_tracker = _get_tracker()
    if active_tracker:
        # The meta configuration is only built once a tracker is known to exist.
        value_meta_conf = ValueMetaConf(
            log_preview=with_preview,
            log_schema=with_schema,
            log_size=with_size,
            log_stats=with_stats,
            log_histograms=with_histograms,
        )
        active_tracker.log_data(
            key,
            value,
            meta_conf=value_meta_conf,
            path=path,
            operation_type=operation_type,
            raise_on_error=raise_on_error,
        )
        return

    warning_text = TRACKER_MISSING_MESSAGE % ("log_data",)
    get_one_time_logger().log_once(warning_text, "log_data", logging.WARNING)
Пример #6
0
def log_artifact(key, artifact):
    """
    Log a local file or directory as an artifact of the currently active run.

    When no tracker is available, the missing-tracker warning is sent through
    the one-time logger and the key/artifact pair is written to the module
    logger at INFO level instead.

    Args:
        key: The key by which to log the artifact
        artifact: The artifact to log

    Example::

        def prepare_data(data):
            lorem = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt"
            data.write(lorem)
            log_artifact("my_tmp_file", str(data))
    """
    active_tracker = _get_tracker()
    if not active_tracker:
        warning_text = TRACKER_MISSING_MESSAGE % ("log_artifact",)
        get_one_time_logger().log_once(warning_text, "log_artifact", logging.WARNING)
        logger.info("Artifact %s=%s", key, artifact)
        return

    active_tracker.log_artifact(key, artifact)
Пример #7
0
    def handle_callable_call(self, *call_args, **call_kwargs):
        """
        Dispatch a call to the decorated function/class according to the current dbnd mode.

        The checks are evaluated in this order:

        1. dbnd is disabled in the project config: call the wrapped object directly.
        2. Tracking mode: call the wrapped object inside ``self.tracking_context``
           and return whatever the context's callback returns for the result.
        3. Airflow DAG build context: delegate to ``build_task_at_airflow_dag_context``.
        4. No current task: log a one-time warning and call the wrapped object directly.
        5. Orchestration, BUILD phase: instantiate the task class and return its
           ``result`` when it has a single result output, otherwise the task itself.
        6. Orchestration, RUN phase: run as a task from within the current task when the
           run settings and the current task allow it, otherwise call the wrapped
           object directly.

        Args:
            *call_args: Positional arguments of the user call.
            **call_kwargs: Keyword arguments of the user call.

        Returns:
            The wrapped object's return value, a task, a task result, or whatever the
            delegated builder/runner returns, depending on the branch taken.

        Raises:
            Exception: If the current phase is neither BUILD nor RUN.
        """
        dbnd_project_config = get_dbnd_project_config()
        if dbnd_project_config.disabled:
            return self.class_or_func(*call_args, **call_kwargs)

        # we are at tracking mode
        if dbnd_project_config.is_tracking_mode():
            with self.tracking_context(call_args, call_kwargs) as track_result_callback:
                fp_result = self.class_or_func(*call_args, **call_kwargs)
                return track_result_callback(fp_result)

        #### DBND ORCHESTRATION MODE
        #
        #     -= Use "Step into My Code" to get back from dbnd code! =-
        #
        # decorated object call/creation: my_func(), MyDecoratedTask()
        # we are at orchestration mode

        task_cls = self.get_task_cls()

        if is_in_airflow_dag_build_context():
            # we are in Airflow DAG building mode - AIP-31
            return build_task_at_airflow_dag_context(
                task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
            )

        current = try_get_current_task()
        if not current:
            # no tracking/no orchestration,
            # falling back to "natural call" of the class_or_func
            message = (
                "Can't report tracking info. %s is decorated with @task, but no tracking context was found"
                % (self.class_or_func.__name__,)
            )
            get_one_time_logger().log_once(message, "task_decorator", logging.WARNING)
            return self.class_or_func(*call_args, **call_kwargs)

        ######
        # current is not None, and we are not in tracking/airflow/luigi
        # this is DBND Orchestration mode
        # we can be in the context of task.run() or in task.band()
        # called from user code using user_decorated_func() or UserDecoratedTask()

        if self.is_class:
            # drop the internal marker so it is not passed on as a user argument
            call_kwargs.pop("__call_original_cls", False)

        # we should not get here from _TaskFromTaskDecorator.invoke()
        # at that function we should call user code directly
        phase = current_phase()
        if phase is TaskContextPhase.BUILD:
            # we are in the @pipeline.band() context, we are building execution plan
            t = task_cls(*call_args, **call_kwargs)

            # we are in the band, and if user_code() is called we want to remove redundant
            # `user_code().result` usage
            if t.task_definition.single_result_output:
                return t.result

            # we have multiple outputs (more than one "output" parameter)
            # just return task object, user will use it as `user_code().output_1`
            return t
        elif phase is TaskContextPhase.RUN:
            # we are "running" inside some other task execution (orchestration!)
            # (inside user_defined_function() or UserDefinedTask.run())

            # if possible we will run it as "orchestration" task
            # with parameters parsing
            if (
                current.settings.run.task_run_at_execution_time_enabled
                and current.task_supports_dynamic_tasks
            ):
                return self._run_task_from_another_task_execution(
                    parent_task=current, call_args=call_args, call_kwargs=call_kwargs
                )
            # we can not call it in "dbnd" way, fallback to normal call
            if self.is_class:
                call_kwargs["__call_original_cls"] = False
            return self.class_or_func(*call_args, **call_kwargs)
        else:
            # Same exception type as before, but now it names the unexpected phase
            # so the failure can be diagnosed from the traceback alone.
            raise Exception(
                "Can't call decorated object: unsupported task context phase %r"
                % (phase,)
            )