Example #1
def export_db(
    archive,
    include_db=True,
    include_logs=True,
    task_version=utcnow().strftime("%Y%m%d_%H%M%S"),
):
    # type: (Path, bool, bool, str) -> None

    from dbnd._core.current import get_databand_context

    logger.info("Compressing files to %s..." % archive)
    with tarfile.open(str(archive), "w:gz") as tar:

        if include_db:
            dbnd_context = get_databand_context()
            conn_string = dbnd_context.config.get("webserver", "sql_alchemy_conn")
            if conn_string.startswith("postgresql"):
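                # dump the Postgres database into a temporary file and add it to the archive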
                with tempfile.NamedTemporaryFile(prefix="dbdump.", suffix=".sql") as tf:
                    dump_postgres(conn_string, tf.name)
                    tar.add(tf.name, arcname="postgres-dbnd.sql")
            else:
                raise DatabandRuntimeError(
                    "Can not export db! "
                    "Currently, we support only sqlite and postgres db in automatic export"
                )

        if include_logs:
            context = get_databand_context()
            local_env = context.settings.get_env_config(CloudType.local)
            logs_folder = local_env.dbnd_local_root.folder("logs").path
            if os.path.exists(logs_folder):
                logger.info("Adding run folder from '%s'", logs_folder)
                tar.add(logs_folder, "run")
            else:
                logger.warning("Logs dir '%s' is not found", logs_folder)
Example #2
    def build_dbnd_task(self, task_name, task_kwargs=None, expected_type=None):
        task_kwargs = task_kwargs or dict()
        task_kwargs.setdefault("task_name", task_name)

        task_cls = self.get_task_cls(task_name)  # type: Type[Task]
        if is_airflow_enabled():
            from dbnd_airflow.dbnd_task_executor.airflow_operator_as_dbnd import (
                AirflowDagAsDbndTask, )

            if issubclass(task_cls, AirflowDagAsDbndTask):
                # we are running an old-style DAG
                dag = self._get_aiflow_dag(task_name)
                airflow_task = AirflowDagAsDbndTask.build_dbnd_task_from_dag(
                    dag=dag)
                return airflow_task

        try:
            logger.debug("Building %s task",
                         task_cls.task_definition.full_task_family)
            obj = task_cls(**task_kwargs)

        except Exception:
            exc = get_databand_context().settings.log.format_exception_as_str(
                sys.exc_info(), isolate=True)
            logger.error("Failed to build %s: \n\n%s",
                         task_cls.get_task_family(), exc)
            raise
        if expected_type and not issubclass(task_cls, expected_type):
            raise friendly_error.task_registry.wrong_type_for_task(
                task_name, task_cls, expected_type)
        return obj
Example #3
    def __init__(
        self,
        task_name,
        task_definition,  # type: TaskDefinition
        task_signature_obj,
        task_params,  # type: Parameters
    ):
        super(_BaseTask, self).__init__()
        self.task_definition = task_definition
        self.task_params = task_params
        self.task_name = task_name  # type: str

        # miscellaneous
        self.task_id = "{}__{}".format(task_name, task_signature_obj.signature)
        self.task_type = self._conf__task_type_name

        self.task_signature_obj = task_signature_obj
        self.task_outputs_signature_obj = None

        # define it at creation time
        # we can remove the strong reference once we have a global cache for instances;
        # otherwise, if we move to another databand_context, task_id relationships will not be found
        self.dbnd_context = get_databand_context()

        # we count task meta creations:
        # even if a cached task is going to be used, we still increase the creation id,
        # so if t2 is created after t1, t2.task_creation_id > t1.task_creation_id
        _BaseTask._current_task_creation_id += 1
        self.task_creation_id = _BaseTask._current_task_creation_id
Example #4
def get_job_by_name(job_name):
    query_params = build_query_api_params(
        filters=build_job_name_filter(job_name))

    job_result = _get_job(get_databand_context().databand_api_client,
                          query_params)
    return first(job_result, None)
Example #5
    def __init__(self, config, task_cls, task_args, task_kwargs):
        # type: (DbndConfig, Type[_BaseTask], Any, Any) -> None
        self.task_cls = task_cls
        self.task_definition = task_cls.task_definition  # type: TaskDefinition

        # keep copy of user inputs
        self.task_kwargs__ctor = task_kwargs.copy()
        self.task_args__ctor = list(task_args)

        self.parent_task = try_get_current_task()

        # find out whether we are running this constructor within another Databand Task
        self.dbnd_context = get_databand_context()
        self.task_call_source = [
            self.dbnd_context.user_code_detector.find_user_side_frame(2)
        ]
        if self.task_call_source and self.parent_task:
            self.task_call_source.extend(
                self.parent_task.task_meta.task_call_source)

        self.task_family = self.task_definition.task_family
        self.task_name = self.task_family

        self.multi_sec_conf = MultiSectionConfig(config, [])

        self._task_params = self.task_definition._task_params.clone()

        self.ctor_kwargs = {}

        self._exc_desc = self.task_family
        self.task_errors = []
Example #6
def get_scheduled_jobs(name_pattern=None,
                       from_file_only=False,
                       include_deleted=False,
                       ctx=None):
    ctx = ctx or get_databand_context()
    schema = ScheduledJobSchemaV2(strict=False)
    query_filter = []
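    # filters are {"name", "op", "val"} dicts, JSON-encoded into the "filter" query param below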
    if from_file_only:
        query_filter.append({"name": "from_file", "op": "eq", "val": True})

    if not include_deleted:
        query_filter.append({
            "name": "deleted_from_file",
            "op": "eq",
            "val": False
        })

    if name_pattern:
        query_filter.append({
            "name": "name",
            "op": "like",
            "val": name_pattern
        })

    query = {"filter": json.dumps(query_filter)}

    res = ctx.databand_api_client.api_request("/api/v1/scheduled_jobs",
                                              None,
                                              method="GET",
                                              query=query,
                                              no_prefix=True)
    return schema.load(data=res["data"], many=True).data
Example #7
def create_alert(
    job_name,
    job_id,
    task_name,
    uid,
    alert_class,
    severity,
    operator,
    value,
    user_metric,
):
    """add alert for existing job or scheduled job"""
    alert = _build_alert(
        job_name,
        job_id,
        task_name,
        uid,
        alert_class,
        severity,
        operator,
        value,
        user_metric,
    )
    alert_def_uid = _post_alert(get_databand_context().databand_api_client,
                                alert)
    return alert_def_uid
Example #8
def _sig(task):
    with new_databand_run(context=get_databand_context(), job_name=task.task_name):
        for child in [task] + list(task.descendants.get_children()):
            name = "signature %s" % child.task_name
            logger.info(child.ctrl.banner(name))

    return task.task_signature
Example #9
def log_exception_to_server(exception=None):
    try:
        from dbnd._core.current import get_databand_context

        client = get_databand_context().databand_api_client
        if client is None or not client.is_configured():
            return

        e_type, e_value, e_traceback = sys.exc_info()
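        # an explicitly passed exception overrides the one currently being handled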
        if exception:
            e_type, e_value, e_traceback = (
                type(exception),
                exception,
                exception.__traceback__,
            )

        trace = _format_exception(e_type, e_value, e_traceback)

        data = {
            "dbnd_version": dbnd.__version__,
            "source": "tracking-sdk",
            "stack_trace": trace,
            "timestamp": utcnow().isoformat(),
        }
        return client.api_request(endpoint="log_exception",
                                  method="POST",
                                  data=data)
    except Exception:  # noqa
        logger.debug("Error sending monitoring exception message",
                     exc_info=True)
Example #10
def edit_airflow_instance(
    tracking_source_uid,
    url,
    external_url,
    fetcher,
    env,
    dag_ids,
    last_seen_dag_run_id,
    last_seen_log_id,
    name,
    system_alert_definitions,
    monitor_config,
):
    client = get_databand_context().databand_api_client
    endpoint = "airflow_monitor/edit"
    request_data = {
        "tracking_source_uid": tracking_source_uid,
        "base_url": url,
        "external_url": external_url,
        "fetcher": fetcher,
        "env": env,
        "monitor_config": monitor_config,
        "dag_ids": dag_ids,
        "last_seen_dag_run_id": last_seen_dag_run_id,
        "last_seen_log_id": last_seen_log_id,
        "name": name,
        "system_alert_definitions": system_alert_definitions,
    }

    client.api_request(endpoint, request_data, method="POST")
Example #11
def send_heartbeat(run_uid, databand_url, heartbeat_interval, driver_pid,
                   tracker, tracker_api):
    from dbnd import config
    from dbnd._core.settings import CoreConfig
    from dbnd._core.task_executor.heartbeat_sender import send_heartbeat_continuously

    with config({
            "core": {
                "tracker": tracker.split(","),
                "tracker_api": tracker_api,
                "databand_url": databand_url,
            }
    }):
        required_context = []
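        # tracking directly to the db requires a dbnd context in this process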
        if tracker_api == "db":
            from dbnd import new_dbnd_context

            required_context.append(
                new_dbnd_context(name="send_heartbeat",
                                 autoload_modules=False))

        with nested_context.nested(*required_context):
            tracking_store = get_databand_context().tracking_store

            send_heartbeat_continuously(run_uid, tracking_store,
                                        heartbeat_interval, driver_pid)
Example #12
def post_scheduled_job(scheduled_job_dict, ctx=None):
    ctx = ctx or get_databand_context()
    schema = ScheduledJobSchemaV2(strict=False)
    data, _ = schema.dump({"DbndScheduledJob": scheduled_job_dict})
    response = ctx.databand_api_client.api_request("/api/v1/scheduled_jobs",
                                                   data,
                                                   method="POST",
                                                   no_prefix=True)
    return schema.load(data=response).data
Example #13
def delete_scheduled_job(scheduled_job_name, revert=False, ctx=None):
    ctx = ctx or get_databand_context()
    ctx.databand_api_client.api_request(
        "/api/v1/scheduled_jobs?name=%s&revert=%s" %
        (scheduled_job_name, str(revert).lower()),
        None,
        method="DELETE",
        no_prefix=True,
    )
Example #14
def set_scheduled_job_active(name, value, ctx=None):
    ctx = ctx or get_databand_context()
    ctx.databand_api_client.api_request(
        "/api/v1/scheduled_jobs/set_active?name=%s&value=%s" %
        (name, str(value).lower()),
        None,
        method="PUT",
        no_prefix=True,
    )
Example #15
File: task.py Project: databand-ai/dbnd
    def dbnd_run(self):
        # type: (...) -> DatabandRun
        """Run task via Databand execution system."""
        # this code should be executed under context!
        from dbnd._core.current import get_databand_context

        ctx = get_databand_context()
        run = ctx.dbnd_run_task(self)
        return run
Example #16
def _sig(task):
    name = "signature %s" % task.task_name
    with new_databand_run(context=get_databand_context(),
                          task_or_task_name=task):
        logger.info(task.ctrl.banner(name))
        for child in task.task_meta.get_children():
            logger.info(child.ctrl.banner(name))

    return task.task_signature
Example #17
    def test_prod_immutable_output_dict_prod(self):
        env = get_databand_context().env
        prod_env = env.clone(production=True)
        task = TProdImmutbaleOutputs(task_env=prod_env)
        assert task.task_enabled_in_prod
        assert task.task_signature[:5] not in str(task.splits)
        config.log_current_config()
        actual = assert_run_task(task)

        print(actual)
Example #18
def get_scheduled_job_by_name(job_name):
    query_params = build_query_api_params(filters=build_scheduled_job_filter(
        job_name=job_name))

    job_result = _get_scheduled_jobs(
        get_databand_context().databand_api_client, query_params)
    try:
        return first(job_result)["DbndScheduledJob"]
    except ValueError:
        return None
Example #19
File: helpers.py Project: cxz/dbnd
def run_dbnd_subprocess(args,
                        retcode=255,
                        clean_env=False,
                        blocking=True,
                        **kwargs):
    # implement runner with https://docs.pytest.org/en/latest/capture.html
    # do not run in subprocess
    # main.main(['run', '--module', str(factories.__name__), ] + args)
    # return
    kwargs = kwargs.copy()
    cmd_args = list(map(str, args))
    env = kwargs.pop("env", os.environ).copy()

    if clean_env:
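        # drop Databand/Airflow-related variables so the subprocess starts from a clean environment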
        for key in list(env.keys()):
            if key.startswith("DBND") or key.startswith("AIRFLOW"):
                del env[key]

    # env['PYTHONUNBUFFERED'] = 'false'
    # env['PYTHONPATH'] = env.get('PYTHONPATH', '') + ':.:test'

    from dbnd._core.current import get_databand_context

    env["DBND__CORE__SQL_ALCHEMY_CONN"] = get_databand_context(
    ).settings.core.get_sql_alchemy_conn()

    cmd_line = list2cmdline(cmd_args)

    logger.info("Running at %s: %s", kwargs.get("cwd", "current dir"),
                cmd_line)  # To simplify rerunning failing tests

    if blocking:
        try:
            output = fast_subprocess.check_output(cmd_args,
                                                  stderr=subprocess.STDOUT,
                                                  env=env,
                                                  **kwargs)
            # we decode as utf-8 rather than ascii (.decode("ascii"))
            output = output.decode("utf-8")
            logger.info("Cmd line %s output:\n %s", cmd_line, output)
            return output
        except subprocess.CalledProcessError as ex:
            logger.error(
                "Failed to run %s :\n\n\n -= Output =-\n%s\n\n\n -= See output above =-",
                cmd_line,
                ex.output.decode("utf-8", errors="ignore"),
            )
            if ex.returncode == retcode:
                return ex.output.decode("utf-8")
            raise ex
    else:
        return subprocess.Popen(cmd_args,
                                stderr=subprocess.STDOUT,
                                env=env,
                                **kwargs)
Example #20
def create_airflow_instance(
    url,
    external_url,
    fetcher,
    env,
    dag_ids,
    last_seen_dag_run_id,
    last_seen_log_id,
    name,
    generate_token,
    system_alert_definitions,
    monitor_config,
):
    client = get_databand_context().databand_api_client
    endpoint = "airflow_monitor/add"
    request_data = {
        "base_url": url,
        "external_url": external_url,
        "fetcher": fetcher,
        "env": env,
        "monitor_config": monitor_config,
        "dag_ids": dag_ids,
        "last_seen_dag_run_id": last_seen_dag_run_id,
        "last_seen_log_id": last_seen_log_id,
        "name": name,
        "system_alert_definitions": system_alert_definitions,
    }

    if monitor_config:
        request_data["monitor_config"] = monitor_config

    resp = client.api_request(endpoint, request_data, method="POST")
    config_json = resp["server_info_dict"]
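    # enrich the returned config with the local databand_url (and an access token if requested)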
    config_json["core"]["databand_url"] = get_databand_context(
    ).settings.core.databand_url

    if generate_token:
        token_resp = generate_access_token(name, generate_token)

        config_json["core"]["databand_access_token"] = token_resp["token"]

    return config_json
Example #21
def generate_access_token(name, lifespan):
    client = get_databand_context().databand_api_client
    resp = client.api_request(
        "/api/v1/auth/personal_access_token",
        {
            "label": name,
            "lifespan": lifespan
        },
        method="POST",
    )
    return resp
Example #22
def get_alerts_filtered(alert_def_uid=None, job_name=None, custom_name=None):
    """get alerts by filters"""
    params = {
        "uids": alert_def_uid,
        "job_name": job_name,
        "custom_name": custom_name
    }
    query_params = "&".join(f"{key}={value}" for key, value in params.items()
                            if value is not None)
    return _get_alerts(get_databand_context().databand_api_client,
                       query_params)
Example #23
def create_airflow_instance(url, external_url, fetcher, api_mode, composer_client_id):
    client = get_databand_context().databand_api_client
    endpoint = "airflow_monitor/add"
    request_data = {
        "base_url": url,
        "ext_url": external_url,
        "fetcher": fetcher,
        "api_mode": api_mode,
        "composer_client_id": composer_client_id,
    }
    client.api_request(endpoint, request_data, method="POST")
Example #24
def get_scheduled_jobs(name_pattern=None,
                       from_file_only=None,
                       include_deleted=False,
                       ctx=None):
    ctx = ctx or get_databand_context()
    query_params = build_query_api_params(filters=build_scheduled_job_filter(
        name_pattern=name_pattern,
        from_file_flag=from_file_only,
        include_deleted_flag=include_deleted,
    ))
    return _get_scheduled_jobs(ctx.databand_api_client, query_params)
Example #25
    def from_databand_context(cls, name=None):
        """Syntax sugar for accessing the current config instance."""
        from dbnd._core.current import get_databand_context

        if not name:
            if cls._conf__task_family:
                # using the current cls section name to get the current instance of a class
                name = cls._conf__task_family
            else:
                raise ConfigLookupError(
                    "name is required for retrieving a config instance"
                )

        return get_databand_context().settings.get_config(name)
Example #26
    def parse_from_str(self, s):
        s = super(VersionValueType, self).parse_from_str(s)
        if not s:
            return s

        if s.lower() == VersionAlias.now:
            return utcnow().strftime("%Y%m%d_%H%M%S")

        if s.lower() == VersionAlias.context_uid:
            return get_databand_context().current_context_uid

        if s.lower() == VersionAlias.git:
            return str(get_project_git())

        return s
Example #27
    def _create_task(cls, args, kwargs):
        task_definition = cls.task_definition
        # the context must be initialized before we run any logic inside the config() scope
        # update config with the current class defaults;
        # we apply them only where no values are set yet (these are the defaults)
        with config(
                config_values=task_definition.task_defaults_config_store,
                source=task_definition.task_passport.format_source_name(
                    "defaults"),
                merge_settings=ConfigMergeSettings.on_non_exists_only,
        ) as task_config:

            tracking_mode = TaskEssence.TRACKING.is_included(cls)

            # create task meta first
            task_meta_factory = (TrackedTaskMetaFactory
                                 if tracking_mode else TaskMetaFactory)
            factory = task_meta_factory(config=task_config,
                                        task_cls=cls,
                                        task_args=args,
                                        task_kwargs=kwargs)
            task_meta = factory.create_dbnd_task_meta()

            # If a Task has already been instantiated with the same parameters,
            # the previous instance is returned to reduce the number of object instances.
            tic = get_databand_context().task_instance_cache
            task = tic.get_task_obj_by_id(task_meta.obj_key.id)
            if not task or tracking_mode or hasattr(task, "_dbnd_no_cache"):
                task = cls._build_task_obj(task_meta)
                tic.register_task_obj_instance(task)

                # now the task is created - all nested constructors will see it as parent
                with task_context(task, TaskContextPhase.BUILD):
                    task._initialize()
                    task._validate()
                    task.task_meta.config_layer = config.config_layer

                tic.register_task_instance(task)

            parent_task = try_get_current_task()
            if (parent_task and hasattr(task, "task_id")
                    and (task.task_essence != TaskEssence.CONFIG)):
                parent_task.descendants.add_child(task.task_id)

            return task
Example #28
def create_alert(
    job_name,
    job_id,
    task_name,
    task_repr,
    uid,
    alert_class,
    severity,
    operator,
    value,
    user_metric,
    alert_on_historical_runs=True,
    is_str_value=False,
    **optional_fields,
):
    alert = {
        "type": alert_class,
        "job_name": job_name,
        "job_id": job_id,
        "task_name": task_name,
        "task_repr": task_repr,
        "uid": uid,
        "operator": operator,
        "value": value,
        "severity": severity,
        "user_metric": user_metric,
        "alert_on_historical_runs": alert_on_historical_runs,
        "is_str_value": is_str_value,
    }
    if operator == AlertDefOperator.ANOMALY.value:
        alert["ml_alert"] = {
            "look_back": optional_fields["look_back"],
            "sensitivity": optional_fields["sensitivity"],
        }
    if operator == AlertDefOperator.RANGE.value:
        alert["operator"] = "RangeAlert"
        alert["value"] = json.dumps({
            "baseline": optional_fields["baseline"],
            "range": optional_fields["range"]
        })
    alert = {key: value for key, value in alert.items() if value is not None}
    alert_def_uid = _post_alert(get_databand_context().databand_api_client,
                                alert)
    return alert_def_uid
Example #29
def create_dbnd_task(config, new_task_factory, task_cls, task_args, task_kwargs):
    # type: (DbndConfig, Any, Type[_BaseTask], Any, Any) -> _BaseTask
    tracking_mode = task_cls.is_tracking_mode

    task_meta_factory = TrackedTaskMetaFactory if tracking_mode else TaskMetaFactory
    factory = task_meta_factory(
        config=config, task_cls=task_cls, task_args=task_args, task_kwargs=task_kwargs,
    )

    task_meta = factory.create_dbnd_task_meta()

    # If a Task has already been instantiated with the same parameters,
    # the previous instance is returned to reduce the number of object instances.
    tic = get_databand_context().task_instance_cache
    task = tic.get_task_obj_by_id(task_meta.obj_key.id)
    if not task or tracking_mode or hasattr(task, "_dbnd_no_cache"):
        task = new_task_factory(task_meta)
        tic.register_task_obj_instance(task)

        # now the task is created - all nested constructors will see it as parent
        with task_context(task, TaskContextPhase.BUILD):
            task._initialize()
            task._validate()
            task.task_meta.config_layer = config.config_layer

        tic.register_task_instance(task)

    parent_task = try_get_current_task()
    if (
        parent_task
        and hasattr(task, "task_id")
        and isinstance(task, _TaskParamContainer)
    ):
        parent_task.task_meta.add_child(task.task_id)

    return task
Example #30
File: alerts.py Project: Dtchil/dbnd
def delete_alerts(uids):
    """delete alerts by uids"""
    get_databand_context().databand_api_client.api_request(
        endpoint="alert_defs/delete", data=uids, method="POST"
    )