Example #1
0
    def job_to_dag(self, job):  # type: (dict) -> Union[DAG, None]
        """Build an Airflow DAG with a single launcher task from a scheduled job dict.

        @param job: scheduler job definition; "name" and "cmd" are required,
            the remaining keys are optional scheduling metadata.
        @return: the constructed DAG
        """
        # Normalize both boundary dates to UTC so the DAG and default_args agree.
        start_date = convert_to_utc(job.get("start_date", None))
        end_date = convert_to_utc(job.get("end_date", None))

        default_args = {
            "owner": job.get("create_user", None),
            "depends_on_past": job.get("depends_on_past", False),
            # Use the UTC-converted value: previously the raw job["start_date"]
            # leaked through unconverted (and raised KeyError when missing),
            # disagreeing with the converted start_date passed to the DAG.
            "start_date": start_date,
            "end_date": end_date,
        }

        job_name = clean_job_name(job["name"])
        dag = DAG(
            "dbnd_launcher__%s" % job_name,
            start_date=start_date,
            default_args=default_args,
            schedule_interval=job.get("schedule_interval", None),
            catchup=job.get("catchup", False),
        )

        # The operator attaches itself to the DAG via the dag= kwarg.
        DbndSchedulerOperator(
            scheduled_cmd=job["cmd"],
            scheduled_job_name=job_name,
            scheduled_job_uid=job.get("uid", None),
            shell=config.getboolean("scheduler", "shell_cmd"),
            task_id="launcher",
            dag=dag,
            # Fall back to the default when retries is missing or falsy (None/0);
            # the previous get(..., default) or default was redundant.
            retries=job.get("retries") or self.default_retries,
        )

        return dag
    def job_to_dag(self, job):  # type: (dict) -> Union[DAG, None]
        """Build an Airflow DAG with a single launcher task from a scheduled job dict.

        @param job: scheduler job definition; "name" and "cmd" are required,
            the remaining keys are optional scheduling metadata.
        @return: the constructed DAG
        """
        # convert_to_utc usage might be dangerous, as there is the same function at airflow
        # however, that one use pendulum not from _vendorized

        # Only set default_args keys the job actually provides, so Airflow's
        # own defaults apply for the rest.
        default_args = {}
        if job.get("depends_on_past"):
            default_args["depends_on_past"] = job.get("depends_on_past")

        start_date = convert_to_utc(job.get("start_date"))
        if start_date:
            # Fixed key: Airflow reads "start_date" from default_args;
            # the previous "start_day" key was silently ignored.
            default_args["start_date"] = start_date

        if job.get("end_date"):
            default_args["end_date"] = convert_to_utc(job.get("end_date"))

        if job.get("owner"):
            # Fixed: guard and assignment now use the same key — previously
            # this assigned job.get("create_user"), which could set owner=None
            # when only "owner" was present in the job dict.
            default_args["owner"] = job.get("owner")

        job_name = clean_job_name(job["name"])
        dag = DAG(
            "%s" % job_name,
            start_date=start_date,
            default_args=default_args,
            schedule_interval=job.get("schedule_interval", None),
            catchup=job.get("catchup", False),
        )

        # Allow a project-specific operator class; fall back to the default.
        custom_operator_class = self.custom_operator_class or DbndSchedulerOperator
        custom_operator_class(
            scheduled_cmd=job["cmd"],
            scheduled_job_name=job_name,
            extra_args=job.get("extra_args", None),
            with_name=False,
            scheduled_job_uid=job.get("uid", None),
            shell=config.getboolean("scheduler", "shell_cmd"),
            task_id="launcher",
            dag=dag,
            # Fall back to the default when retries is missing or falsy.
            retries=job.get("retries") or self.default_retries,
        )

        return dag
Example #3
0
def clean_name_dns1123(value, max_size, postfix=None):
    # type:(str, int, Optional[str]) -> str
    """
    Build a dns-1123 compatible name from *value*.

    @param value: the base value to transform
    @param max_size: maximum allowed length of the result
    @param postfix: optional suffix appended to the result
    @return: dns-1123 compatible name
    """
    sanitized = clean_job_name(
        value=value,
        enabled_characters="-.",
        placeholder="-",
        max_size=max_size,
        postfix=postfix,
    )

    # dns-1123 names must begin and end with an alphanumeric character,
    # so strip any leftover punctuation from both ends.
    return strip_by(lambda ch: not ch.isalnum(), sanitized)
    def job_to_dag(self, job):  # type: (dict) -> Union[DAG, None]
        """Build an Airflow DAG with a single launcher task from a scheduled job dict.

        @param job: scheduler job definition; "name" and "cmd" are required,
            the remaining keys are optional scheduling metadata.
        @return: the constructed DAG
        """
        # Only set default_args keys the job actually provides, so Airflow's
        # own defaults apply for the rest.
        default_args = {}
        if job.get("depends_on_past"):
            default_args["depends_on_past"] = job.get("depends_on_past")

        start_date = convert_to_utc(job.get("start_date"))
        if start_date:
            # Fixed key: Airflow reads "start_date" from default_args;
            # the previous "start_day" key was silently ignored.
            default_args["start_date"] = start_date

        if job.get("end_date"):
            default_args["end_date"] = convert_to_utc(job.get("end_date"))

        if job.get("owner"):
            default_args["owner"] = job.get("owner")

        job_name = clean_job_name(job["name"])
        dag = DAG(
            "%s" % job_name,
            start_date=start_date,
            default_args=default_args,
            schedule_interval=job.get("schedule_interval", None),
            catchup=job.get("catchup", False),
        )

        # The operator attaches itself to the DAG via the dag= kwarg.
        DbndSchedulerOperator(
            task_id="launcher",
            dag=dag,
            # Fall back to the default when retries is missing or falsy.
            retries=job.get("retries") or self.default_retries,
            scheduled_cmd=job["cmd"],
            scheduled_job_name=job_name,
            with_name=False,
            scheduled_job_uid=job.get("uid", None),
            shell=config.getboolean("scheduler", "shell_cmd"),
        )

        return dag
Example #5
0
    def __init__(
        self,
        task,
        run,
        task_af_id=None,
        try_number=1,
        is_dynamic=None,
        task_engine=None,
    ):
        # type: (Task, DatabandRun, str, int, bool, EngineConfig)-> None
        """Create the per-run execution state for a single task.

        Binds *task* to *run*, assigns stable identifiers (task_run_uid,
        job_name/job_id, a DNS-1123 id for k8s), resolves the local working
        folder, initializes the first attempt via init_attempt(), and wires
        the tracking/runner/deploy controllers.
        """
        # actually this is used as Task uid

        self.task = task  # type: Task
        self.run = run  # type: DatabandRun
        self.task_engine = task_engine
        self.try_number = try_number
        # An explicit is_dynamic argument wins; otherwise inherit the task's flag.
        self.is_dynamic = is_dynamic if is_dynamic is not None else task.task_is_dynamic
        self.is_system = task.task_is_system
        self.task_af_id = task_af_id or self.task.task_id

        # A forced uid may be a concrete value or a TaskRunUidGen generator;
        # in the generator case derive the uid from the run/task context.
        if task.ctrl.force_task_run_uid:
            self.task_run_uid = tr_uid = task.ctrl.force_task_run_uid
            if isinstance(tr_uid, TaskRunUidGen):
                self.task_run_uid = tr_uid.generate_task_run_uid(
                    run=run, task=task, task_af_id=self.task_af_id
                )
        else:
            self.task_run_uid = get_uuid()

        # used by all kind of submission controllers
        self.job_name = clean_job_name(self.task_af_id).lower()
        # Short (8-char) uid suffix keeps the id unique while staying readable.
        self.job_id = self.job_name + "_" + str(self.task_run_uid)[:8]

        # DNS-1123 subdomain name (k8s)
        self.job_id__dns1123 = clean_job_name_dns1123(
            "dbnd.{task_family}.{task_name}".format(
                task_family=self.task.task_meta.task_family,
                task_name=self.task.task_meta.task_name,
            ),
            postfix=".%s" % str(self.task_run_uid)[:8],
        )

        # custom per task engine , or just use one from global env
        dbnd_local_root = (
            self.task_engine.dbnd_local_root or self.run.env.dbnd_local_root
        )
        # Local layout: <dbnd_local_root>/<run_folder_prefix>/tasks/<task_id>
        self.local_task_run_root = (
            dbnd_local_root.folder(run.run_folder_prefix)
            .folder("tasks")
            .folder(self.task.task_id)
        )

        # Attempt bookkeeping; init_attempt() is expected to populate
        # attempt_folder, meta_files and log for attempt #1.
        self._attempt_number = 1
        self.task_run_attempt_uid = get_uuid()
        self.attempt_folder = None
        self.meta_files = None
        self.log = None
        self.init_attempt()

        # TODO: inherit from parent task if disabled
        self.is_tracked = task._conf__tracked

        # Report to the real tracking store only when both the task and the
        # run are tracked; otherwise fall back to console-only output.
        if self.is_tracked and self.run.is_tracked:
            tracking_store = self.run.context.tracking_store
        else:
            tracking_store = ConsoleStore()

        self.tracking_store = tracking_store
        self.tracker = TaskRunTracker(task_run=self, tracking_store=tracking_store)
        self.runner = TaskRunRunner(task_run=self)
        self.deploy = TaskSyncCtrl(task_run=self)
        self.task_tracker_url = self.tracker.task_run_url()
        self.external_resource_urls = dict()
        self.errors = []

        # Scheduling state flags, presumably updated later during the run.
        self.is_root = False
        self.is_reused = False
        self.is_skipped = False
        # Task can be skipped as it's not required by any other task scheduled to run
        self.is_skipped_as_not_required = False

        self._airflow_context = None
        self._task_run_state = None

        self.start_time = None
        self.finished_time = None
Example #6
0
 def test_clean_job_name_3(self):
     # A custom placeholder replaces every disallowed character.
     result = clean_job_name("AaBb[]1111", placeholder=r"-")
     assert result == "aa-bb-1111"
Example #7
0
 def test_clean_job_name_1(self):
     # Default placeholder: disallowed characters collapse to an underscore.
     result = clean_job_name("Aa[]1111")
     assert result == "aa_1111"