def start(self):
    self.result_queue = SynchronizedQueue()
    self.queue = None
    self.workers = []
    self.workers_used = 0
    self.workers_active = 0
    # parallelism == 0 selects the unbounded strategy (one worker process per
    # task); any positive value selects a fixed-size pool of workers.
    self.impl = (LocalExecutor._UnlimitedParallelism(self)
                 if self.parallelism == 0
                 else LocalExecutor._LimitedParallelism(self))
    self.impl.start()
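A toy sketch of the queue pattern start() sets up, using only the standard library: worker processes push results onto a process-safe queue that the parent drains, which is presumably why result_queue is a SynchronizedQueue rather than a plain queue.Queue. The names below are illustrative, not the real LocalExecutor internals.

# Toy illustration (hypothetical, not LocalExecutor's actual workers):
# separate processes report results through a multiprocessing-safe queue.
import multiprocessing

def worker(result_queue, key):
    # A real worker would run the task command here before reporting.
    result_queue.put((key, 'success'))

if __name__ == '__main__':
    result_queue = multiprocessing.Queue()  # stands in for SynchronizedQueue
    procs = [multiprocessing.Process(target=worker, args=(result_queue, i))
             for i in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    while not result_queue.empty():
        print(result_queue.get())  # e.g. (0, 'success')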
class DagFileProcessorAgent(LoggingMixin):
    """
    Agent for DAG file processing. It is responsible for all DAG
    parsing-related jobs in the scheduler process. Mainly, it spins up a
    DagFileProcessorManager in a subprocess, collects DAG parsing results
    from it, and communicates signals and DAG parsing stats with it.
    """

    def __init__(self,
                 dag_directory,
                 file_paths,
                 max_runs,
                 processor_factory,
                 async_mode):
        """
        :param dag_directory: Directory where DAG definitions are kept. All
            files in file_paths should be under this directory
        :type dag_directory: unicode
        :param file_paths: list of file paths that contain DAG definitions
        :type file_paths: list[unicode]
        :param max_runs: The number of times to parse and schedule each file.
            -1 for unlimited.
        :type max_runs: int
        :param processor_factory: function that creates processors for DAG
            definition files. Arguments are (dag_definition_path, log_file_path)
        :type processor_factory: (unicode, unicode, list) -> (AbstractDagFileProcessor)
        :param async_mode: Whether to start the agent in async mode
        :type async_mode: bool
        """
        self._file_paths = file_paths
        self._file_path_queue = []
        self._dag_directory = dag_directory
        self._max_runs = max_runs
        self._processor_factory = processor_factory
        self._async_mode = async_mode
        # Map from file path to the processor
        self._processors = {}
        # Map from file path to the last runtime
        self._last_runtime = {}
        # Map from file path to the last finish time
        self._last_finish_time = {}
        # Map from file path to the number of runs
        self._run_count = defaultdict(int)
        # Pids of DAG parse
        self._all_pids = []
        # Pipe for communicating signals
        self._parent_signal_conn, self._child_signal_conn = multiprocessing.Pipe()
        # Pipe for communicating DagParsingStat
        self._stat_queue = SynchronizedQueue()
        self._result_queue = SynchronizedQueue()
        self._process = None
        self._done = False
        # Initialized as True so we do not deactivate w/o any actual DAG parsing.
        self._all_files_processed = True
        self._result_count = 0

    def start(self):
        """
        Launch the DagFileProcessorManager process and start the DAG parsing
        loop in the manager.
        """
        self._process = self._launch_process(self._dag_directory,
                                             self._file_paths,
                                             self._max_runs,
                                             self._processor_factory,
                                             self._child_signal_conn,
                                             self._stat_queue,
                                             self._result_queue,
                                             self._async_mode)
        self.log.info("Launched DagFileProcessorManager with pid: %s",
                      self._process.pid)

    def heartbeat(self):
        """
        Should only be used when the DAG file processor manager was launched
        in sync mode. Sends the agent heartbeat signal to the manager.
        """
        self._parent_signal_conn.send(DagParsingSignal.AGENT_HEARTBEAT)

    def wait_until_finished(self):
        """
        Should only be used when the DAG file processor manager was launched
        in sync mode. Waits for the done signal from the manager.
        """
        while True:
            if self._parent_signal_conn.recv() == DagParsingSignal.MANAGER_DONE:
                break

    @staticmethod
    def _launch_process(dag_directory,
                        file_paths,
                        max_runs,
                        processor_factory,
                        signal_conn,
                        _stat_queue,
                        result_queue,
                        async_mode):
        def helper():
            # Reload configurations and settings to avoid collisions with the
            # parent process: this process may need custom configurations that
            # cannot be shared (e.g. RotatingFileHandler), and reusing the
            # parent's SQLAlchemy connection pool can corrupt connections.
            os.environ['CONFIG_PROCESSOR_MANAGER_LOGGER'] = 'True'
            # Replicate how the logging module is loaded in logging_config.py.
            reload_module(import_module(
                airflow.settings.LOGGING_CLASS_PATH.rsplit('.', 1)[0]))
            reload_module(airflow.settings)
            airflow.settings.initialize()
            del os.environ['CONFIG_PROCESSOR_MANAGER_LOGGER']

            processor_manager = DagFileProcessorManager(dag_directory,
                                                        file_paths,
                                                        max_runs,
                                                        processor_factory,
                                                        signal_conn,
                                                        _stat_queue,
                                                        result_queue,
                                                        async_mode)
            processor_manager.start()

        p = multiprocessing.Process(target=helper,
                                    args=(),
                                    name="DagFileProcessorManager")
        p.start()
        return p

    def harvest_simple_dags(self):
        """
        Harvest DAG parsing results from the result queue and sync metadata
        from the stat queue.

        :return: List of parsing results in SimpleDag format.
        """
        # Metadata and results to be harvested can be inconsistent,
        # but it should not be a big problem.
        self._sync_metadata()
        # Heartbeat after syncing metadata so we do not restart the manager
        # if it has processed all files max_runs times and exited normally.
        self._heartbeat_manager()
        simple_dags = []
        qsize = self._result_queue.qsize()
        for _ in range(qsize):
            simple_dags.append(self._result_queue.get())

        self._result_count = 0

        return simple_dags

    def _heartbeat_manager(self):
        """
        Heartbeat the DAG file processor manager and restart it if it is not
        alive.
        """
        if self._process and not self._process.is_alive() and not self.done:
            self.start()

    def _sync_metadata(self):
        """
        Sync metadata from the stat queue, keeping only the latest stat.
        """
        while not self._stat_queue.empty():
            stat = self._stat_queue.get()
            self._file_paths = stat.file_paths
            self._all_pids = stat.all_pids
            self._done = stat.done
            self._all_files_processed = stat.all_files_processed
            self._result_count += stat.result_count

    @property
    def file_paths(self):
        return self._file_paths

    @property
    def done(self):
        return self._done

    @property
    def all_files_processed(self):
        return self._all_files_processed

    def terminate(self):
        """
        Send a termination signal to the DAG parsing processor manager, which
        is expected to terminate all DAG file processors.
        """
        self.log.info("Sending termination message to manager.")
        self._child_signal_conn.send(DagParsingSignal.TERMINATE_MANAGER)

    def end(self):
        """
        Terminate (and then kill) the launched manager process.
        """
        if not self._process:
            self.log.warning('Ending without manager process.')
            return
        this_process = psutil.Process(os.getpid())
        try:
            manager_process = psutil.Process(self._process.pid)
        except psutil.NoSuchProcess:
            self.log.info("Manager process not running.")
            return

        # First try SIGTERM
        if manager_process.is_running() \
                and manager_process.pid in [x.pid for x in this_process.children()]:
            self.log.info("Terminating manager process: %s",
                          manager_process.pid)
            manager_process.terminate()
            # TODO: Remove magic number
            timeout = 5
            self.log.info("Waiting up to %ss for manager process to exit...",
                          timeout)
            try:
                psutil.wait_procs({manager_process}, timeout)
            except psutil.TimeoutExpired:
                self.log.debug("Ran out of time while waiting for "
                               "processes to exit")

        # Then SIGKILL
        if manager_process.is_running() \
                and manager_process.pid in [x.pid for x in this_process.children()]:
            self.log.info("Killing manager process: %s", manager_process.pid)
            manager_process.kill()
            manager_process.wait()
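For orientation, a minimal sketch of how a scheduler loop might drive this agent in sync mode. Only the agent methods come from the class above; the surrounding wiring (the dag directory, the file path list, the processor_factory, and what is done with the harvested simple_dags) is hypothetical, and the real scheduler wiring differs in detail.

# Hypothetical sync-mode driver loop for DagFileProcessorAgent (sketch only).
agent = DagFileProcessorAgent(
    dag_directory='/path/to/dags',                 # assumed path
    file_paths=['/path/to/dags/example_dag.py'],   # assumed file list
    max_runs=1,
    processor_factory=processor_factory,           # assumed factory callable
    async_mode=False)
agent.start()
while not agent.done:
    agent.heartbeat()            # sync mode: nudge the manager to parse once
    agent.wait_until_finished()  # block until the manager sends MANAGER_DONE
    simple_dags = agent.harvest_simple_dags()
    # ... hand simple_dags to the scheduling logic ...
agent.terminate()  # ask the manager to stop its file processors
agent.end()        # SIGTERM, then SIGKILL if the manager is still alive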
class AirflowKubernetesScheduler(LoggingMixin):
    def __init__(self, kube_config, task_queue, result_queue, kube_client,
                 worker_uuid):
        self.log.debug("Creating Kubernetes executor")
        self.kube_config = kube_config
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.namespace = self.kube_config.kube_namespace
        self.log.debug("Kubernetes using namespace %s", self.namespace)
        self.kube_client = kube_client
        self.launcher = PodLauncher(kube_client=self.kube_client)
        self.worker_configuration = WorkerConfiguration(
            kube_config=self.kube_config)
        self.watcher_queue = SynchronizedQueue()
        self.worker_uuid = worker_uuid
        self.kube_watcher = self._make_kube_watcher()

    def _make_kube_watcher(self):
        resource_version = KubeResourceVersion.get_current_resource_version()
        watcher = KubernetesJobWatcher(self.namespace, self.watcher_queue,
                                       resource_version, self.worker_uuid)
        watcher.start()
        return watcher

    def _health_check_kube_watcher(self):
        if not self.kube_watcher.is_alive():
            self.log.error('Error while health checking kube watcher process. '
                           'Process died for unknown reasons.')
            self.kube_watcher = self._make_kube_watcher()

    def run_next(self, next_job):
        """
        run_next checks the task_queue for any un-run jobs. It then creates a
        unique job-id, launches that job in the cluster, and stores relevant
        info in the current_jobs map so we can track the job's status.
        """
        self.log.info('Kubernetes job is %s', str(next_job))
        key, command, kube_executor_config = next_job
        dag_id, task_id, execution_date, try_number = key
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s",
                       self.kube_config.kube_image)
        pod = self.worker_configuration.make_pod(
            namespace=self.namespace,
            worker_uuid=self.worker_uuid,
            pod_id=self._create_pod_id(dag_id, task_id),
            dag_id=self._make_safe_label_value(dag_id),
            task_id=self._make_safe_label_value(task_id),
            try_number=try_number,
            execution_date=self._datetime_to_label_safe_datestring(
                execution_date),
            airflow_command=command,
            kube_executor_config=kube_executor_config)
        # The watcher will monitor pods, so we do not block here.
        self.launcher.run_pod_async(pod)
        self.log.debug("Kubernetes Job created!")

    def delete_pod(self, pod_id):
        if self.kube_config.delete_worker_pods:
            try:
                self.kube_client.delete_namespaced_pod(
                    pod_id, self.namespace, body=client.V1DeleteOptions())
            except ApiException as e:
                # Ignore 404: the pod is already deleted.
                if e.status != 404:
                    raise

    def sync(self):
        """
        Checks the status of all currently running Kubernetes jobs. If a job
        is completed, its status is placed in the result queue to be sent back
        to the scheduler.
        """
        self._health_check_kube_watcher()
        while not self.watcher_queue.empty():
            self.process_watcher_task()

    def process_watcher_task(self):
        pod_id, state, labels, resource_version = self.watcher_queue.get()
        self.log.info(
            'Attempting to finish pod; pod_id: %s; state: %s; labels: %s',
            pod_id, state, labels)
        key = self._labels_to_key(labels=labels)
        if key:
            self.log.debug('finishing job %s - %s (%s)', key, state, pod_id)
            self.result_queue.put((key, state, pod_id, resource_version))

    @staticmethod
    def _strip_unsafe_kubernetes_special_chars(string):
        """
        Kubernetes only supports lowercase alphanumeric characters, "-", and
        "." in pod names. However, there are special rules about how "-" and "."
        can be used, so we keep only alphanumeric characters. See
        https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
        for details.

        :param string: The requested Pod name
        :return: ``str`` Pod name stripped of any unsafe characters
        """
        return ''.join(ch.lower() for ch in string if ch.isalnum())

    @staticmethod
    def _make_safe_pod_id(safe_dag_id, safe_task_id, safe_uuid):
        r"""
        Kubernetes pod names must be <= 253 chars and must pass the following
        regex for validation:
        "^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$"

        :param safe_dag_id: a dag_id with only alphanumeric characters
        :param safe_task_id: a task_id with only alphanumeric characters
        :param safe_uuid: a uuid
        :return: ``str`` valid Pod name of appropriate length
        """
        MAX_POD_ID_LEN = 253
        safe_key = safe_dag_id + safe_task_id
        safe_pod_id = safe_key[:MAX_POD_ID_LEN - len(safe_uuid) - 1] \
            + "-" + safe_uuid
        return safe_pod_id

    @staticmethod
    def _make_safe_label_value(string):
        """
        Valid label values must be 63 characters or less, and must be empty or
        begin and end with an alphanumeric character ([a-z0-9A-Z]), with
        dashes (-), underscores (_), dots (.), and alphanumerics in between.

        If the label value is greater than 63 chars once made safe, or differs
        in any way from the original value sent to this function, then we
        truncate it to 53 chars and append a unique 9-char hash.
        """
        MAX_LABEL_LEN = 63
        safe_label = re.sub(r'^[^a-z0-9A-Z]*|[^a-zA-Z0-9_\-\.]|[^a-z0-9A-Z]*$',
                            '', string)
        if len(safe_label) > MAX_LABEL_LEN or string != safe_label:
            safe_hash = hashlib.md5(string.encode()).hexdigest()[:9]
            safe_label = safe_label[:MAX_LABEL_LEN - len(safe_hash) - 1] \
                + "-" + safe_hash
        return safe_label

    @staticmethod
    def _create_pod_id(dag_id, task_id):
        safe_dag_id = AirflowKubernetesScheduler._strip_unsafe_kubernetes_special_chars(
            dag_id)
        safe_task_id = AirflowKubernetesScheduler._strip_unsafe_kubernetes_special_chars(
            task_id)
        safe_uuid = AirflowKubernetesScheduler._strip_unsafe_kubernetes_special_chars(
            uuid4().hex)
        return AirflowKubernetesScheduler._make_safe_pod_id(safe_dag_id,
                                                            safe_task_id,
                                                            safe_uuid)

    @staticmethod
    def _label_safe_datestring_to_datetime(string):
        """
        Kubernetes doesn't permit ":" in labels.
        ISO datetime format uses ":" but not "_"; the label-safe encoding
        replaced ":" with "_", so reverse that here.

        :param string: str
        :return: datetime.datetime object
        """
        return parser.parse(string.replace('_plus_', '+').replace("_", ":"))

    @staticmethod
    def _datetime_to_label_safe_datestring(datetime_obj):
        """
        Kubernetes doesn't like ":" in labels; since the ISO datetime format
        uses ":" but not "_", replace ":" with "_".

        :param datetime_obj: datetime.datetime object
        :return: ISO-like string representing the datetime
        """
        return datetime_obj.isoformat().replace(":", "_").replace('+', '_plus_')

    def _labels_to_key(self, labels):
        try_num = 1
        try:
            try_num = int(labels.get('try_number', '1'))
        except ValueError:
            self.log.warning("could not get try_number as an int: %s",
                             labels.get('try_number', '1'))

        try:
            dag_id = labels['dag_id']
            task_id = labels['task_id']
            ex_time = self._label_safe_datestring_to_datetime(
                labels['execution_date'])
        except Exception as e:
            self.log.warning(
                'Error while retrieving labels; labels: %s; exception: %s',
                labels, e)
            return None

        with create_session() as session:
            tasks = (
                session
                .query(TaskInstance)
                .filter_by(execution_date=ex_time)
                .all()
            )
            self.log.info('Checking %s task instances.', len(tasks))
            for task in tasks:
                if (self._make_safe_label_value(task.dag_id) == dag_id and
                        self._make_safe_label_value(task.task_id) == task_id and
                        task.execution_date == ex_time):
                    self.log.info(
                        'Found matching task %s-%s (%s) with current state of %s',
                        task.dag_id, task.task_id, task.execution_date,
                        task.state)
                    dag_id = task.dag_id
                    task_id = task.task_id
                    return (dag_id, task_id, ex_time, try_num)

        self.log.warning(
            'Failed to find and match task details to a pod; labels: %s',
            labels)
        return None
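A short sketch exercising the static label helpers above. The expected strings in the comments follow mechanically from the documented replacements and the md5 truncation; the sketch assumes only the module's existing imports (hashlib, dateutil's parser) plus the standard library datetime, and re-imports hashlib for self-containment.

# Sketch: round-tripping an execution_date through the label-safe encoding
# and checking _make_safe_label_value's hash-suffix behavior.
import hashlib
from datetime import datetime, timezone

dt = datetime(2019, 1, 1, 12, 30, 0, tzinfo=timezone.utc)
label = AirflowKubernetesScheduler._datetime_to_label_safe_datestring(dt)
# ':' -> '_' and '+' -> '_plus_'
assert label == '2019-01-01T12_30_00_plus_00_00'
# The encoding is reversible:
assert AirflowKubernetesScheduler._label_safe_datestring_to_datetime(label) == dt

# Already-safe values pass through untouched...
assert AirflowKubernetesScheduler._make_safe_label_value('my_dag') == 'my_dag'
# ...while unsafe ones are stripped and suffixed with a 9-char md5 hash, so
# distinct originals stay distinct after sanitization.
safe = AirflowKubernetesScheduler._make_safe_label_value('my dag!')
assert len(safe) <= 63
assert safe == 'mydag-' + hashlib.md5('my dag!'.encode()).hexdigest()[:9]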