Пример #1
0
def _get_executor(executor_name):
    """Create a new instance of the executor identified by ``executor_name``.

    The four built-in names are handled directly. Any other name is treated
    as ``plugin_module.executor`` and resolved through this module's globals
    after the plugins have been integrated.

    Args:
        executor_name: A built-in executor name or a dotted
            ``plugin_module.executor`` name.

    Returns:
        A freshly constructed executor instance.

    Raises:
        XToolException: If the name is not in ``plugin_module.executor``
            form or the plugin module is not present in the globals.
    """
    if executor_name == 'LocalExecutor':
        return LocalExecutor()
    if executor_name == 'SequentialExecutor':
        return SequentialExecutor()
    if executor_name == 'CeleryExecutor':
        # Imported lazily so the dependency is only needed when selected.
        from xTool.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    if executor_name == 'DaskExecutor':
        # Imported lazily so the dependency is only needed when selected.
        from xTool.executors.dask_executor import DaskExecutor
        return DaskExecutor()

    # Not a built-in executor: load the plugins and look it up there.
    _integrate_plugins()
    name_parts = executor_name.split('.')
    if len(name_parts) != 2:
        raise XToolException(
            "Executor {0} not supported: please specify in format plugin_module.executor"
            .format(executor_name))

    plugin_module_name, class_name = name_parts
    if plugin_module_name not in globals():
        raise XToolException(
            "Executor {0} not supported.".format(executor_name))

    executor_class = globals()[plugin_module_name].__dict__[class_name]
    return executor_class()
Пример #2
0
def validate_key(k, max_length=250):
    """Validate the format of a key.

    Args:
        k: The key to validate.
        max_length: Maximum number of characters allowed in the key.

    Returns:
        True when the key is valid.

    Raises:
        TypeError: If ``k`` is not a string.
        XToolException: If ``k`` is longer than ``max_length`` or contains
            anything other than alphanumerics, dashes, dots and underscores.
    """
    if not isinstance(k, basestring):
        raise TypeError("The key has to be a string")
    if len(k) > max_length:
        raise XToolException(
            "The key has to be less than {0} characters".format(max_length))
    # ``\Z`` anchors at the very end of the string; a plain ``$`` would also
    # match just before a trailing newline and let "key\n" slip through.
    if not re.match(r'^[A-Za-z0-9_\-\.]+\Z', k):
        raise XToolException(
            "The key ({k}) has to be made of alphanumeric characters, dashes, "
            "dots and underscores exclusively".format(k=k))
    return True
Пример #3
0
def load_backend_module_from_conf(section, key, default_backend, conf=None):
    """Import the backend module named in the configuration.

    Args:
        section: Section of the configuration to read.
        key: Key inside ``section`` holding the module path.
        default_backend: Module path used when ``conf`` is None or the
            lookup raises ``conf.XToolConfigException``.
        conf: Configuration object providing ``get(section, key)``.

    Returns:
        The imported module.

    Raises:
        XToolException: If the module cannot be imported.
    """
    backend = default_backend
    if conf is not None:
        try:
            backend = conf.get(section, key)
        except conf.XToolConfigException:
            # Nothing configured: keep the default backend.
            pass

    try:
        return import_module(backend)
    except ImportError as err:
        log.critical("Cannot import %s for %s %s due to: %s", backend, section,
                     key, err)
        raise XToolException(err)
Пример #4
0
    def done(self):
        """Report whether the file-processing child process has finished.

        Returns:
            True once a result has been read from the result queue or the
            process is no longer alive; False otherwise.

        Raises:
            XToolException: If the process has not been started yet.
        """
        if self._process is None:
            raise XToolException("Tried to see if it's done before starting!")

        if self._done:
            return True

        if self._result_queue.empty():
            # No result yet: only finished if the process has already exited.
            if self._process.is_alive():
                return False
            self._done = True
            # The result may have arrived between the two queue checks.
            if not self._result_queue.empty():
                self._result = self._result_queue.get_nowait()
        else:
            # The child posted its result.
            self._result = self._result_queue.get_nowait()
            self._done = True

        # Wait for the child to exit and release its resources.
        self.log.debug("Waiting for %s", self._process)
        self._process.join()
        return True
Пример #5
0
    def wrapper(*args, **kwargs):
        """Fill in missing keyword arguments from DAG defaults, then call ``func``.

        Defaults come from the ``dag`` keyword argument (or the context
        manager DAG) and from the ``default_args`` keyword argument; the
        various ``params`` sources are merged into a single dict.

        Raises:
            XToolException: If more than one positional argument is given or
                a required argument is still missing after applying defaults.
        """
        # At most one positional argument is allowed.
        if len(args) > 1:
            raise XToolException(
                "Use keyword arguments when initializing operators")

        # Start from the defaults carried by the DAG, if there is one.
        merged_defaults = {}
        merged_params = {}
        dag = kwargs.get('dag', None) or airflow.models._CONTEXT_MANAGER_DAG
        if dag:
            merged_defaults = copy(dag.default_args) or {}
            merged_params = copy(dag.params) or {}

        # Explicit ``params`` override the DAG's params.
        merged_params.update(kwargs.get('params', {}))

        # ``default_args['params']`` is folded into the params and removed
        # from the ``default_args`` dict that was passed in.
        call_defaults = kwargs.get('default_args', {})
        if 'params' in call_defaults:
            merged_params.update(call_defaults['params'])
            del call_defaults['params']

        # ``default_args`` passed to the call override the DAG's defaults.
        merged_defaults.update(call_defaults)

        # Apply a default to every signature parameter not given explicitly.
        for name in sig_cache.parameters:
            if name in merged_defaults:
                kwargs.setdefault(name, merged_defaults[name])

        # Required arguments that are still missing are an error.
        missing_args = list(non_optional_args - set(kwargs))
        if missing_args:
            raise XToolException(
                "Argument {0} is required".format(missing_args))

        kwargs['params'] = merged_params

        return func(*args, **kwargs)
Пример #6
0
    def wrapper(*args, **kwargs):
        """Fill in missing keyword arguments from model defaults, then call ``func``.

        Defaults come from the ``model`` keyword argument and from the
        ``default_args`` keyword argument; the various ``params`` sources are
        merged into a single dict.

        Raises:
            XToolException: If more than one positional argument is given or
                a required argument is still missing after applying defaults.
        """
        # At most one positional argument is allowed.
        if len(args) > 1:
            raise XToolException(
                "Use keyword arguments when initializing operators")

        # Start from the defaults carried by the model, if one was passed.
        merged_defaults = {}
        merged_params = {}
        model = kwargs.get('model', None)
        if model:
            merged_defaults = copy(model.default_args) or {}
            merged_params = copy(model.params) or {}

        # Explicit ``params`` override the model's params.
        merged_params.update(kwargs.get('params', {}))

        # ``default_args['params']`` is folded into the params and removed
        # from the ``default_args`` dict that was passed in.
        call_defaults = kwargs.get('default_args', {})
        if 'params' in call_defaults:
            merged_params.update(call_defaults['params'])
            del call_defaults['params']

        # ``default_args`` passed to the call override the model's defaults.
        merged_defaults.update(call_defaults)

        # Apply a default to every signature parameter not given explicitly.
        for name in sig_cache.parameters:
            if name in merged_defaults:
                kwargs.setdefault(name, merged_defaults[name])

        # Required arguments that are still missing are an error.
        missing_args = list(non_optional_args - set(kwargs))
        if missing_args:
            raise XToolException(
                "Argument {0} is required".format(missing_args))

        kwargs['params'] = merged_params
        return func(*args, **kwargs)
Пример #7
0
    def __init__(self, name, units_str, qty):
        """Store the resource's name, unit string and quantity.

        Raises:
            XToolException: If ``qty`` is negative.
        """
        if qty < 0:
            message = (
                'Received resource quantity {} for resource {} but resource quantity '
                'must be non-negative.'
            ).format(qty, name)
            raise XToolException(message)

        self._name, self._units_str, self._qty = name, units_str, qty
Пример #8
0
 def get_dag(self, dag_id):
     """
     Look up a DAG in ``self.dag_id_to_simple_dag`` by its ID.

     :param dag_id: DAG ID
     :type dag_id: unicode
     :return: the entry stored under the given DAG ID
     :rtype: SimpleDag
     :raises XToolException: if the DAG ID is not present in the mapping
     """
     if dag_id in self.dag_id_to_simple_dag:
         return self.dag_id_to_simple_dag[dag_id]
     raise XToolException("Unknown DAG ID {}".format(dag_id))
Пример #9
0
def create_object_from_plugin_module(name, *args, **kwargs):
    """Instantiate a class that lives in a plugin module.

    The plugin module is looked up by name in this module's globals and the
    class is taken from that module's ``__dict__``.

    Args:
        name: Dotted name in the form ``plugin_module.class_name``.
        *args: Positional arguments forwarded to the class.
        **kwargs: Keyword arguments forwarded to the class.

    Returns:
        The newly created instance.

    Raises:
        XToolException: If ``name`` is not made of exactly two dotted parts
            or the plugin module is not present in the globals.
    """
    name_parts = name.split('.')
    if len(name_parts) != 2:
        raise XToolException(
            "Executor {0} not supported: "
            "please specify in format plugin_module.executor".format(name))

    # First part is the plugin module, second the class inside it.
    plugin_module_name, class_name = name_parts
    if plugin_module_name not in globals():
        raise XToolException("Executor {0} not supported.".format(name))

    plugin_class = globals()[plugin_module_name].__dict__[class_name]
    return plugin_class(*args, **kwargs)
Пример #10
0
 def kill(self, session=None):
     """Shut the job down: stamp its end date, run ``on_kill`` and raise.

     NOTE(review): ``session`` defaults to None but is used unconditionally;
     presumably a decorator outside this view injects it -- confirm.

     Raises:
         XToolException: Always, after the end date has been committed.
     """
     job_record = session.query(BaseJob).filter(BaseJob.id == self.id).first()
     job_record.end_date = timezone.system_now()

     # A failing on_kill() hook is logged but must not prevent the job's
     # end date from being saved.
     try:
         self.on_kill()
     except Exception as exc:
         failure_message = 'on_kill() method failed: {}'.format(exc)
         self.log.error(failure_message)

     # Persist the end date.
     session.merge(job_record)
     session.commit()

     raise XToolException("Job shut down externally.")
Пример #11
0
def topological_sort(tasks):
    """Sort tasks topologically.

    Orders the tasks so that every task comes after all of its upstream
    dependencies that are part of ``tasks``. Tasks with no ordering
    constraint between them keep their relative input order.

    Heavily inspired by:
    http://blog.jupo.org/2012/04/06/topological-sorting-acyclic-directed-graphs/

    :param tasks: iterable of tasks, each exposing an ``upstream_list``
    :return: tuple of tasks in topological order
    :raises XToolException: if the tasks contain a cyclic dependency
    """
    # Work on a copy so the caller's collection is left unmodified.
    graph_unsorted = list(tasks)
    graph_sorted = []

    # Run until the unsorted graph is empty.
    while graph_unsorted:
        # A pass that resolves no node at all means the remaining nodes
        # depend on each other, i.e. the graph is cyclic.
        acyclic = False
        # Iterate over a snapshot: removing from the list being iterated
        # would make the loop skip the element after each removed node.
        for node in list(graph_unsorted):
            if any(edge in graph_unsorted for edge in node.upstream_list):
                # Still waiting on an unresolved upstream task.
                continue
            acyclic = True
            graph_unsorted.remove(node)
            graph_sorted.append(node)

        if not acyclic:
            raise XToolException("A cyclic dependency occurred")

    return tuple(graph_sorted)
Пример #12
0
 def terminate(self, sigkill=False):
     """Terminate the file-processing child process.

     Args:
         sigkill: When true, send SIGKILL if the process is still alive
             after ``terminate()`` and a 5 second join.

     Raises:
         XToolException: If the process has not been started yet.
     """
     if self._process is None:
         raise XToolException("Tried to call stop before starting!")

     # Drop the reference to the result queue.
     self._result_queue = None

     # Ask the process to stop and give it up to 5 seconds to exit.
     self._process.terminate()
     self._process.join(5)

     if not sigkill:
         return
     if self._process.is_alive():
         # Still running after terminate(): force-kill it.
         self.log.warning("Killing PID %s", self._process.pid)
         os.kill(self._process.pid, signal.SIGKILL)
Пример #13
0
def get_task_runner(local_task_job, conf):
    """Get the task runner that can be used to run the given job.

    The runner type is read from the ``TASK_RUNNER`` key of the ``core``
    section of ``conf``.

    :param local_task_job: The LocalTaskJob associated with the TaskInstance
        that needs to be executed.
    :type local_task_job: airflow.jobs.LocalTaskJob
    :param conf: configuration object; also passed on to the runner
    :return: The task runner to use to run the task.
    :rtype: airflow.task.task_runner.base_task_runner.BaseTaskRunner
    :raises XToolException: if the configured runner type is unknown
    """
    runner_name = conf.get('core', 'TASK_RUNNER')

    if runner_name == "BashTaskRunner":
        return BashTaskRunner(local_task_job, conf)

    if runner_name == "CgroupTaskRunner":
        # Imported lazily so the dependency is only needed when selected.
        from airflow.contrib.task_runner.cgroup_task_runner import CgroupTaskRunner
        return CgroupTaskRunner(local_task_job, conf)

    raise XToolException(
        "Unknown task runner type {}".format(runner_name))
Пример #14
0
def run_command(command):
    """Run a command and return its standard output as text.

    The command string is tokenised with ``shlex.split``. On Windows the
    tokens are run through the shell; elsewhere they are executed directly,
    because passing an argument list together with ``shell=True`` on POSIX
    runs only the first token and silently drops every argument.

    Args:
        command: The command line to execute.

    Returns:
        The decoded standard output of the command.

    Raises:
        XToolException: If the command is empty, cannot be started, or
            exits with a non-zero return code.
    """
    error_template = (
        "Cannot execute {}. Error code is: {}. Output: {}, Stderr: {}")

    on_windows = platform.system() == 'Windows'
    argv = shlex.split(command)
    if not argv:
        raise XToolException(
            error_template.format(command, None, '', 'empty command'))

    try:
        process = subprocess.Popen(argv,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=on_windows,
                                   close_fds=not on_windows)
    except OSError as err:
        # Without a shell a missing executable fails here rather than as a
        # non-zero exit status; report it the same way to callers.
        raise XToolException(
            error_template.format(command, err.errno, '', err))

    # Decode both streams to text, ignoring undecodable bytes.
    output, stderr = [
        stream.decode(sys.getdefaultencoding(), 'ignore')
        for stream in process.communicate()
    ]

    if process.returncode != 0:
        raise XToolException(
            error_template.format(command, process.returncode, output, stderr))
    return output
Пример #15
0
 def pid(self):
     """Return the PID of the file-processing child process.

     Raises:
         XToolException: If the process has not been started yet.
     """
     if self._process is not None:
         return self._process.pid
     raise XToolException("Tried to get PID before starting!")
Пример #16
0
 def start_time(self):
     """Return the start time of the file-processing child process.

     Raises:
         XToolException: If no start time has been recorded yet.
     """
     if self._start_time is not None:
         return self._start_time
     raise XToolException("Tried to get start time before it started!")
Пример #17
0
 def result(self):
     """Return the result produced by the file-processing child process.

     Raises:
         XToolException: If ``self.done`` is falsy.
     """
     if self.done:
         return self._result
     raise XToolException("Tried to get the result before it's done!")
Пример #18
0
 def exit_code(self):
     """Return the exit code of the file-processing child process.

     Raises:
         XToolException: If the process has not been marked as done.
     """
     if self._done:
         return self._process.exitcode
     raise XToolException(
         "Tried to call retcode before process was finished!")