def _get_executor(executor_name):
    """Create a new instance of the executor identified by ``executor_name``.

    The four built-in names (``LocalExecutor``, ``SequentialExecutor``,
    ``CeleryExecutor``, ``DaskExecutor``) are handled directly. Any other
    name is treated as a plugin executor written as
    ``plugin_module.executor`` and is resolved through this module's
    global namespace after the plugins have been integrated.

    :param executor_name: built-in executor name, or
        ``plugin_module.executor`` for a plugin-provided executor
    :return: a freshly constructed executor instance
    :raises XToolException: if the name is not in the two-part plugin
        format, or the plugin module is not present in ``globals()``
    """
    if executor_name == 'LocalExecutor':
        return LocalExecutor()
    if executor_name == 'SequentialExecutor':
        return SequentialExecutor()
    if executor_name == 'CeleryExecutor':
        # Imported lazily so the dependency is only needed when selected.
        from xTool.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    if executor_name == 'DaskExecutor':
        # Imported lazily so the dependency is only needed when selected.
        from xTool.executors.dask_executor import DaskExecutor
        return DaskExecutor()

    # Third-party executor: load the plugins, then look the class up.
    _integrate_plugins()
    name_parts = executor_name.split('.')
    if len(name_parts) != 2:
        raise XToolException(
            "Executor {0} not supported: please specify in format plugin_module.executor"
            .format(executor_name))

    plugin_name, class_name = name_parts
    module_scope = globals()
    if plugin_name not in module_scope:
        raise XToolException(
            "Executor {0} not supported.".format(executor_name))
    executor_factory = module_scope[plugin_name].__dict__[class_name]
    return executor_factory()
def validate_key(k, max_length=250):
    """Validate the format of a key.

    A valid key is a string of at most ``max_length`` characters made up
    only of ASCII letters, digits, underscores, dashes and dots.

    :param k: the key to validate
    :param max_length: maximum number of characters allowed in the key
    :return: ``True`` when the key is valid
    :raises TypeError: if ``k`` is not a string
    :raises XToolException: if ``k`` is too long or contains a character
        outside the allowed set
    """
    if not isinstance(k, basestring):
        raise TypeError("The key has to be a string")
    if len(k) > max_length:
        raise XToolException(
            "The key has to be less than {0} characters".format(max_length))
    # ``\Z`` anchors at the very end of the string. ``$`` would also match
    # just before a trailing newline and so let a key such as "abc\n" pass.
    if not re.match(r'^[A-Za-z0-9_\-\.]+\Z', k):
        raise XToolException(
            "The key ({k}) has to be made of alphanumeric characters, dashes, "
            "dots and underscores exclusively".format(k=k))
    return True
def load_backend_module_from_conf(section, key, default_backend, conf=None):
    """Import the backend module named in the configuration.

    Args:
        section: section of the configuration to read
        key: key inside ``section`` holding the module path
        default_backend: module path used when ``conf`` is ``None`` or the
            lookup raises ``conf.XToolConfigException``
        conf: configuration object exposing ``get(section, key)`` and an
            ``XToolConfigException`` attribute

    Returns:
        The imported module.

    Raises:
        XToolException: if the module cannot be imported.
    """
    # Start from the default and override it only when the configuration
    # actually provides a value.
    backend = default_backend
    if conf is not None:
        try:
            backend = conf.get(section, key)
        except conf.XToolConfigException:
            # Not configured: keep the default backend.
            pass

    try:
        return import_module(backend)
    except ImportError as err:
        log.critical("Cannot import %s for %s %s due to: %s",
                     backend, section, key, err)
        raise XToolException(err)
def done(self):
    """Report whether the file-processing child process has finished.

    When a result is waiting on the result queue, or the process is no
    longer alive, the result (if any) is stored in ``self._result``, the
    done flag is set and the process is joined.

    :return: ``True`` once the process has finished, ``False`` otherwise
    :raises XToolException: if the process was never started
    """
    if self._process is None:
        raise XToolException("Tried to see if it's done before starting!")

    if self._done:
        return True

    if self._result_queue.empty():
        # No result yet: the only other way to be finished is a dead process.
        if self._process.is_alive():
            return False
        self._done = True
        # A result may have arrived between the two queue checks.
        if not self._result_queue.empty():
            self._result = self._result_queue.get_nowait()
    else:
        # The child has already published its result.
        self._result = self._result_queue.get_nowait()
        self._done = True

    # Wait for the child process to release its resources and exit.
    self.log.debug("Waiting for %s", self._process)
    self._process.join()
    return True
def wrapper(*args, **kwargs):
    """Fill in missing keyword arguments from DAG defaults, then call ``func``.

    ``func``, ``sig_cache`` and ``non_optional_args`` are taken from the
    enclosing scope, which is not visible here (presumably a decorator
    that inspected ``func``'s signature -- verify against the caller).

    Resolution order:

    * ``params`` is built from ``dag.params``, then ``kwargs['params']``,
      then ``kwargs['default_args']['params']`` (later sources win).
    * default arguments are ``dag.default_args`` overridden by
      ``kwargs['default_args']``; any signature parameter missing from
      ``kwargs`` is filled from them.

    The ``default_args`` dict supplied by the caller is never modified:
    when it carries a ``params`` entry, a shallow copy without that entry
    is passed on to ``func`` instead.

    :raises XToolException: if more than one positional argument is given
        or a required argument is still missing after applying defaults
    """
    # At most one positional argument is allowed.
    if len(args) > 1:
        raise XToolException(
            "Use keyword arguments when initializing operators")

    # Take the dag from the arguments, or fall back to the module-level one.
    dag_args = {}
    dag_params = {}
    dag = kwargs.get('dag', None) or airflow.models._CONTEXT_MANAGER_DAG
    if dag:
        dag_args = copy(dag.default_args) or {}
        dag_params = copy(dag.params) or {}

    # Merge the explicit ``params`` argument over dag.params.
    if 'params' in kwargs:
        dag_params.update(kwargs['params'])

    # Merge kwargs['default_args']['params'] over the result and strip it
    # from the default_args handed to ``func``.
    default_args = {}
    if 'default_args' in kwargs:
        default_args = kwargs['default_args']
        if 'params' in default_args:
            # Copy first: the caller may share this dict between several
            # calls, and deleting the key in place would silently drop
            # 'params' for every call after the first.
            default_args = copy(default_args)
            dag_params.update(default_args.pop('params'))
            kwargs['default_args'] = default_args

    # kwargs['default_args'] overrides dag.default_args.
    dag_args.update(default_args)
    default_args = dag_args

    # Fill every signature parameter that was not passed explicitly.
    for arg in sig_cache.parameters:
        if arg not in kwargs and arg in default_args:
            kwargs[arg] = default_args[arg]

    # Anything required and still missing is an error.
    missing_args = list(non_optional_args - set(kwargs))
    if missing_args:
        msg = "Argument {0} is required".format(missing_args)
        raise XToolException(msg)

    kwargs['params'] = dag_params
    return func(*args, **kwargs)
def wrapper(*args, **kwargs):
    """Fill in missing keyword arguments from model defaults, then call ``func``.

    ``func``, ``sig_cache`` and ``non_optional_args`` are taken from the
    enclosing scope, which is not visible here (presumably a decorator
    that inspected ``func``'s signature -- verify against the caller).

    Resolution order:

    * ``params`` is built from ``model.params``, then ``kwargs['params']``,
      then ``kwargs['default_args']['params']`` (later sources win).
    * default arguments are ``model.default_args`` overridden by
      ``kwargs['default_args']``; any signature parameter missing from
      ``kwargs`` is filled from them.

    The ``default_args`` dict supplied by the caller is never modified:
    when it carries a ``params`` entry, a shallow copy without that entry
    is passed on to ``func`` instead.

    :raises XToolException: if more than one positional argument is given
        or a required argument is still missing after applying defaults
    """
    # At most one positional argument is allowed.
    if len(args) > 1:
        raise XToolException(
            "Use keyword arguments when initializing operators")

    # Take the model object from the keyword arguments, if any.
    model_args = {}
    model_params = {}
    model = kwargs.get('model', None)
    if model:
        model_args = copy(model.default_args) or {}
        model_params = copy(model.params) or {}

    # Merge the explicit ``params`` argument over model.params.
    if 'params' in kwargs:
        model_params.update(kwargs['params'])

    # Merge kwargs['default_args']['params'] over the result and strip it
    # from the default_args handed to ``func``.
    default_args = {}
    if 'default_args' in kwargs:
        default_args = kwargs['default_args']
        if 'params' in default_args:
            # Copy first: the caller may share this dict between several
            # calls, and deleting the key in place would silently drop
            # 'params' for every call after the first.
            default_args = copy(default_args)
            model_params.update(default_args.pop('params'))
            kwargs['default_args'] = default_args

    # kwargs['default_args'] overrides model.default_args.
    model_args.update(default_args)
    default_args = model_args

    # Fill every signature parameter that was not passed explicitly.
    for arg in sig_cache.parameters:
        if arg not in kwargs and arg in default_args:
            kwargs[arg] = default_args[arg]

    # Anything required and still missing is an error.
    missing_args = list(non_optional_args - set(kwargs))
    if missing_args:
        msg = "Argument {0} is required".format(missing_args)
        raise XToolException(msg)

    kwargs['params'] = model_params
    return func(*args, **kwargs)
def __init__(self, name, units_str, qty):
    """Store a resource's name, unit label and quantity.

    :param name: name of the resource
    :param units_str: label describing the units of ``qty``
    :param qty: amount of the resource; must not be negative
    :raises XToolException: if ``qty`` is negative
    """
    # Reject invalid quantities before touching any instance state.
    if qty < 0:
        message = (
            'Received resource quantity {} for resource {} but resource quantity '
            'must be non-negative.'.format(qty, name))
        raise XToolException(message)

    self._name, self._units_str, self._qty = name, units_str, qty
def get_dag(self, dag_id):
    """Look up a DAG by its ID.

    :param dag_id: DAG ID
    :type dag_id: unicode
    :return: the entry stored for ``dag_id`` in
        ``self.dag_id_to_simple_dag``
    :rtype: SimpleDag
    :raises XToolException: if ``dag_id`` is not in the bag
    """
    known_dags = self.dag_id_to_simple_dag
    if dag_id in known_dags:
        return known_dags[dag_id]
    raise XToolException("Unknown DAG ID {}".format(dag_id))
def create_object_from_plugin_module(name, *args, **kwargs):
    """Instantiate a class exposed by a plugin module.

    Args:
        name: dotted name in the form ``plugin_module.class_name``. The
            plugin module is looked up in this module's ``globals()``.
        *args: positional arguments forwarded to the class.
        **kwargs: keyword arguments forwarded to the class.

    Returns:
        A new instance of the requested class.

    Raises:
        XToolException: if ``name`` does not have exactly two dotted
            parts, or the plugin module is not present in ``globals()``.
    """
    parts = name.split('.')
    if len(parts) != 2:
        raise XToolException(
            "Executor {0} not supported: "
            "please specify in format plugin_module.executor".format(name))

    # First part is the plugin name, second part the class inside it.
    plugin_module_name, class_name = parts
    module_scope = globals()
    if plugin_module_name not in module_scope:
        raise XToolException("Executor {0} not supported.".format(name))

    # Build the object from the class found in the plugin's namespace.
    factory = module_scope[plugin_module_name].__dict__[class_name]
    return factory(*args, **kwargs)
def kill(self, session=None):
    """Shut the job down: stamp its end date, run ``on_kill`` and persist.

    This method always ends by raising, so it never returns normally.

    :param session: database session used for the lookup and the commit.
        NOTE(review): the default is ``None`` but the session is used
        unconditionally -- presumably injected by a decorator that is not
        visible here; confirm.
    :raises XToolException: always, after the end date has been committed
    """
    # Load the stored row for this job and record when it was stopped.
    matching_jobs = session.query(BaseJob).filter(BaseJob.id == self.id)
    job_row = matching_jobs.first()
    job_row.end_date = timezone.system_now()

    # Give the job a chance to clean up; a failing hook must not prevent
    # the end date from being saved.
    try:
        self.on_kill()
    except Exception as e:
        failure_message = 'on_kill() method failed: {}'.format(e)
        self.log.error(failure_message)

    # Persist the end date.
    session.merge(job_row)
    session.commit()

    # Signal the shutdown to the caller.
    raise XToolException("Job shut down externally.")
def topological_sort(tasks):
    """Sort tasks so that every task comes after its upstream dependencies.

    Heavily inspired by:
    http://blog.jupo.org/2012/04/06/topological-sorting-acyclic-directed-graphs/

    Tasks that do not depend on each other keep their relative input
    order. Upstream tasks that are not part of ``tasks`` are treated as
    already resolved. The input is left unmodified.

    :param tasks: iterable of objects exposing an ``upstream_list``
        attribute
    :return: tuple of the tasks in topological order (empty tuple for
        empty input)
    :raises XToolException: if the tasks contain a cyclic dependency
    """
    # Special case: nothing to sort.
    if not tasks:
        return ()

    # Work on a copy so the caller's collection is left untouched.
    unresolved = list(tasks)
    ordered = []

    while unresolved:
        # Each pass must resolve at least one task; if none can be
        # resolved, the remaining tasks depend on each other in a cycle.
        resolved_any = False

        # Iterate over a snapshot: removing from the list that is being
        # iterated would skip the element following each removal.
        for node in list(unresolved):
            # A task is ready once none of its upstream tasks is pending.
            if any(upstream in unresolved for upstream in node.upstream_list):
                continue
            resolved_any = True
            unresolved.remove(node)
            ordered.append(node)

        if not resolved_any:
            raise XToolException("A cyclic dependency occurred")

    return tuple(ordered)
def terminate(self, sigkill=False):
    """Terminate the file-processing child process.

    The result queue reference is dropped, the process is asked to
    terminate and is then joined for up to five seconds.

    :param sigkill: when true, send ``SIGKILL`` if the process is still
        alive after the join
    :raises XToolException: if the process was never started
    """
    process = self._process
    if process is None:
        raise XToolException("Tried to call stop before starting!")

    # Drop the result queue.
    self._result_queue = None

    # Ask the process to stop and wait briefly for it to exit.
    process.terminate()
    process.join(5)

    # Nothing more to do unless a forced kill was requested.
    if not sigkill:
        return
    if not self._process.is_alive():
        return

    # The process survived termination: kill it with SIGKILL.
    self.log.warning("Killing PID %s", self._process.pid)
    os.kill(self._process.pid, signal.SIGKILL)
def get_task_runner(local_task_job, conf):
    """Get the task runner that can be used to run the given job.

    The runner type is read from the ``TASK_RUNNER`` option of the
    ``core`` configuration section.

    :param local_task_job: The LocalTaskJob associated with the
        TaskInstance that needs to be executed.
    :type local_task_job: airflow.jobs.LocalTaskJob
    :param conf: configuration object exposing ``get(section, key)``
    :return: The task runner to use to run the task.
    :rtype: airflow.task.task_runner.base_task_runner.BaseTaskRunner
    :raises XToolException: if the configured runner type is unknown
    """
    runner_type = conf.get('core', 'TASK_RUNNER')

    if runner_type == "BashTaskRunner":
        return BashTaskRunner(local_task_job, conf)

    if runner_type == "CgroupTaskRunner":
        # Imported lazily so the dependency is only needed when selected.
        from airflow.contrib.task_runner.cgroup_task_runner import CgroupTaskRunner
        return CgroupTaskRunner(local_task_job, conf)

    raise XToolException(
        "Unknown task runner type {}".format(runner_type))
def run_command(command):
    """Run a command and return its standard output as text.

    The command string is split with ``shlex.split``. Output is decoded
    with the interpreter's default encoding, ignoring undecodable bytes.

    :param command: command line to execute
    :return: decoded standard output of the command
    :raises XToolException: if the command exits with a non-zero status
    """
    is_windows = platform.system() == 'Windows'

    # The command is passed as an argument list. On POSIX, combining a
    # list with shell=True would execute only the first token and hand the
    # remaining tokens to the shell itself, so the shell is used on
    # Windows only, where the previous behaviour is kept. File descriptors
    # are closed in the child everywhere except on Windows.
    process = subprocess.Popen(
        shlex.split(command),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=is_windows,
        close_fds=not is_windows)

    # Decode both captured streams to text.
    output, stderr = [
        stream.decode(sys.getdefaultencoding(), 'ignore')
        for stream in process.communicate()
    ]

    if process.returncode != 0:
        raise XToolException(
            "Cannot execute {}. Error code is: {}. Output: {}, Stderr: {}".
            format(command, process.returncode, output, stderr))

    return output
def pid(self):
    """Return the PID of the file-processing child process.

    :raises XToolException: if the process was never started
    """
    process = self._process
    if process is not None:
        return process.pid
    raise XToolException("Tried to get PID before starting!")
def start_time(self):
    """Return the time at which the file-processing child process started.

    :raises XToolException: if no start time has been recorded yet
    """
    started_at = self._start_time
    if started_at is not None:
        return started_at
    raise XToolException("Tried to get start time before it started!")
def result(self):
    """Return the result produced by the file-processing child process.

    :raises XToolException: if the process has not finished yet
    """
    # NOTE(review): ``self.done`` is read as an attribute, not called, so
    # ``done`` is presumably a property on the enclosing class -- confirm.
    if self.done:
        return self._result
    raise XToolException("Tried to get the result before it's done!")
def exit_code(self):
    """Return the exit code of the file-processing child process.

    :raises XToolException: if the process has not been marked as done
    """
    if self._done:
        return self._process.exitcode
    raise XToolException(
        "Tried to call retcode before process was finished!")