def set(cls, key, value, execution_date, task_id, dag_id, session=None):
    """Store an XCom value.

    TODO: "pickling" has been deprecated and JSON is preferred.
    "pickling" will be removed in Airflow 2.0.

    :param key: identifier of the XCom entry
    :param value: the Python object to persist (pickled or JSON-encoded)
    :param execution_date: execution date the value belongs to
    :param task_id: id of the producing task
    :param dag_id: id of the producing DAG
    :param session: SQLAlchemy session (injected by the caller)
    :return: None
    """
    # Detach every instance currently tracked by the session so stale
    # state cannot be flushed together with this write.
    session.expunge_all()

    enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
    # Serialize the value before persisting it.
    if enable_pickling:
        # NOTE(review): pickle is deprecated for XCom; unpickling untrusted
        # data is unsafe, which is why JSON is preferred.
        value = pickle.dumps(value)
    else:
        try:
            # JSON payloads are stored as UTF-8 encoded bytes.
            value = json.dumps(value).encode('UTF-8')
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not serialize the XCOM value into JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            raise

    # remove any duplicate XComs for the same (key, execution_date,
    # task_id, dag_id) before inserting the new row
    session.query(cls).filter(
        cls.key == key,
        cls.execution_date == execution_date,
        cls.task_id == task_id,
        cls.dag_id == dag_id).delete()
    session.commit()

    # insert new XCom
    # Fix: instantiate via cls(...) instead of the hard-coded XCom(...) so
    # that subclasses of this model store rows of their own type, matching
    # the delete query above which already uses cls.
    session.add(cls(
        key=key,
        value=value,
        execution_date=execution_date,
        task_id=task_id,
        dag_id=dag_id))

    session.commit()
def execute_command(command):
    """Run a shell command delivered to an Airflow Celery worker."""
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    # The child process gets a copy of the worker's environment.
    env = os.environ.copy()
    try:
        # NOTE: shell=True runs the message payload through the shell; the
        # command is produced by the scheduler, not by untrusted end users.
        subprocess.check_call(
            command,
            shell=True,
            stderr=subprocess.STDOUT,
            close_fds=True,
            env=env,
        )
    except subprocess.CalledProcessError as err:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(err.output)
        raise AirflowException('Celery command failed')
def get_one(cls, execution_date, key=None, task_id=None, dag_id=None,
            include_prior_dates=False, session=None):
    """Retrieve a single XCom value, optionally meeting certain criteria.

    TODO: "pickling" has been deprecated and JSON is preferred.
    "pickling" will be removed in Airflow 2.0.

    :return: XCom value
    """
    # Assemble the filter list from whichever criteria were supplied.
    criteria = []
    if key:
        criteria.append(cls.key == key)
    if task_id:
        criteria.append(cls.task_id == task_id)
    if dag_id:
        criteria.append(cls.dag_id == dag_id)
    criteria.append(
        cls.execution_date <= execution_date
        if include_prior_dates
        else cls.execution_date == execution_date)

    # The most recent matching row wins.
    query = (session.query(cls.value)
             .filter(and_(*criteria))
             .order_by(cls.execution_date.desc(), cls.timestamp.desc()))
    result = query.first()
    if not result:
        return None

    enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
    if enable_pickling:
        return pickle.loads(result.value)
    try:
        # Stored JSON payloads are UTF-8 encoded bytes.
        return json.loads(result.value.decode('UTF-8'))
    except ValueError:
        log = LoggingMixin().log
        log.error("Could not deserialize the XCOM value from JSON. "
                  "If you are using pickles instead of JSON "
                  "for XCOM, then you need to enable pickle "
                  "support for XCOM in your airflow config.")
        raise
def get_val(self):
    """Return the variable's value, decrypting it when it is encrypted."""
    log = LoggingMixin().log
    # Plain (or empty) values are handed back untouched.
    if not (self._val and self.is_encrypted):
        return self._val
    try:
        # Decrypt using the Fernet key from the Airflow configuration.
        fernet_key = configuration.conf.get('core', 'FERNET_KEY')
        fernet = get_fernet(fernet_key)
        return fernet.decrypt(bytes(self._val, 'utf-8')).decode()
    except InvalidFernetToken:
        # Invalid token / corrupted ciphertext: log and return None.
        log.error("Can't decrypt _val for key={}, invalid token "
                  "or value".format(self.key))
        return None
    except Exception:
        # Missing or unusable FERNET_KEY configuration: log and return None.
        log.error("Can't decrypt _val for key={}, FERNET_KEY "
                  "configuration missing".format(self.key))
        return None
# 遍历模块的属性值 for obj in list(m.__dict__.values()): # 判断模块中的类是否为 XToolPlugin 的子类 if ( inspect.isclass(obj) and issubclass(obj, XToolPlugin) and obj is not XToolPlugin): # 验证子类中是否定义了name静态变量 obj.validate() # 将类加入到插件列表中 if obj not in plugins: plugins.append(obj) except Exception as e: log.exception(e) log.error('Failed to import plugin %s', filepath) def make_module(name, objects): """动态创建模块 . :param name: 模块名称 :param objects: 模块中需要包含的对象列表 """ log.debug('Creating module %s', name) name = name.lower() # 创建模块 module = imp.new_module(name) # 给模块设置_name属性 (插件名) module._name = name.split('.')[-1] # 给模块设置_object属性(插件中所有的类名)
from hdfs import InsecureClient, HdfsError
from airflow import configuration
from airflow.exceptions import AirflowException
from airflow.hooks.base_hook import BaseHook
from xTool.utils.log.logging_mixin import LoggingMixin

# True when the deployment is configured for Kerberos-secured HDFS access.
_kerberos_security_mode = configuration.conf.get("core", "security") == "kerberos"
if _kerberos_security_mode:
    try:
        # The Kerberos client ships in an optional extra of the hdfs
        # package; fail loudly at import time so a misconfigured
        # deployment is visible immediately.
        from hdfs.ext.kerberos import KerberosClient
    except ImportError:
        log = LoggingMixin().log
        log.error("Could not load the Kerberos extension for the WebHDFSHook.")
        raise


class AirflowWebHDFSHookException(AirflowException):
    # Raised for WebHDFS-specific hook failures.
    pass


class WebHDFSHook(BaseHook):
    """
    Interact with HDFS. This class is a wrapper around the hdfscli library.
    """
    def __init__(self, webhdfs_conn_id='webhdfs_default', proxy_user=None):
        # Id of the Airflow connection holding the WebHDFS endpoint details.
        self.webhdfs_conn_id = webhdfs_conn_id
        # Optional user to proxy as when talking to HDFS.
        self.proxy_user = proxy_user