class AirflowProvider(BaseProvider):
    provider_type = 'airflow'

    def __init__(self, token=None, **kwargs):
        super().__init__(**kwargs)
        self.airflow_url = AIRFLOW_SERVER_URL
        if token:
            self.airflow_client = AirflowClient(refresh_token=token)
        else:
            self.airflow_client = AirflowClient(username=AIRFLOW_USERNAME, passwd=AIRFLOW_PASSWORD)
        self.default_tag = AIRFLOW_DEFAULT_DAG_TAG
        self.default_status_filter = ('skipped',)

    def get_default_pack(self):
        return self.default_tag

    def _build_action_from_dag(self, dags, pack=None):
        """build action from dag schema and details info

        :param dags: airflow client get_dags
        :return:
        """
        dags_schema = dags['dags_schema']
        dags_list = []
        for dag_info in dags_schema:
            schema = dag_info['schema']
            details = dag_info['details']
            json_schema = schema.get('params_json_schema')
            dags_list.append({
                'name': schema['name'],
                'parameters': schema.get('params'),
                'tags': schema.get('tags', self.default_tag),
                'description': schema['desc'],
                'enabled': schema.get('enabled', True),  # TODO: update helpdesk airflow plugin to get dag pause status
                'entry_point': schema.get('entry_point'),
                'metadata_file': details['filepath'],
                'output_schema': schema.get('output_schema'),
                'params_json_schema': json_schema,
                'uid': schema['dag_id'],
                'pack': pack or self.default_tag,
                'ref': schema['dag_id'],
                'id': schema['dag_id'],
                'runner_type': schema.get('runner_type'),
                'highlight_queries': schema.get('highlight_queries') or [r'\[.+_operator.py.+}} '],
                'pretty_log_formatter': schema.get('pretty_task_log_formatter', {}),
                'status_filter': schema.get('status_filter', self.default_status_filter)
            })
        return tuple(dags_list)

    def get_actions(self, pack=None):
        """
        :param pack: in st2 a pack -> an airflow dag tag
        :return: a list of <Action name=view_organizations,pack=trello,enabled=True,runner_type=python-script> as dicts
        """
        try:
            dags = self.airflow_client.get_dags(tags=(self.default_tag if not pack else pack,))
            return self._build_action_from_dag(dags, pack=pack)
        except Exception as e:
            if pack:
                raise ResolvePackageError(e, traceback.format_exc(), f"Resolve pack {pack} error")
            raise e

    def get_action(self, ref):
        """return <Action name=view_organizations,pack=trello,enabled=True,runner_type=python-script> as a dict

        :param ref: dag_id
        :return: dag dict
        """
        try:
            if '.' in ref:
                ref = ref.split('.')[-1]
            dag = self.airflow_client.get_schema_by_dag_id(ref)
            return self._build_action_from_dag({'dags_schema': [dag]})[0]
        except Exception as e:
            logging.error('get dag id {} schema error: {}'.format(ref, str(e)))
            return None

    def get_result_url(self, execution_id):
        dag_id, execution_date = execution_id.split('|')
        return self.airflow_client.build_graph_url(dag_id, execution_date)

    def _build_execution_from_dag(self, trigger_resp, dag_id):
        return {
            'dag_id': dag_id,
            'execution_date': trigger_resp['execution_date'],
            'msg': trigger_resp['message'],
            'id': f'{dag_id}|{trigger_resp["execution_date"]}',
            'provider': self.provider_type,
            'web_url': self.get_result_url(f'{dag_id}|{trigger_resp["execution_date"]}')
        }

    def run_action(self, ref, parameters):
        msg = ''
        try:
            trigger_result = self.airflow_client.trigger_dag(ref, conf=parameters)
            return self._build_execution_from_dag(trigger_result, ref) if trigger_result else None, msg
        except Exception as e:
            logger.error(f'run dag {ref} error: {str(e)}')
            return None, str(e)

    def generate_annotation(self, execution):
        if not execution:
            return
        return {
            'provider': self.provider_type,
            'id': execution['id'],
            'result_url': self.get_result_url(execution['id'])
        }

    @staticmethod
    def _format_exec_status(status):
        status_to_emoji = {
            'success': '✔️',
            'running': '🏃',
            'failed': '❌',
            'skipped': '⏭️',
            'upstream_failed': '⬆️❌',
            'up_for_reschedule': '🔄',
            'up_for_retry': '♻️',
            'queued': '👯',
            'no_status': '😿'
        }
        if status not in status_to_emoji:
            return status_to_emoji['no_status']
        else:
            return status_to_emoji[status]

    @staticmethod
    def _status_to_color(status):
        status_to_color = {
            'success': 'green',
            'running': '#00ff00',
            'failed': '#ff0000',
            'skipped': '#fecfd7',
            'upstream_failed': '#feba3f',
            'up_for_reschedule': '#6fe7db',
            'up_for_retry': '#fee03f',
            'queued': '#808080',
            'no_status': '#fafafa'
        }
        if status not in status_to_color:
            return status_to_color['no_status']
        else:
            return status_to_color[status]

    def _dag_graph_to_gojs_flow(self, dag_id, execution):
        airflow_graph = self.airflow_client.get_dag_graph(dag_id)
        schema = self.get_action(dag_id)
        status_filter = schema['status_filter']
        result = {
            'class': 'GraphLinksModel',
            'nodeDataArray': [],
            'linkDataArray': []
        }
        if airflow_graph:
            for node in airflow_graph['nodes']:
                ti = execution['task_instances'].get(node['id'])
                if not ti:
                    # if a new dag def updates nodes in the dag, the old result will not contain this node
                    logger.warning(f"task_id {node['id']} can not be found in task_instance")
                    state = 'no_status'
                else:
                    state = ti['state']
                if state in status_filter:
                    continue
                result['nodeDataArray'].append({
                    'key': node['id'],
                    'text': f'{self._format_exec_status(state)} {node["value"]["label"]}',
                    'color': node['value']['style'][5:-1],
                    'stroke': self._status_to_color(state)
                })
            for edge in airflow_graph['edges']:
                ti_v = execution['task_instances'].get(edge['v'])
                ti_u = execution['task_instances'].get(edge['u'])
                if not ti_v or not ti_u:
                    # if a new dag def updates nodes in the dag, the old result will not contain this node
                    logger.warning("task instance can not be found in task_instance")
                else:
                    if any((ti_u['state'] in status_filter, ti_v['state'] in status_filter)):
                        continue
                    result['linkDataArray'].append({
                        'to': edge['v'],
                        'from': edge['u']
                    })
        return result

    def _get_result_highlight_queries(self, dag_id):
        schema = self.get_action(dag_id)
        return schema['highlight_queries']

    def _build_result_from_dag_exec(self, execution, execution_id):
        dag_id, execution_date = execution_id.split('|')
        schema = self.get_action(dag_id)
        highlight_queries = schema['highlight_queries']
        status_filter = schema['status_filter']
        result = {
            'status': execution['status'],
            'start_timestamp': execution_date,
            'web_url': self.get_result_url(execution_id),
            'result': {
                'tasks': [],
                'dag_id': dag_id
            },  # frontend only renders the subtab when len(result) == 2
            'id': execution_id,
            'graph': self._dag_graph_to_gojs_flow(dag_id, execution),
        }
        for task_id in execution['dag_info']['details']['task_ids']:
            task_instance = execution['task_instances'].get(task_id)
            if not task_instance:
                # if a new dag def updates nodes in the dag, the old result will not contain this node
                logger.warning(f"task_id {task_id} can not be found in task_instance")
                # fake task instance
                task_instance = AIRFLOW_FAKE_TI
            # do not show nodes in filtered statuses
            if status_filter and task_instance['state'] in status_filter:
                continue
            is_task_success = task_instance['state'] == 'success'
            tasks_result = {}
            task_tried_times = task_instance['try_number'] - 1
            # check log result return
            if task_tried_times <= 0:
                tasks_result = {
                    task_id: {
                        'failed': True,
                        'stderr': task_instance['state'],
                        'return_code': 1,
                        'succeeded': False,
                        'stdout': ''
                    }
                }
            else:
                for tries_time in range(task_tried_times):
                    output_execution_date = self.airflow_client.get_out_put_id_date(execution_date)
                    msg = {
                        'output_load': True,
                        'query_string': f'?exec_output_id={dag_id}|{output_execution_date}|{task_id}|{tries_time+1}'
                    }
                    is_success_try = tries_time + 1 == task_tried_times and is_task_success
                    tasks_result[f'{task_id} -> [{tries_time+1}/{task_tried_times}]'] = {
                        'failed': tries_time + 1 != task_tried_times or not is_task_success,
                        'stderr': msg if not is_success_try else '',
                        'return_code': int(not is_success_try),
                        'succeeded': is_success_try,
                        'stdout': msg if is_success_try else '',
                        'highlight_queries': highlight_queries,
                    }
            result['result']['tasks'].append({
                'execution_id': f'{execution_id}|{task_id}',
                'created_at': task_instance['start_date'],
                'updated_at': task_instance['end_date'],
                'state': 'succeeded' if is_task_success else 'failed',
                'result': tasks_result,
                'id': task_id,
                'name': f'{self._format_exec_status(task_instance["state"])} {task_id}'
            })
        return result

    def get_execution(self, execution_id):
        dag_id, execution_date = execution_id.split('|')
        try:
            execution = self.airflow_client.get_dag_result(dag_id, execution_date)
            return self._build_result_from_dag_exec(execution, execution_id) if execution else None, ''
        except Exception as e:
            logger.error(f'get execution from {execution_id}, error: {traceback.format_exc()}')
            return None, str(e)

    def get_execution_output(self, execution_output_id):
        dag_id, execution_date, task_id, try_number = execution_output_id.split('|')
        std_out = self.airflow_client.get_task_result(dag_id, execution_date, task_id, try_number)
        # check log result return
        if 'message' not in std_out['result'][task_id]:
            msg = std_out['result'][task_id].get('message') or (json.dumps(std_out),)
            return msg, 'get execution output error'
        else:
            return {
                'message': std_out['result'][task_id]['message'],
                'pretty_log': std_out['result'][task_id].get('pretty_log', {})
            }, ''
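
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the provider above). A minimal
# example of how a caller might drive AirflowProvider, assuming the AIRFLOW_*
# settings are configured and the Airflow server is reachable. The dag id
# 'example_dag' and its conf parameters are hypothetical placeholders.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    provider = AirflowProvider()
    # List the DAGs exposed as actions under the default tag / pack.
    for action in provider.get_actions():
        print(action['ref'], '-', action['description'])
    # Trigger a DAG run; run_action returns (execution dict, error message).
    execution, err = provider.run_action('example_dag', {'param': 'value'})
    if execution:
        # Execution ids have the form '<dag_id>|<execution_date>'.
        result, err = provider.get_execution(execution['id'])
        print(result['status'] if result else err)
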
class AirflowProvider(LdapProviderMixin, Provider):
    provider_type = 'airflow'

    def __init__(self, token=None, user=None):
        super().__init__(token=token, user=user)
        self.airflow_url = AIRFLOW_SERVER_URL
        if token:
            self.airflow_client = AirflowClient(refresh_token=token)
        else:
            self.airflow_client = AirflowClient(username=AIRFLOW_USERNAME, passwd=AIRFLOW_PASSWORD)
        self.default_tag = AIRFLOW_DEFAULT_DAG_TAG

    def get_default_pack(self):
        return self.default_tag

    def get_user_email(self, user=None):
        user = user or self.user
        return self.get_user_email_from_ldap(user) or '%s@%s' % (user, DEFAULT_EMAIL_DOMAIN)

    def _build_action_from_dag(self, dags, pack=None):
        """build action from dag schema and details info

        :param dags: airflow client get_dags
        :return:
        """
        dags_schema = dags['dags_schema']
        dags_list = []
        for dag_info in dags_schema:
            schema = dag_info['schema']
            details = dag_info['details']
            dags_list.append({
                'name': schema['name'],
                'parameters': schema.get('params'),
                'tags': schema.get('tags', self.default_tag),
                'description': schema['desc'],
                'enabled': schema.get('enabled', True),  # TODO: update helpdesk airflow plugin to get dag pause status
                'entry_point': schema.get('entry_point'),
                'metadata_file': details['filepath'],
                'output_schema': schema.get('output_schema'),
                'uid': schema['dag_id'],
                'pack': pack or self.default_tag,
                'ref': schema['dag_id'],
                'id': schema['dag_id'],
                'runner_type': schema.get('runner_type')
            })
        return tuple(dags_list)

    def get_actions(self, pack=None):
        """
        :param pack: in st2 a pack -> an airflow dag tag
        :return: a list of <Action name=view_organizations,pack=trello,enabled=True,runner_type=python-script> as dicts
        """
        dags = self.airflow_client.get_dags(tags=(self.default_tag if not pack else pack,))
        return self._build_action_from_dag(dags, pack=pack)

    def get_action(self, ref):
        """return <Action name=view_organizations,pack=trello,enabled=True,runner_type=python-script> as a dict

        :param ref: dag_id
        :return: dag dict
        """
        try:
            if '.' in ref:
                ref = ref.split('.')[-1]
            dag = self.airflow_client.get_schema_by_dag_id(ref)
            return self._build_action_from_dag({'dags_schema': [dag]})[0]
        except Exception as e:
            logging.error('get dag id {} schema error: {}'.format(ref, str(e)))
            return None

    def get_result_url(self, execution_id):
        dag_id, execution_date = execution_id.split('|')
        return self.airflow_client.build_graph_url(dag_id, execution_date)

    def _build_execution_from_dag(self, trigger_resp, dag_id):
        return {
            'dag_id': dag_id,
            'execution_date': trigger_resp['execution_date'],
            'msg': trigger_resp['message'],
            'id': f'{dag_id}|{trigger_resp["execution_date"]}',
            'provider': self.provider_type,
            'web_url': self.get_result_url(f'{dag_id}|{trigger_resp["execution_date"]}')
        }

    def run_action(self, ref, parameters):
        msg = ''
        try:
            trigger_result = self.airflow_client.trigger_dag(ref, conf=parameters)
            return self._build_execution_from_dag(trigger_result, ref) if trigger_result else None, msg
        except Exception as e:
            logger.error(f'run dag {ref} error: {str(e)}')
            return None, str(e)

    def generate_annotation(self, execution):
        if not execution:
            return
        return {
            'provider': self.provider_type,
            'id': execution['id'],
            'result_url': self.get_result_url(execution['id'])
        }

    @staticmethod
    def _format_exec_status(status):
        status_to_emoji = {
            'success': '✔️',
            'running': '🏃',
            'failed': '❌',
            'skipped': '⏭️',
            'upstream_failed': '⬆️❌',
            'up_for_reschedule': '🔄',
            'up_for_retry': '♻️',
            'queued': '👯',
            'no_status': '😿'
        }
        if status not in status_to_emoji:
            return status_to_emoji['no_status']
        else:
            return status_to_emoji[status]

    def _build_result_from_dag_exec(self, execution, execution_id, filter_status=None):
        dag_id, execution_date = execution_id.split('|')
        result = {
            'status': execution['status'],
            'start_timestamp': execution_date,
            'web_url': self.get_result_url(execution_id),
            'result': {
                'tasks': [],
                'dag_id': dag_id
            },  # frontend only renders the subtab when len(result) == 2
            'id': execution_id,
        }
        for task_id in execution['dag_info']['details']['task_ids']:
            task_instance = execution['task_instances'].get(task_id)
            if not task_instance:
                # if a new dag def updates nodes in the dag, the old result will not contain this node
                logger.warning(f"task_id {task_id} can not be found in task_instance")
                continue
            if filter_status and task_instance['state'] in filter_status:
                continue
            is_task_success = task_instance['state'] == 'success'
            tasks_result = {}
            task_tried_times = task_instance['try_number'] - 1
            # check log result return
            if task_tried_times <= 0:
                tasks_result = {
                    task_id: {
                        'failed': True,
                        'stderr': task_instance['state'],
                        'return_code': 1,
                        'succeeded': False,
                        'stdout': ''
                    }
                }
            else:
                for tries_time in range(task_tried_times):
                    output_execution_date = self.airflow_client.get_out_put_id_date(execution_date)
                    msg = {
                        'output_load': True,
                        'query_string': f'?exec_output_id={dag_id}|{output_execution_date}|{task_id}|{tries_time+1}'
                    }
                    is_success_try = tries_time + 1 == task_tried_times and is_task_success
                    tasks_result[f'{task_id} -> [{tries_time+1}/{task_tried_times}]'] = {
                        'failed': tries_time + 1 != task_tried_times or not is_task_success,
                        'stderr': msg if not is_success_try else '',
                        'return_code': int(not is_success_try),
                        'succeeded': is_success_try,
                        'stdout': msg if is_success_try else ''
                    }
            result['result']['tasks'].append({
                'execution_id': f'{execution_id}|{task_id}',
                'created_at': task_instance['start_date'],
                'updated_at': task_instance['end_date'],
                'state': 'succeeded' if is_task_success else 'failed',
                'result': tasks_result,
                'id': task_id,
                'name': f'{self._format_exec_status(task_instance["state"])} {task_id}'
            })
        return result

    def get_execution(self, execution_id):
        dag_id, execution_date = execution_id.split('|')
        try:
            execution = self.airflow_client.get_dag_result(dag_id, execution_date)
            return self._build_result_from_dag_exec(
                execution, execution_id, filter_status=('skipped',)) if execution else None, ''
        except Exception as e:
            logger.error(f'get execution from {execution_id}, error: {str(e)}')
            return None, str(e)

    def authenticate(self, user, password=None):
        try:
            if password:
                token = self.airflow_client.generate_token(user, password)
            else:
                token = self.airflow_client.generate_token(AIRFLOW_USERNAME, AIRFLOW_PASSWORD)
            if token:
                return {
                    'token': token['refresh_token'],
                    'user': user,
                    'expiry': token['expire_time']
                }, ''
            return None, ''
        except Exception as e:
            logger.error('auth with airflow error: {}'.format(str(e)))
            return None, str(e)

    def get_user_roles(self, user=None):
        '''return a list of roles, e.g. ["admin"]'''
        roles = []
        try:
            roles = self.airflow_client.get_user_roles(username=user or 'sysadmin')
            logger.debug('Get user roles: %s.get_user_roles(): %s', self, roles)
            return roles['role_from_jwt'] if not user else roles
        except Exception as e:
            logger.error('Get user role error: {}'.format(str(e)))
            return roles

    def get_execution_output(self, execution_output_id):
        dag_id, execution_date, task_id, try_number = execution_output_id.split('|')
        std_out = self.airflow_client.get_task_result(dag_id, execution_date, task_id, try_number)
        # check log result return
        if 'message' not in std_out['result'][task_id]:
            msg = std_out['result'][task_id].get('message') or (json.dumps(std_out),)
            return msg, 'get execution output error'
        else:
            return std_out['result'][task_id]['message'], ''