class PromenadeBaseOperator(BaseOperator):
    """Common parent for the Promenade workflow operators.

    Subclasses inherit the attributes and helper methods defined here and
    are expected to provide a ``do_execute`` method, which ``execute``
    invokes once the shared setup has finished.
    """

    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 promenade_svc_endpoint=None,
                 promenade_svc_type='kubernetesprovisioner',
                 redeploy_server=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_token=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Store the operator settings on the instance.

        :param main_dag_name: Parent Dag
        :param promenade_svc_endpoint: Promenade Service Endpoint
        :param promenade_svc_type: Promenade Service Type
        :param redeploy_server: Server to be redeployed
        :param shipyard_conf: Path of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_token: Keystone Token
        :param xcom_push: xcom usage (kept as ``self.xcom_push_flag``)

        The Promenade operator assumes that prior steps have set xcoms for
        the action and the deployment configuration
        """
        super(PromenadeBaseOperator, self).__init__(*args, **kwargs)

        self.main_dag_name = main_dag_name
        self.promenade_svc_endpoint = promenade_svc_endpoint
        self.promenade_svc_type = promenade_svc_type
        self.redeploy_server = redeploy_server
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_token = svc_token
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        """Run the shared Promenade setup, then the subclass's own work.

        :param context: Airflow task context; must hold 'task_instance'
        """
        # Shared setup first ...
        self.promenade_base(context)

        # ... then hand over to the child operator
        self.do_execute()

    @shipyard_service_token
    def promenade_base(self, context):
        """Populate the attributes every Promenade operator relies on.

        Sets ``xcom_puller``, ``action_info``, ``dc`` and
        ``promenade_svc_endpoint``; for the 'redeploy_server' dag it also
        sets ``redeploy_server`` from the action parameters.

        :param context: Airflow task context; must hold 'task_instance'
        :raises AirflowException: if the 'redeploy_server' dag is running
            but the action carries no 'server-name' parameter
        """
        ti = context['task_instance']

        # Pull the action details and deployment configuration from xcom
        puller = XcomPuller(self.main_dag_name, ti)
        self.xcom_puller = puller
        self.action_info = puller.get_action_info()
        self.dc = puller.get_deployment_configuration()

        # Record the uuid of the Shipyard action being worked on
        logging.info("Executing Shipyard Action %s", self.action_info['id'])

        # Only the 'redeploy_server' dag names a server to redeploy
        if self.action_info['dag_id'] == 'redeploy_server':
            self.redeploy_server = self.action_info['parameters'].get(
                'server-name')

            if not self.redeploy_server:
                raise AirflowException('%s was unable to retrieve the '
                                       'server to be redeployed.'
                                       % self.__class__.__name__)

            logging.info("Server to be redeployed is %s",
                         self.redeploy_server)

        # Look up the Promenade endpoint for the configured service type
        self.promenade_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.promenade_svc_type)

        logging.info("Promenade endpoint is %s", self.promenade_svc_endpoint)
class DrydockBaseOperator(BaseOperator):
    """Drydock Base Operator

    All drydock related workflow operators will use the drydock
    base operator as the parent and inherit attributes and methods
    from this class. Subclasses are expected to provide ``do_execute``,
    which ``execute`` calls after the shared setup.
    """

    @apply_defaults
    def __init__(self,
                 deckhand_design_ref=None,
                 deckhand_svc_type='deckhand',
                 drydock_client=None,
                 drydock_svc_endpoint=None,
                 drydock_svc_type='physicalprovisioner',
                 drydock_task_id=None,
                 main_dag_name=None,
                 node_filter=None,
                 redeploy_server=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_session=None,
                 svc_token=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Initialization of DrydockBaseOperator object.

        :param deckhand_design_ref: A URI reference to the design documents
        :param deckhand_svc_type: Deckhand Service Type
        :param drydock_client: An instance of drydock client
        :param drydock_svc_endpoint: Drydock Service Endpoint
        :param drydock_svc_type: Drydock Service Type
        :param drydock_task_id: Drydock Task ID
        :param main_dag_name: Parent Dag
        :param node_filter: A filter for narrowing the scope of the task.
                            Valid fields are 'node_names', 'rack_names',
                            'node_tags'. Note that node filter is turned
                            off by default, i.e. all nodes will be deployed.
        :param redeploy_server: Server to be redeployed
        :param shipyard_conf: Location of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_session: Keystone Session
        :param svc_token: Keystone Token
        :param xcom_push: xcom usage (stored as ``self.xcom_push_flag``)

        The Drydock operator assumes that prior steps have set xcoms for
        the action and the deployment configuration
        """
        super(DrydockBaseOperator, self).__init__(*args, **kwargs)
        self.deckhand_design_ref = deckhand_design_ref
        self.deckhand_svc_type = deckhand_svc_type
        self.drydock_client = drydock_client
        self.drydock_svc_endpoint = drydock_svc_endpoint
        self.drydock_svc_type = drydock_svc_type
        self.drydock_task_id = drydock_task_id
        self.main_dag_name = main_dag_name
        self.node_filter = node_filter
        self.redeploy_server = redeploy_server
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_session = svc_session
        self.svc_token = svc_token
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        """Run the shared Drydock setup, then the child operator's work.

        :param context: Airflow task context; must hold 'task_instance'
        """
        # Execute drydock base function
        self.drydock_base(context)

        # Execute child function
        self.do_execute()

    def drydock_base(self, context):
        """Set up xcom values, the Drydock client and the design reference.

        Populates ``xcom_puller``, ``action_info``, ``dc``,
        ``drydock_svc_endpoint``, ``drydock_client`` and
        ``deckhand_design_ref``. For the 'redeploy_server' dag it also sets
        ``redeploy_server`` and uses it as ``node_filter``.

        :param context: Airflow task context; must hold 'task_instance'
        :raises AirflowException: if the server to redeploy cannot be
            determined, or the session, client or design reference cannot
            be set up
        """
        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Logs uuid of action performed by the Operator
        logging.info("DryDock Operator for action %s", self.action_info['id'])

        # Retrieve information of the server that we want to redeploy if user
        # executes the 'redeploy_server' dag
        # Set node filter to be the server that we want to redeploy
        if self.action_info['dag_id'] == 'redeploy_server':
            # Use .get() so that a missing 'server-name' reaches the
            # explicit AirflowException below rather than a bare KeyError
            self.redeploy_server = (
                self.action_info['parameters'].get('server-name'))

            if self.redeploy_server:
                logging.info("Server to be redeployed is %s",
                             self.redeploy_server)
                self.node_filter = self.redeploy_server
            else:
                raise AirflowException('Unable to retrieve information of '
                                       'node to be redeployed!')

        # Retrieve Endpoint Information
        self.drydock_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.drydock_svc_type)

        logging.info("Drydock endpoint is %s", self.drydock_svc_endpoint)

        # Parse DryDock Service Endpoint
        drydock_url = urlparse(self.drydock_svc_endpoint)

        # Build a DrydockSession with credentials and target host
        # information.
        # The DrydockSession will care for TCP connection pooling
        # and header management
        logging.info("Build DryDock Session")
        dd_session = session.DrydockSession(drydock_url.hostname,
                                            port=drydock_url.port,
                                            auth_gen=self._auth_gen)

        # Raise Exception if we are not able to set up the session
        if dd_session:
            logging.info("Successfully Set Up DryDock Session")
        else:
            raise AirflowException("Failed to set up Drydock Session!")

        # Use the DrydockSession to build a DrydockClient that can
        # be used to make one or more API calls
        logging.info("Create DryDock Client")
        self.drydock_client = client.DrydockClient(dd_session)

        # Raise Exception if we are not able to build the client
        if self.drydock_client:
            logging.info("Successfully Set Up DryDock client")
        else:
            raise AirflowException("Failed to set up Drydock Client!")

        # Retrieve DeckHand Endpoint Information
        deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        logging.info("Deckhand endpoint is %s", deckhand_svc_endpoint)

        # Retrieve last committed revision id
        committed_revision_id = self.xcom_puller.get_design_version()

        # Form DeckHand Design Reference Path
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+" + deckhand_svc_endpoint
        self.deckhand_design_ref = os.path.join(deckhand_path,
                                                "revisions",
                                                str(committed_revision_id),
                                                "rendered-documents")

        if self.deckhand_design_ref:
            logging.info("Design YAMLs will be retrieved from %s",
                         self.deckhand_design_ref)
        else:
            raise AirflowException("Unable to Retrieve Design Reference!")

    @shipyard_service_token
    def _auth_gen(self):
        """Return the auth headers for the Drydock session.

        Handed to ``DrydockSession`` as its ``auth_gen`` callable.

        :returns: a one-element list holding the ('X-Auth-Token', token)
            pair built from ``self.svc_token``
        """
        return [('X-Auth-Token', self.svc_token)]

    def create_task(self, task_action):
        """Create a Drydock task and remember its ID.

        :param task_action: the action handed to Drydock's ``create_task``
        :returns: the ID of the created task (also stored in
            ``self.drydock_task_id``)
        :raises AirflowException: if the Drydock client reports an error or
            the response carries no task ID
        """
        # Node Filter
        logging.info("Nodes Filter List: %s", self.node_filter)

        try:
            # Create Task
            create_task_response = self.drydock_client.create_task(
                design_ref=self.deckhand_design_ref,
                task_action=task_action,
                node_filter=self.node_filter)

        except errors.ClientError as client_error:
            raise AirflowException(client_error)

        # Retrieve Task ID. Use .get() so that a response without
        # 'task_id' reaches the explicit check below rather than a KeyError
        self.drydock_task_id = create_task_response.get('task_id')
        logging.info('Drydock %s task ID is %s',
                     task_action, self.drydock_task_id)

        # Raise Exception if we are not able to get the task_id from
        # Drydock
        if self.drydock_task_id:
            return self.drydock_task_id
        else:
            raise AirflowException("Unable to create task!")

    def query_task(self, interval, time_out):
        """Poll Drydock until the task finishes or the time-out is reached.

        :param interval: seconds to sleep between polls (int or numeric
            string; converted with ``int()``)
        :param time_out: overall time budget in seconds (int or numeric
            string; converted with ``int()``)
        :raises AirflowException: if the Drydock client reports an error,
            the task is still running at the last poll, or the final task
            result is not 'success'
        """
        # Calculate number of times to execute the 'for' loop
        # Convert 'time_out' and 'interval' from string into integer
        # The result from the division will be a floating number which
        # We will round off to nearest whole number
        end_range = round(int(time_out) / int(interval))

        logging.info('Task ID is %s', self.drydock_task_id)

        # Start from a known "nothing retrieved yet" state so that a
        # failed first poll cannot leave these names unbound below
        task_status = None
        task_result = None

        # Query task status
        for i in range(0, end_range + 1):
            try:
                # Retrieve current task state
                task_state = self.drydock_client.get_task(
                    task_id=self.drydock_task_id)

                task_status = task_state['status']
                task_result = task_state['result']['status']

                logging.info("Current status of task id %s is %s",
                             self.drydock_task_id, task_status)

            except errors.ClientError as client_error:
                raise AirflowException(client_error)

            except Exception:
                # There can be situations where there are intermittent network
                # issues that prevents us from retrieving the task state. We
                # will want to retry in such situations. 'Exception' (not a
                # bare except) lets KeyboardInterrupt/SystemExit propagate.
                logging.warning("Unable to retrieve task state. Retrying...")

            # Raise Time Out Exception
            if task_status == 'running' and i == end_range:
                self.task_failure(False)

            # Exit 'for' loop if the task is in 'complete' or 'terminated'
            # state
            if task_status in ['complete', 'terminated']:
                logging.info('Task result is %s', task_result)
                break
            else:
                time.sleep(int(interval))

        # Get final task result
        if task_result == 'success':
            logging.info('Task id %s has been successfully completed',
                         self.drydock_task_id)
        else:
            self.task_failure(True)

    def task_failure(self, _task_failure):
        """Log details of the failed task and its subtasks, then raise.

        :param _task_failure: True when the task failed, False when it
            timed out; only selects the message of the final exception
        :raises AirflowException: always. Either the failure/time-out
            exception, or earlier if Drydock cannot be queried or a
            subtask record is missing
        """
        logging.info('Retrieving all tasks records from Drydock...')

        try:
            # Get all tasks records
            all_tasks = self.drydock_client.get_tasks()

            # Create a dictionary of tasks records with 'task_id' as key
            all_task_ids = {t['task_id']: t for t in all_tasks}

        except errors.ClientError as client_error:
            raise AirflowException(client_error)

        # Retrieve the failed parent task (first record with our task ID)
        failed_task = next(
            (t for t in all_tasks if t['task_id'] == self.drydock_task_id),
            None)

        subtask_id_list = []

        if failed_task:
            # Print detailed information of failed parent task in json output
            logging.error('%s task has either failed or timed out',
                          failed_task['action'])

            logging.error(json.dumps(failed_task,
                                     indent=4,
                                     sort_keys=True))

            # Get the list of subtasks belonging to the failed parent task
            subtask_id_list = failed_task['subtask_id_list']

            logging.info("Printing information of failed sub-tasks...")
        else:
            # Without a parent record there is nothing more to report;
            # fall through to the final exception instead of crashing
            logging.error('No Drydock record found for task id %s',
                          self.drydock_task_id)

        # Print detailed information of failed step(s) under each subtask
        # This will help to provide additional information for troubleshooting
        # purpose.
        for subtask_id in subtask_id_list:

            logging.info("Retrieving details of subtask %s...", subtask_id)

            # Retrieve task information
            task = all_task_ids.get(subtask_id)

            if task:
                # Print subtask action and state
                logging.info("%s subtask is in %s state",
                             task['action'],
                             task['result']['status'])

                # Print list containing steps in failure state
                if task['result']['failures']:
                    logging.error("The following steps have failed:")
                    logging.error(task['result']['failures'])

                    message_list = (
                        task['result']['details']['messageList'] or [])

                    # Print information of failed steps
                    for message in message_list:
                        is_error = message['error'] is True

                        if is_error:
                            logging.error(json.dumps(message,
                                                     indent=4,
                                                     sort_keys=True))
                else:
                    logging.info("No failed step detected for subtask %s",
                                 subtask_id)

            else:
                raise AirflowException("Unable to retrieve subtask info!")

        # Raise Exception to terminate workflow
        if _task_failure:
            raise AirflowException("Failed to Execute/Complete Task!")
        else:
            raise AirflowException("Task Execution Timed Out!")
class UcpBaseOperator(BaseOperator):
    """Airship Base Operator

    All Airship related workflow operators will use the Airship base
    operator as the parent and inherit attributes and methods
    from this class. Subclasses are expected to provide ``run_base`` and
    ``do_execute``; an optional ``fetch_failure_details`` method is called
    when ``do_execute`` raises.
    """

    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 pod_selector_pattern=None,
                 shipyard_conf=None,
                 start_time=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Initialization of UcpBaseOperator object.

        :param main_dag_name: Parent Dag
        :param pod_selector_pattern: A list containing the information on
                                     the patterns of the Pod name and name
                                     of the associated container for log
                                     queries. This will allow us to query
                                     multiple components, e.g. MAAS and
                                     Drydock at the same time. It also allows
                                     us to query the logs of specific container
                                     in Pods with multiple containers. For
                                     instance the Airflow worker pod contains
                                     both the airflow-worker container and the
                                     log-rotate container.
        :param shipyard_conf: Location of shipyard.conf
        :param start_time: Time when Operator gets executed
        :param xcom_push: xcom usage (stored as ``self.xcom_push_flag``)

        The instance attribute ``continue_processing`` (not a parameter) is
        initialised to True; ``execute`` only calls ``do_execute`` while it
        remains truthy.
        """
        super(UcpBaseOperator, self).__init__(*args, **kwargs)
        self.continue_processing = True
        self.main_dag_name = main_dag_name
        self.pod_selector_pattern = pod_selector_pattern or []
        self.shipyard_conf = shipyard_conf
        # NOTE(review): the 'start_time' argument is accepted but not read;
        # the construction time is always used. Confirm this is intended.
        self.start_time = datetime.now()
        self.xcom_push_flag = xcom_push
        # lazy init field to hold a shipyard_db_engine
        self._shipyard_db_engine = None

    def execute(self, context):
        """Load configuration, run the base steps, then the child operator.

        If ``do_execute`` raises, pod logs and (when the subclass defines
        ``fetch_failure_details``) failure details are gathered before the
        original exception is re-raised.

        :param context: Airflow task context; must hold 'task_instance'
        """
        # Setup values that depend on the shipyard configuration
        self.doc_utils = _get_document_util(self.shipyard_conf)
        self.endpoints = service_endpoint.ServiceEndpoints(self.shipyard_conf)

        # Read and parse shipyard.conf
        self.config = configparser.ConfigParser()
        self.config.read(self.shipyard_conf)

        # Execute Airship base function
        self.ucp_base(context)

        # Execute base function for child operator
        self.run_base(context)

        if self.continue_processing:
            # Execute child function
            try:
                self.do_execute()
            except Exception:
                LOG.exception(
                    'Exception happened during %s execution, '
                    'will try to log additional details',
                    self.__class__.__name__)
                self.get_k8s_logs()
                if hasattr(self, 'fetch_failure_details'):
                    self.fetch_failure_details()
                raise

    def ucp_base(self, context):
        """Populate the attributes shared by all Airship operators.

        Sets up the notes helper, the kubernetes namespace, the xcom puller
        and the values read from it (action info/type, deployment
        configuration), plus ``action_id``, ``task_id``, ``revision_id``,
        ``action_params``, ``design_ref`` and ``target_nodes``.

        :param context: Airflow task context; must hold 'task_instance'
        """
        LOG.info("Running Airship Base Operator...")

        # Configure the notes helper for this run of an operator
        # establishes self.notes_helper
        self._setup_notes_helper()

        # Initialize variable that indicates the kubernetes namespace for the
        # Airship components
        self.ucp_namespace = self.config.get(K8S_LOGS, 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.action_type = self.xcom_puller.get_action_type()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up other common-use values
        self.action_id = self.action_info['id']
        # extract the `task` or `step` name for easy access
        self.task_id = self.task_instance.task_id
        self.revision_id = self.action_info['committed_rev_id']
        self.action_params = self.action_info.get('parameters', {})
        self.design_ref = self._deckhand_design_ref()
        self._setup_target_nodes()

    def get_k8s_logs(self):
        """Retrieve Kubernetes pod/container logs specified by an operator

        This method is "best effort" and should not prevent the progress of
        the workflow processing
        """
        if self.pod_selector_pattern:
            for selector in self.pod_selector_pattern:
                # Get difference in current time and time when the
                # operator was first executed (in seconds)
                t_diff = (datetime.now() - self.start_time).total_seconds()

                # Note that we will end up with a floating number for
                # 't_diff' and will need to round it up to the nearest
                # integer
                t_diff_int = int(math.ceil(t_diff))

                try:
                    get_pod_logs(selector['pod_pattern'],
                                 self.ucp_namespace,
                                 selector['container'],
                                 t_diff_int)

                except K8sLoggingException as e:
                    LOG.error(e)

        else:
            LOG.debug("There are no pod logs specified to retrieve")

    def _setup_target_nodes(self):
        """Sets up the target nodes field for this action

        When managing a targeted action, this step needs to resolve the
        target node. If there are no targets found (should be caught before
        invocation of the DAG), then raise an exception so that it does not
        try to take action on more nodes than targeted.
        Later, when creating the deployment group, if this value
        (self.target_nodes) is set, it will be used in lieu of the design
        based deployment strategy.
        target_nodes will be a comma separated string provided as part of the
        parameters to an action on input to Shipyard.

        :raises AirflowException: if the action is targeted but no
            non-empty node name can be resolved
        """
        if self.action_type == 'targeted':
            # Tolerate an explicit None as well as a missing key
            t_nodes = self.action_params.get('target_nodes') or ''

            # ''.split(',') yields [''], so blank entries must be dropped
            # or the "no targets" check below can never trigger
            stripped = (n.strip() for n in t_nodes.split(','))
            self.target_nodes = [n for n in stripped if n]

            if not self.target_nodes:
                raise AirflowException(
                    '{} ({}) requires targeted nodes, but was unable to '
                    'resolve any targets in {}'.format(
                        self.main_dag_name, self.action_id,
                        self.__class__.__name__))
            LOG.info("Target Nodes for action: [%s]",
                     ', '.join(self.target_nodes))
        else:
            self.target_nodes = None

    def _deckhand_design_ref(self):
        """Assemble a deckhand design_ref

        :returns: "deckhand+<endpoint>/revisions/<revision_id>/
            rendered-documents" built from the Deckhand endpoint and
            ``self.revision_id``
        """
        # Retrieve DeckHand Endpoint Information
        LOG.info("Assembling a design ref using revision: %s",
                 self.revision_id)
        deckhand_svc_endpoint = self.endpoints.endpoint_by_name(
            service_endpoint.DECKHAND)
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+{}".format(deckhand_svc_endpoint)
        design_ref = os.path.join(deckhand_path,
                                  "revisions",
                                  str(self.revision_id),
                                  "rendered-documents")
        LOG.info("Design Reference is %s", design_ref)
        return design_ref

    def get_unique_doc(self, schema, name, revision_id=None):
        """Retrieve a specific document from Deckhand

        :param schema: the schema of the document
        :param name: the metadata.name of the document
        :param revision_id: the deckhand revision, or defaults to
            self.revision_id
        Wraps the document_validation_utils call to get the same.
        Returns the specified document or raises an Airflow exception.
        """
        if revision_id is None:
            revision_id = self.revision_id

        # Log the document actually being requested
        LOG.info("Retrieve %s, %s from Deckhand", schema, name)
        try:
            return self.doc_utils.get_unique_doc(revision_id=revision_id,
                                                 name=name,
                                                 schema=schema)
        except Exception as ex:
            # Argument order must follow the placeholders: name, then schema
            LOG.error(
                "A document was expected to be available: Name: %s, "
                "Schema: %s, Deckhand revision: %s, but there was an "
                "error attempting to retrieve it. Since this document's "
                "contents may be critical to the proper operation of "
                "the workflow, this is fatal.",
                name, schema, revision_id)
            LOG.exception(ex)
            # if the document is not found for ANY reason, the workflow is
            # broken. Raise an Airflow Exception.
            raise AirflowException(ex)

    def _get_shipyard_db_engine(self):
        """Lazy initialize an engine for the Shipyard database.

        :returns: a SQLAlchemy engine for the Shipyard database.

        Developer's Note: Initially the idea was to use the PostgresHook and
        retrieve an engine from there as is done with the concurrency check,
        but since we have easy access to a configuration file, this does
        direct SQLAlchemy to get the engine. By using the config, the database
        connection is not exposed as environment variables -- which is one way
        that Airflow registers database connections for use by the dbApiHook
        """
        if self._shipyard_db_engine is None:
            connection_string = self.config.get(BASE, 'postgresql_db')
            pool_size = self.config.getint(BASE, 'pool_size')
            max_overflow = self.config.getint(BASE, 'pool_overflow')
            pool_pre_ping = self.config.getboolean(BASE, 'pool_pre_ping')
            pool_recycle = self.config.getint(BASE, 'connection_recycle')
            pool_timeout = self.config.getint(BASE, 'pool_timeout')
            self._shipyard_db_engine = sqlalchemy.create_engine(
                connection_string, pool_size=pool_size,
                max_overflow=max_overflow,
                pool_pre_ping=pool_pre_ping,
                pool_recycle=pool_recycle,
                pool_timeout=pool_timeout)
            LOG.info(
                "Initialized Shipyard database connection with pool "
                "size: %d, max overflow: %d, pool pre ping: %s, pool "
                "recycle: %d, and pool timeout: %d",
                pool_size, max_overflow,
                pool_pre_ping, pool_recycle,
                pool_timeout)

        return self._shipyard_db_engine

    @shipyard_service_token
    def _token_getter(self):
        """Return ``self.svc_token``; used as the notes ``get_token`` hook."""
        return self.svc_token

    def _setup_notes_helper(self):
        """Setup a notes helper for use by all descendent operators"""
        # NOTE(review): config.get returns strings; presumably NotesManager
        # accepts the timeouts in that form -- confirm.
        connect_timeout = self.config.get(REQUESTS_CONFIG,
                                          'notes_connect_timeout')
        read_timeout = self.config.get(REQUESTS_CONFIG, 'notes_read_timeout')
        # The storage receives the bound getter itself (not its result), so
        # the engine is only created when the storage first calls it.
        self.notes_helper = NotesHelper(
            NotesManager(
                storage=ShipyardSQLNotesStorage(self._get_shipyard_db_engine),
                get_token=self._token_getter,
                connect_timeout=connect_timeout,
                read_timeout=read_timeout))
class UcpBaseOperator(BaseOperator):
    """Shared parent of the UCP workflow operators.

    UCP operators derive from this class to pick up its attributes and
    helper methods. Subclasses are expected to provide ``run_base`` and
    ``do_execute``, both of which ``execute`` calls.
    """

    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 pod_selector_pattern=None,
                 shipyard_conf=None,
                 start_time=None,
                 sub_dag_name=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Record the operator settings on the instance.

        :param main_dag_name: Parent Dag
        :param pod_selector_pattern: A list of selectors, each giving the
                                     Pod name pattern ('pod_pattern') and
                                     the container name ('container') to
                                     query logs for. Several components
                                     (e.g. MAAS and Drydock) can be listed
                                     together, and a single container can
                                     be picked out of a multi-container
                                     Pod. Defaults to an empty list.
        :param shipyard_conf: Location of shipyard.conf
        :param start_time: Time when Operator gets executed
        :param sub_dag_name: Child Dag
        :param xcom_push: xcom usage (stored as ``self.xcom_push_flag``)

        ``self.continue_processing`` is an instance attribute rather than
        a parameter; it starts out True and gates the ``do_execute`` call
        in ``execute``.
        """
        super(UcpBaseOperator, self).__init__(*args, **kwargs)

        self.continue_processing = True
        self.main_dag_name = main_dag_name
        self.pod_selector_pattern = pod_selector_pattern or []
        self.shipyard_conf = shipyard_conf
        # The construction time is recorded here; the 'start_time'
        # argument itself is not read.
        self.start_time = datetime.now()
        self.sub_dag_name = sub_dag_name
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        """Run the UCP base steps and, unless told to stop, the child step.

        :param context: Airflow task context; must hold 'task_instance'
        """
        # Common UCP setup
        self.ucp_base(context)

        # Child operator's own base setup
        self.run_base(context)

        # A base step may have cleared the flag to skip the child function
        if not self.continue_processing:
            return

        self.do_execute()

    def ucp_base(self, context):
        """Load the namespace from shipyard.conf and pull values from xcom.

        Sets ``ucp_namespace``, ``task_instance``, ``xcom_puller``,
        ``action_info``, ``dc`` and ``revision_id``.

        :param context: Airflow task context; must hold 'task_instance'
        """
        LOG.info("Running UCP Base Operator...")

        # Parse shipyard.conf to find the kubernetes namespace
        conf_parser = configparser.ConfigParser()
        conf_parser.read(self.shipyard_conf)
        self.ucp_namespace = conf_parser.get('k8s_logs', 'ucp_namespace')

        self.task_instance = context['task_instance']

        # Fetch the action details and deployment configuration from xcom
        puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.xcom_puller = puller
        self.action_info = puller.get_action_info()
        self.dc = puller.get_deployment_configuration()
        self.revision_id = self.action_info['committed_rev_id']

    def get_k8s_logs(self):
        """Fetch the Kubernetes pod/container logs requested by an operator.

        This is a "best effort" step: a K8sLoggingException is logged and
        the remaining selectors are still processed, so that log retrieval
        does not hold up the workflow.
        """
        if not self.pod_selector_pattern:
            LOG.debug("There are no pod logs specified to retrieve")
            return

        for pod_selector in self.pod_selector_pattern:
            # Whole seconds (rounded up) elapsed since the operator's
            # recorded start time
            elapsed = datetime.now() - self.start_time
            elapsed_seconds = int(math.ceil(elapsed.total_seconds()))

            try:
                get_pod_logs(pod_selector['pod_pattern'],
                             self.ucp_namespace,
                             pod_selector['container'],
                             elapsed_seconds)
            except K8sLoggingException as log_error:
                LOG.error(log_error)
class UcpBaseOperator(BaseOperator):
    """UCP Base Operator

    All UCP related workflow operators will use the UCP base
    operator as the parent and inherit attributes and methods
    from this class. Subclasses are expected to provide ``run_base`` and
    ``do_execute``, both of which ``execute`` calls.
    """

    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 pod_selector_pattern=None,
                 shipyard_conf=None,
                 start_time=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Initialization of UcpBaseOperator object.

        :param main_dag_name: Parent Dag
        :param pod_selector_pattern: A list containing the information on
                                     the patterns of the Pod name and name
                                     of the associated container for log
                                     queries. This will allow us to query
                                     multiple components, e.g. MAAS and
                                     Drydock at the same time. It also allows
                                     us to query the logs of specific container
                                     in Pods with multiple containers. For
                                     instance the Airflow worker pod contains
                                     both the airflow-worker container and the
                                     log-rotate container.
        :param shipyard_conf: Location of shipyard.conf
        :param start_time: Time when Operator gets executed
        :param xcom_push: xcom usage (stored as ``self.xcom_push_flag``)

        The instance attribute ``continue_processing`` (not a parameter) is
        initialised to True; ``execute`` only calls ``do_execute`` while it
        remains truthy.
        """
        super(UcpBaseOperator, self).__init__(*args, **kwargs)
        self.continue_processing = True
        self.main_dag_name = main_dag_name
        self.pod_selector_pattern = pod_selector_pattern or []
        self.shipyard_conf = shipyard_conf
        # NOTE(review): the 'start_time' argument is accepted but not read;
        # the construction time is always used. Confirm this is intended.
        self.start_time = datetime.now()
        self.xcom_push_flag = xcom_push
        self.doc_utils = _get_document_util(self.shipyard_conf)
        self.endpoints = service_endpoint.ServiceEndpoints(self.shipyard_conf)

    def execute(self, context):
        """Run the UCP base steps and, if still enabled, the child step.

        :param context: Airflow task context; must hold 'task_instance'
        """
        # Execute UCP base function
        self.ucp_base(context)

        # Execute base function
        self.run_base(context)

        if self.continue_processing:
            # Execute child function
            self.do_execute()

    def ucp_base(self, context):
        """Load the namespace from shipyard.conf and pull values from xcom.

        Sets ``ucp_namespace``, ``task_instance``, ``xcom_puller``,
        ``action_info``, ``dc``, ``revision_id`` and ``design_ref``.

        :param context: Airflow task context; must hold 'task_instance'
        """
        LOG.info("Running UCP Base Operator...")

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variable
        self.ucp_namespace = config.get('k8s_logs', 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()
        self.revision_id = self.action_info['committed_rev_id']
        self.design_ref = self._deckhand_design_ref()

    def get_k8s_logs(self):
        """Retrieve Kubernetes pod/container logs specified by an operator

        This method is "best effort" and should not prevent the progress of
        the workflow processing
        """
        if self.pod_selector_pattern:
            for selector in self.pod_selector_pattern:
                # Get difference in current time and time when the
                # operator was first executed (in seconds)
                t_diff = (datetime.now() - self.start_time).total_seconds()

                # Note that we will end up with a floating number for
                # 't_diff' and will need to round it up to the nearest
                # integer
                t_diff_int = int(math.ceil(t_diff))

                try:
                    get_pod_logs(selector['pod_pattern'],
                                 self.ucp_namespace,
                                 selector['container'],
                                 t_diff_int)

                except K8sLoggingException as e:
                    LOG.error(e)

        else:
            LOG.debug("There are no pod logs specified to retrieve")

    def _deckhand_design_ref(self):
        """Assemble a deckhand design_ref

        :returns: "deckhand+<endpoint>/revisions/<revision_id>/
            rendered-documents" built from the Deckhand endpoint and
            ``self.revision_id``
        """
        # Retrieve DeckHand Endpoint Information
        LOG.info("Assembling a design ref using revision: %s",
                 self.revision_id)
        deckhand_svc_endpoint = self.endpoints.endpoint_by_name(
            service_endpoint.DECKHAND)
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+{}".format(deckhand_svc_endpoint)
        design_ref = os.path.join(deckhand_path,
                                  "revisions",
                                  str(self.revision_id),
                                  "rendered-documents")
        LOG.info("Design Reference is %s", design_ref)
        return design_ref

    def get_unique_doc(self, schema, name, revision_id=None):
        """Retrieve a specific document from Deckhand

        :param schema: the schema of the document
        :param name: the metadata.name of the document
        :param revision_id: the deckhand revision, or defaults to
            self.revision_id
        Wraps the document_validation_utils call to get the same.
        Returns the specified document or raises an Airflow exception.
        """
        if revision_id is None:
            revision_id = self.revision_id

        # Log the document actually being requested
        LOG.info("Retrieve %s, %s from Deckhand", schema, name)
        try:
            return self.doc_utils.get_unique_doc(revision_id=revision_id,
                                                 name=name,
                                                 schema=schema)
        except Exception as ex:
            # Argument order must follow the placeholders: name, then schema
            LOG.error(
                "A document was expected to be available: Name: %s, "
                "Schema: %s, Deckhand revision: %s, but there was an "
                "error attempting to retrieve it. Since this document's "
                "contents may be critical to the proper operation of "
                "the workflow, this is fatal.",
                name, schema, revision_id)
            LOG.exception(ex)
            # if the document is not found for ANY reason, the workflow is
            # broken. Raise an Airflow Exception.
            raise AirflowException(ex)
class ArmadaBaseOperator(BaseOperator):
    """Armada Base Operator

    All armada related workflow operators will use the armada
    base operator as the parent and inherit attributes and methods
    from this class. Subclasses are expected to provide ``do_execute``,
    which ``execute`` calls after the shared setup.
    """

    @apply_defaults
    def __init__(self,
                 armada_svc_type='armada',
                 deckhand_svc_type='deckhand',
                 main_dag_name=None,
                 query=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_session=None,
                 svc_token=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Initialization of ArmadaBaseOperator object.

        :param armada_svc_type: Armada Service Type
        :param deckhand_svc_type: Deckhand Service Type
        :param main_dag_name: Parent Dag
        :param query: A dictionary containing explicit query string parameters.
                      When omitted, each operator gets its own empty dict.
        :param shipyard_conf: Location of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_session: Keystone Session
        :param svc_token: Keystone Token
        :param xcom_push: xcom usage (stored as ``self.xcom_push_flag``)

        The Armada operator assumes that prior steps have set xcoms for
        the action and the deployment configuration
        """
        super(ArmadaBaseOperator, self).__init__(*args, **kwargs)
        self.armada_svc_type = armada_svc_type
        self.deckhand_svc_type = deckhand_svc_type
        self.main_dag_name = main_dag_name
        # get_tiller_info() writes into self.query, so a shared default
        # dict would leak tiller settings between operator instances.
        # A caller-supplied dict is still stored by reference.
        self.query = query if query is not None else {}
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_session = svc_session
        self.svc_token = svc_token
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        """Run the shared Armada setup, then the child operator's work.

        :param context: Airflow task context; must hold 'task_instance'
        """
        # Execute armada base function
        self.armada_base(context)

        # Execute child function
        self.do_execute()

    @shipyard_service_token
    def armada_base(self, context):
        """Set up xcom access, the Armada client and the design reference.

        Populates ``task_instance``, ``xcom_puller``, ``action_info``,
        ``dc``, ``xcom_pusher``, ``armada_client`` and
        ``deckhand_design_ref``.

        :param context: Airflow task context; must hold 'task_instance'
        :raises AirflowException: if the Armada session/client or the
            design reference cannot be set up
        """
        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Logs uuid of action performed by the Operator
        logging.info("Armada Operator for action %s", self.action_info['id'])

        # Retrieve Endpoint Information
        armada_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.armada_svc_type)

        # Set up armada client
        self.armada_client = self._init_armada_client(armada_svc_endpoint,
                                                      self.svc_token)

        # Retrieve DeckHand Endpoint Information
        deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        # Retrieve last committed revision id
        committed_revision_id = self.xcom_puller.get_design_version()

        # Get deckhand design reference url
        self.deckhand_design_ref = self._init_deckhand_design_ref(
            deckhand_svc_endpoint,
            committed_revision_id)

    @staticmethod
    def _init_armada_client(armada_svc_endpoint, svc_token):
        """Build an Armada client for the given endpoint.

        :param armada_svc_endpoint: URL of the Armada service
        :param svc_token: token handed to the Armada session
        :returns: the ArmadaClient built on a new ArmadaSession
        :raises AirflowException: if the session or client is falsy
        """
        logging.info("Armada endpoint is %s", armada_svc_endpoint)

        # Parse Armada Service Endpoint
        armada_url = urlparse(armada_svc_endpoint)

        # Build a ArmadaSession with credentials and target host
        # information.
        # NOTE(review): the scheme is fixed to 'http' regardless of the
        # scheme in the endpoint URL -- confirm this is intended.
        logging.info("Build Armada Session")
        a_session = session.ArmadaSession(host=armada_url.hostname,
                                          port=armada_url.port,
                                          scheme='http',
                                          token=svc_token,
                                          marker=None)

        # Raise Exception if we are not able to set up the session
        if a_session:
            logging.info("Successfully Set Up Armada Session")
        else:
            raise AirflowException("Failed to set up Armada Session!")

        # Use the ArmadaSession to build a ArmadaClient that can
        # be used to make one or more API calls
        logging.info("Create Armada Client")
        _armada_client = client.ArmadaClient(a_session)

        # Raise Exception if we are not able to build armada client
        if _armada_client:
            logging.info("Successfully Set Up Armada client")

            return _armada_client
        else:
            raise AirflowException("Failed to set up Armada client!")

    @staticmethod
    def _init_deckhand_design_ref(deckhand_svc_endpoint,
                                  committed_revision_id):
        """Build the Deckhand design reference for a revision.

        :param deckhand_svc_endpoint: URL of the Deckhand service (str)
        :param committed_revision_id: revision whose rendered documents
            are referenced
        :returns: "deckhand+<endpoint>/revisions/<id>/rendered-documents"
        :raises AirflowException: if the assembled reference is empty
        """
        logging.info("Deckhand endpoint is %s", deckhand_svc_endpoint)

        # Form DeckHand Design Reference Path
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+" + deckhand_svc_endpoint
        _deckhand_design_ref = os.path.join(deckhand_path,
                                            "revisions",
                                            str(committed_revision_id),
                                            "rendered-documents")

        if _deckhand_design_ref:
            logging.info("Design YAMLs will be retrieved from %s",
                         _deckhand_design_ref)

            return _deckhand_design_ref
        else:
            raise AirflowException("Unable to Retrieve Design Reference!")

    @get_pod_port_ip('tiller', namespace='kube-system')
    def get_tiller_info(self, pods_ip_port={}):
        """Copy the tiller pod's IP and port into ``self.query``.

        :param pods_ip_port: mapping with a 'tiller' entry holding 'ip' and
            'port'. The default dict is only read here, never mutated;
            presumably the decorator supplies the real value -- confirm.
        """
        # Assign value to the 'query' dictionary so that we can pass
        # it via the Armada Client
        self.query['tiller_host'] = pods_ip_port['tiller']['ip']
        self.query['tiller_port'] = pods_ip_port['tiller']['port']
class UcpHealthCheckOperator(BaseOperator):
    """Airship Health Checks

    Calls the ``health`` endpoint of each Airship component and fails the
    task when a component cannot be reached or answers with an HTTP error,
    unless the Drydock 'continue-on-fail' exception applies (see
    ``log_health_exception``).
    """

    @apply_defaults
    def __init__(self,
                 shipyard_conf=None,
                 main_dag_name=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of UcpHealthCheckOperator object.

        :param shipyard_conf: Path of shipyard.conf
        :param main_dag_name: Parent Dag
        :param xcom_push: xcom usage
        """
        super(UcpHealthCheckOperator, self).__init__(*args, **kwargs)
        self.shipyard_conf = shipyard_conf
        self.main_dag_name = main_dag_name
        self.xcom_push_flag = xcom_push
        self.endpoints = service_endpoint.ServiceEndpoints(self.shipyard_conf)

    def execute(self, context):
        """Perform a health check against every Airship component.

        :param context: Airflow task context; must contain 'task_instance'
        :raises AirflowException: via ``log_health_exception`` when a
            component fails its health check and the failure may not be
            ignored
        """
        # Components to check, in the order they are checked
        ucp_components = [
            service_endpoint.ARMADA,
            service_endpoint.DECKHAND,
            service_endpoint.DRYDOCK,
            service_endpoint.PROMENADE,
            service_endpoint.SHIPYARD
        ]

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Loop through various Airship Components
        for component in ucp_components:
            # Retrieve Endpoint Information
            endpoint = self.endpoints.endpoint_by_name(component)
            LOG.info("%s endpoint is %s", component, endpoint)

            # Construct Health Check Endpoint
            healthcheck_endpoint = os.path.join(endpoint, 'health')

            try:
                LOG.info("Performing Health Check on %s at %s", component,
                         healthcheck_endpoint)
                # Set health check timeout to 30 seconds
                response = requests.get(healthcheck_endpoint, timeout=30)
                # A 4xx/5xx answer is not raised by requests.get() itself;
                # turn it into an HTTPError (a RequestException) so that an
                # unhealthy component is not silently treated as passing.
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                self.log_health_exception(component, e)
                continue

            # An empty response/body returned by a component means
            # that it is healthy
            if response.status_code == 204:
                LOG.info("%s is alive and healthy", component)
            else:
                LOG.warning("%s health check returned unexpected status "
                            "code %s", component, response.status_code)

    def log_health_exception(self, component, error_messages):
        """Logs Exceptions for health check

        :param component: name of the component that failed its check
        :param error_messages: exception (or message) describing the failure
        :raises AirflowException: unless the failed component is Drydock,
            'continue-on-fail' is 'true' and the dag is 'update_site' or
            'deploy_site'
        """
        # Tolerate a missing/None action_info or 'parameters' entry so that
        # the real health check failure is reported instead of an unrelated
        # AttributeError/KeyError.
        action_info = self.action_info or {}
        parameters = action_info.get('parameters') or {}
        dag_id = action_info.get('dag_id')
        continue_on_fail = str(
            parameters.get('continue-on-fail', 'false')).lower() == 'true'

        # If Drydock health check fails and continue-on-fail, continue
        # and create xcom key 'drydock_continue_on_fail'
        # Note that 'update_software' does not interact with Drydock, and
        # therefore does not use the continue-on-fail option.
        if (component == service_endpoint.DRYDOCK and
                continue_on_fail and
                dag_id in ['update_site', 'deploy_site']):
            LOG.warning('Drydock did not pass health check. Continuing '
                        'as "continue-on-fail" option is enabled.')
            self.xcom_pusher.xcom_push(key='drydock_continue_on_fail',
                                       value=True)
        else:
            LOG.error(error_messages)
            raise AirflowException("Health check failed for %s component on "
                                   "dag_id=%s. Details: %s" %
                                   (component, dag_id, error_messages))
class DeploymentConfigurationOperator(BaseOperator):
    """Deployment Configuration Operator

    Retrieve the deployment-configuration from Deckhand for use throughout
    the workflow. Put the configuration into a dictionary.

    Failures are raised:
        - when Deckhand cannot be contacted
        - when the DeploymentConfiguration (deployment-configuration)
          document cannot be retrieved
    """

    # Dot-notation keys looked up in the deployment-configuration document,
    # mapped to the value used when the document does not provide one.
    config_keys_defaults = {
        "physical_provisioner.deployment_strategy": None,
        "physical_provisioner.deploy_interval": 30,
        "physical_provisioner.deploy_timeout": 3600,
        "physical_provisioner.destroy_interval": 30,
        "physical_provisioner.destroy_timeout": 900,
        "physical_provisioner.join_wait": 120,
        "physical_provisioner.prepare_node_interval": 30,
        "physical_provisioner.prepare_node_timeout": 1800,
        "physical_provisioner.prepare_site_interval": 10,
        "physical_provisioner.prepare_site_timeout": 300,
        "physical_provisioner.verify_interval": 10,
        "physical_provisioner.verify_timeout": 60,
        "physical_provisioner.relabel_nodes_interval": 30,
        "physical_provisioner.relabel_nodes_timeout": 900,
        "kubernetes.node_status_interval": 30,
        "kubernetes.node_status_timeout": 1800,
        "kubernetes_provisioner.drain_timeout": 3600,
        "kubernetes_provisioner.drain_grace_period": 1800,
        "kubernetes_provisioner.clear_labels_timeout": 1800,
        "kubernetes_provisioner.remove_etcd_timeout": 1800,
        "kubernetes_provisioner.etcd_ready_timeout": 600,
        "armada.get_releases_timeout": 300,
        "armada.get_status_timeout": 300,
        "armada.manifest": "full-site",
        "armada.post_apply_timeout": 2700,
        "armada.validate_design_timeout": 600
    }

    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 shipyard_conf=None,
                 *args, **kwargs):
        """Deployment Configuration Operator

        Generate a DeploymentConfigurationOperator to read the deployment's
        configuration for use by other operators

        :param main_dag_name: Parent Dag
        :param shipyard_conf: Location of shipyard.conf
        """
        super(DeploymentConfigurationOperator, self).__init__(*args, **kwargs)
        self.main_dag_name = main_dag_name
        self.shipyard_conf = shipyard_conf
        self.action_info = {}

    def _read_config(self):
        """Read in and parse the shipyard config"""
        self.config = configparser.ConfigParser()
        self.config.read(self.shipyard_conf)

    def execute(self, context):
        """Perform Deployment Configuration extraction

        :param context: Airflow task context; 'task_instance' is read from it
        :returns: dict of dot-notation config keys to their effective values
        :raises AirflowException: when the revision id or the document
            cannot be obtained
        """
        self._read_config()
        revision_id = self.get_revision_id(context.get('task_instance'))
        doc = self.get_doc(revision_id)
        converted = self.map_config_keys(doc)
        # return the mapped configuration so that it can be placed on xcom
        return converted

    def get_revision_id(self, task_instance):
        """Get the revision id from xcom

        :param task_instance: task instance used to pull xcom values
        :returns: the 'committed_rev_id' recorded in the action info
        :raises AirflowException: when there is no task instance, no action
            info, or no (truthy) committed revision id
        """
        if task_instance:
            LOG.debug("task_instance found, extracting design version")
            # Get XcomPuller instance
            self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
            # Set the revision_id to the revision on the xcom
            self.action_info = self.xcom_puller.get_action_info()

            # A missing action info or missing key must end in the
            # AirflowException below rather than a bare TypeError/KeyError.
            revision_id = (self.action_info or {}).get('committed_rev_id')

            if revision_id:
                LOG.info("Revision is set to: %s for deployment-configuration",
                         revision_id)
                return revision_id
        # either revision id was not on xcom, or the task_instance is messed
        raise AirflowException(
            "Design_revision is not set. Cannot proceed with retrieval of"
            " the design configuration"
        )

    def get_doc(self, revision_id):
        """Get the DeploymentConfiguration document dictionary from Deckhand

        :param revision_id: Deckhand revision to read the document from
        :returns: the ``data`` of the single matching rendered document
        :raises AirflowException: when the Deckhand call fails, or when the
            result is not exactly one document with non-empty data
        """
        schema_fallback = 'shipyard/DeploymentConfiguration/v1'
        schema = self.config.get(DOCUMENT_INFO,
                                 'deployment_configuration_schema',
                                 fallback=schema_fallback)
        name = self.config.get(DOCUMENT_INFO,
                               'deployment_configuration_name',
                               fallback='deployment-configuration')
        LOG.info("Attempting to retrieve %s, %s from Deckhand", schema, name)
        filters = {"schema": schema, "metadata.name": name}

        # Create additional headers dict to pass context marker
        # and end user
        addl_headers = None
        if self.action_info:
            context_marker = self.action_info['context_marker']
            end_user = self.action_info['user']
            addl_headers = {
                CustomHeaders.CONTEXT_MARKER.value: context_marker,
                CustomHeaders.END_USER.value: end_user
            }

        try:
            dhclient = DeckhandClientFactory(
                self.shipyard_conf).get_client(addl_headers=addl_headers)
            LOG.info("Deckhand Client acquired")
            doc = dhclient.revisions.documents(revision_id,
                                               rendered=True,
                                               **filters)
        except Exception as ex:
            # Not every exception carries the URL that was being requested
            failed_url = getattr(ex, 'url', "No URL generated")
            LOG.exception(ex)
            raise AirflowException("Failed to retrieve deployment-"
                                   "configuration yaml using url: "
                                   "{}".format(failed_url))

        if len(doc) == 1 and doc[0].data:
            doc_dict = doc[0].data
        else:
            raise AirflowException("A valid deployment-configuration is "
                                   "required")

        LOG.info("DeploymentConfiguration retrieved")
        return doc_dict

    def map_config_keys(self, cfg_data):
        """Maps the deployment-configuration

        Converts to a more simple map of key-value pairs

        :param cfg_data: nested deployment-configuration data
        :returns: dict with one entry per key of ``config_keys_defaults``
        """
        LOG.info("Mapping keys from deployment-configuration")
        return {
            cfg_key: self.get_cfg_value(cfg_data, cfg_key, cfg_default)
            for cfg_key, cfg_default in
            DeploymentConfigurationOperator.config_keys_defaults.items()
        }

    def get_cfg_value(self, cfg_data, cfg_key, cfg_default):
        """Uses the dot notation key to get the value from the design config

        :param cfg_data: nested deployment-configuration data
        :param cfg_key: dot-separated path, e.g. 'armada.manifest'
        :param cfg_default: value returned when the path yields nothing
        :returns: the value found at ``cfg_key``, or ``cfg_default`` when the
            path is absent or the value is falsy (None, 0, '', {} ...)
        """
        data = cfg_data
        for node in cfg_key.split('.'):
            try:
                data = data.get(node, {})
            except AttributeError:
                # An intermediate node is None or a scalar (e.g. an empty
                # section in the document): there is nothing to descend
                # into, so fall back to the default.
                data = None
                break
        if data:
            LOG.info("Deployment Config value set- %s: %s", cfg_key, data)
            return data
        else:
            LOG.info("Deployment Config using default- %s: %s",
                     cfg_key, cfg_default)
            return cfg_default
class UcpHealthCheckOperator(BaseOperator):
    """UCP Health Checks

    Calls the ``health`` endpoint of each UCP component and fails the task
    when a component cannot be reached or answers with an HTTP error, unless
    the Drydock 'continue-on-fail' exception applies (see
    ``log_health_exception``).
    """

    @apply_defaults
    def __init__(self,
                 shipyard_conf=None,
                 main_dag_name=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of UcpHealthCheckOperator object.

        :param shipyard_conf: Path of shipyard.conf
        :param main_dag_name: Parent Dag
        :param xcom_push: xcom usage
        """
        super(UcpHealthCheckOperator, self).__init__(*args, **kwargs)
        self.shipyard_conf = shipyard_conf
        self.main_dag_name = main_dag_name
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        """Perform a health check against every UCP component.

        :param context: Airflow task context; must contain 'task_instance'
        :raises AirflowException: via ``log_health_exception`` when a
            component fails its health check and the failure may not be
            ignored
        """
        # Service types to check, in the order they are checked
        ucp_components = [
            'armada',
            'deckhand',
            'kubernetesprovisioner',
            'physicalprovisioner',
            'shipyard'
        ]

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Loop through various UCP Components
        for component in ucp_components:
            # Retrieve Endpoint Information
            endpoint = ucp_service_endpoint(self, svc_type=component)
            LOG.info("%s endpoint is %s", component, endpoint)

            # Construct Health Check Endpoint
            healthcheck_endpoint = os.path.join(endpoint, 'health')
            LOG.info("%s healthcheck endpoint is %s", component,
                     healthcheck_endpoint)

            try:
                LOG.info("Performing Health Check on %s", component)
                # Set health check timeout to 30 seconds
                response = requests.get(healthcheck_endpoint, timeout=30)
                # A 4xx/5xx answer is not raised by requests.get() itself;
                # turn it into an HTTPError (a RequestException) so that an
                # unhealthy component is not silently treated as passing.
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                self.log_health_exception(component, e)
                continue

            # An empty response/body returned by a component means
            # that it is healthy
            if response.status_code == 204:
                LOG.info("%s is alive and healthy", component)
            else:
                LOG.warning("%s health check returned unexpected status "
                            "code %s", component, response.status_code)

    def log_health_exception(self, component, error_messages):
        """Logs Exceptions for health check

        :param component: service type of the component that failed
        :param error_messages: exception (or message) describing the failure
        :raises AirflowException: unless the failed component is
            'physicalprovisioner', 'continue-on-fail' is 'true' and the dag
            is 'update_site' or 'deploy_site'
        """
        # 'continue-on-fail' is optional: default it to 'false' and tolerate
        # a missing/None 'parameters' entry, so the real health check failure
        # is reported instead of an AttributeError on None.
        action_info = self.action_info or {}
        parameters = action_info.get('parameters') or {}
        dag_id = action_info.get('dag_id')
        continue_on_fail = str(
            parameters.get('continue-on-fail', 'false')).lower() == 'true'

        # If Drydock health check fails and continue-on-fail, continue
        # and create xcom key 'drydock_continue_on_fail'
        if (component == 'physicalprovisioner' and
                continue_on_fail and
                dag_id in ['update_site', 'deploy_site']):
            LOG.warning('Drydock did not pass health check. Continuing '
                        'as "continue-on-fail" option is enabled.')
            self.xcom_pusher.xcom_push(key='drydock_continue_on_fail',
                                       value=True)
        else:
            LOG.error(error_messages)
            raise AirflowException(
                "Health check failed for %s component on "
                "dag_id=%s. Details: %s" %
                (component, dag_id, error_messages))
class DeckhandBaseOperator(BaseOperator):
    """Deckhand Base Operator

    All deckhand related workflow operators will use the deckhand
    base operator as the parent and inherit attributes and methods
    from this class
    """

    @apply_defaults
    def __init__(self,
                 committed_ver=None,
                 deckhandclient=None,
                 deckhand_client_read_timeout=None,
                 deckhand_svc_endpoint=None,
                 deckhand_svc_type='deckhand',
                 main_dag_name=None,
                 revision_id=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_session=None,
                 svc_token=None,
                 validation_read_timeout=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Initialization of DeckhandBaseOperator object.

        :param committed_ver: Last committed version
        :param deckhandclient: An instance of deckhand client
        :param deckhand_client_read_timeout: Deckhand client read timeout
            (replaced by the shipyard.conf value in ``deckhand_base``)
        :param deckhand_svc_endpoint: Deckhand Service Endpoint
        :param deckhand_svc_type: Deckhand Service Type
        :param main_dag_name: Parent Dag
        :param revision_id: Target revision for workflow
        :param shipyard_conf: Path of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_session: Keystone Session
        :param svc_token: Keystone Token
        :param validation_read_timeout: Deckhand validation timeout
            (replaced by the shipyard.conf value in ``deckhand_base``)
        :param xcom_push: xcom usage
        """
        super(DeckhandBaseOperator, self).__init__(*args, **kwargs)
        self.committed_ver = committed_ver
        self.deckhandclient = deckhandclient
        self.deckhand_client_read_timeout = deckhand_client_read_timeout
        self.deckhand_svc_endpoint = deckhand_svc_endpoint
        self.deckhand_svc_type = deckhand_svc_type
        self.main_dag_name = main_dag_name
        self.revision_id = revision_id
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_session = svc_session
        self.svc_token = svc_token
        self.validation_read_timeout = validation_read_timeout
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        """Run the common Deckhand setup, then the child operator's work.

        :param context: Airflow task context
        :returns: ``self.committed_ver`` when ``sub_dag_name`` is
            'deckhand_get_design_version'; otherwise None
        """
        # Execute deckhand base function
        self.deckhand_base(context)

        # Execute child function (do_execute is not defined in this class;
        # child operators are expected to provide it)
        self.do_execute()

        # Push last committed version to xcom for the
        # 'deckhand_get_design_version' subdag
        if self.sub_dag_name == 'deckhand_get_design_version':
            return self.committed_ver

    @shipyard_service_token
    def deckhand_base(self, context):
        """Prepare timeouts, xcom values, endpoint and Deckhand client.

        :param context: Airflow task context; must contain 'task_instance'
        :raises AirflowException: when the Deckhand client cannot be set up
            or, for tasks other than 'deckhand_get_design_version', when no
            revision id can be retrieved from xcom
        """
        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variables
        self.deckhand_client_read_timeout = config.getint(
            'requests_config', 'deckhand_client_read_timeout')

        self.validation_read_timeout = config.getint(
            'requests_config', 'validation_read_timeout')

        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Logs uuid of Shipyard action
        logging.info("Executing Shipyard Action %s",
                     self.action_info['id'])

        # Retrieve Endpoint Information
        self.deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        logging.info("Deckhand endpoint is %s",
                     self.deckhand_svc_endpoint)

        # Set up DeckHand Client
        logging.info("Setting up DeckHand Client...")

        # NOTE: The communication between the Airflow workers
        # and Deckhand happens via the 'internal' endpoint.
        self.deckhandclient = deckhand_client.Client(
            session=self.svc_session, endpoint_type='internal')

        if not self.deckhandclient:
            raise AirflowException('Failed to set up deckhand client!')

        # Retrieve 'revision_id' from xcom for tasks other than
        # 'deckhand_get_design_version'
        #
        # NOTE: In the case of 'deploy_site', the dag_id will
        # be 'deploy_site.deckhand_get_design_version' for the
        # 'deckhand_get_design_version' task. We need to extract
        # the xcom value from it in order to get the value of the
        # last committed revision ID
        if self.task_id != 'deckhand_get_design_version':

            # Retrieve 'revision_id' from xcom
            self.revision_id = self.xcom_puller.get_design_version()

            if self.revision_id:
                # %s rather than %d: a revision id that is not an int
                # (e.g. a numeric string) would otherwise produce a
                # logging error and the message would be lost
                logging.info("Revision ID is %s", self.revision_id)
            else:
                raise AirflowException('Failed to retrieve Revision ID!')