def run_playbook_process(self, playbook_info, percentage_completed):
    """Fork playbook_helper.py as a child process and collect its output.

    The child writes marker-delimited JSON into a per-execution file; the
    markers are keyed by a fresh uuid (unique_pb_id) injected into the
    playbook input so concurrent playbooks of the same execution don't
    collide.

    :param playbook_info: dict with 'uri' and 'extra_vars.playbook_input'
    :param percentage_completed: job progress recorded on the instance
    :return: dict parsed from the PLAYBOOK_OUTPUT marker, or None if the
             child never wrote that marker
    :raises JobException: on playbook timeout or non-zero exit code
    """
    playbook_process = None
    self.current_percentage = percentage_completed
    try:
        playbook_exec_path = os.path.dirname(__file__) \
            + "/playbook_helper.py"

        # unique id ties this process's file markers to this run
        unique_pb_id = str(uuid.uuid4())
        playbook_info['extra_vars']['playbook_input']['unique_pb_id']\
            = unique_pb_id
        exec_id =\
            playbook_info['extra_vars']['playbook_input'][
                'job_execution_id']
        pr_uve_name = self.get_pr_uve_name_from_device_name(playbook_info)

        playbook_process = subprocess32.Popen(
            ["python", playbook_exec_path, "-i",
             json.dumps(playbook_info)],
            close_fds=True, cwd='/')
        # this is to yield the context to the playbooks so that
        # they start running concurrently
        gevent.sleep(0)

        # tail the marker file until the child writes its END marker
        marked_output = self.process_file_and_get_marked_output(
            unique_pb_id, exec_id, playbook_process, pr_uve_name)
        marked_jsons = self._extract_marked_json(marked_output)
        playbook_output = marked_jsons.get(JobFileWrite.PLAYBOOK_OUTPUT)
        playbook_process.wait(timeout=self._playbook_timeout)

        # create prouter UVE in job_manager only if it is not a multi
        # device job template
        if not self._job_template.get_job_template_multi_device_job():
            status = "SUCCESS" if playbook_process.returncode == 0 \
                else "FAILURE"
            self.send_prouter_uve(exec_id, status)
    except subprocess32.TimeoutExpired as timeout_exp:
        if playbook_process is not None:
            # 9 == SIGKILL: the child overran its budget, stop it hard
            os.kill(playbook_process.pid, 9)
        msg = MsgBundle.getMessage(
            MsgBundle.RUN_PLAYBOOK_PROCESS_TIMEOUT,
            playbook_uri=playbook_info['uri'],
            exc_msg=repr(timeout_exp))
        raise JobException(msg, self._execution_id)

    # non-zero exit: surface the playbook's own message when available
    if playbook_process.returncode != 0:
        msg = MsgBundle.getMessage(MsgBundle.
                                   PLAYBOOK_EXIT_WITH_ERROR,
                                   playbook_uri=playbook_info['uri'])
        if playbook_output:
            msg = "%s\n Error Message from playbook: %s" % (
                msg, playbook_output.get('message', "")
            )
        raise JobException(msg, self._execution_id)
    return playbook_output
def create_job_summary_message(self):
    """Build the human-readable summary for this job's result.

    Reads self.job_result_status, self.failed_device_jobs,
    self.job_result and self.job_result_message.

    :return: 3-tuple of
        job_summary_message -- aggregated status text,
        device_op_results   -- op results collected from successful
                               per-device jobs,
        failed_device_names -- device names whose job failed
    """
    job_summary_message = MsgBundle.getMessage(
        MsgBundle.JOB_SUMMARY_MESSAGE_HDR)
    failed_device_jobs_len = len(self.failed_device_jobs)

    if self.job_result_status is None:
        job_summary_message += MsgBundle.getMessage(
            MsgBundle.JOB_RESULT_STATUS_NONE)
    elif self.job_result_status == JobStatus.FAILURE:
        if failed_device_jobs_len > 0:
            job_summary_message += MsgBundle.getMessage(
                MsgBundle.
                JOB_MULTI_DEVICE_FAILED_MESSAGE_HDR)
            # comma-separated list of failed device ids
            for failed_device in self.failed_device_jobs:
                msg = failed_device + ','
                job_summary_message += msg
        else:
            job_summary_message += "Job failed. "
        job_summary_message += "\n"
    elif self.job_result_status == JobStatus.SUCCESS:
        job_summary_message += MsgBundle.getMessage(
            MsgBundle.JOB_EXECUTION_COMPLETE)

    device_job_result_len = len(self.job_result)
    if device_job_result_len > 0:
        job_summary_message += MsgBundle.getMessage(
            MsgBundle.PLAYBOOK_RESULTS_MESSAGE)
        job_summary_message += "Successfully completed "\
                               "job for %s devices.\n"\
                               % (device_job_result_len -
                                  failed_device_jobs_len)

    # result_summary would infact be the failed_devices
    # result summary
    result_summary = ""
    device_op_results = []
    failed_device_names = []
    for entry in self.job_result:
        if entry in self.failed_device_jobs:
            result_summary += \
                "%s:%s \n" % (self.job_result[entry]['device_name'],
                              self.job_result[entry]['message'])
            failed_device_names.append(
                self.job_result[entry]['device_name'])
        elif self.job_result[entry]['device_op_result']:
            # could be other device jobs such as device import, topology
            device_op_results.append(
                self.job_result[entry]['device_op_result'])
    if result_summary != "":
        failed_device_msg = "Job execution failed for %s devices.\n"\
                            % len(self.failed_device_jobs)
        result_summary = failed_device_msg + result_summary
        job_summary_message += result_summary
    if self.job_result_message is not None:
        job_summary_message += self.job_result_message
    return job_summary_message, device_op_results, failed_device_names
def handle_init_failure(job_input_json, error_num, error_msg):
    """Log an initialization failure, emit a FAILURE job log, and exit.

    :param job_input_json: raw job input dict; identifiers are read with
        .get() so missing keys are tolerated
    :param error_num: MsgBundle message id for this failure class
    :param error_msg: short error text placed in the user-visible job log
    """
    fq_name = job_input_json.get('job_template_fq_name')
    exec_id = job_input_json.get('job_execution_id')
    fabric_fq_name = job_input_json.get('fabric_fq_name')

    # full traceback goes to the error log; only the short message is
    # sent to the job log users see
    logger.error(
        MsgBundle.getMessage(error_num, exc_msg=traceback.format_exc()))
    msg = MsgBundle.getMessage(error_num, exc_msg=error_msg)
    job_log_utils.send_job_log(fq_name, exec_id, fabric_fq_name, msg,
                               JobStatus.FAILURE)
    sys.exit(msg)
def parse_job_input(self, job_input_json):
    """Validate mandatory fields and cache the job input on the instance.

    :param job_input_json: dict carrying at least 'job_template_id' and
        'job_execution_id'; 'input' and 'fabric_fq_name' are optional
    :raises Exception: when either mandatory field is absent
    """
    # job input should have job_template_id and execution_id field
    if job_input_json.get('job_template_id') is None:
        raise Exception(MsgBundle.getMessage(MsgBundle.JOB_TEMPLATE_MISSING))
    if job_input_json.get('job_execution_id') is None:
        raise Exception(
            MsgBundle.getMessage(MsgBundle.JOB_EXECUTION_ID_MISSING))

    self.job_template_id = job_input_json.get('job_template_id')
    self.job_execution_id = job_input_json.get('job_execution_id')
    self.job_data = job_input_json.get('input')
    self.fabric_fq_name = job_input_json.get('fabric_fq_name')
def start_job(self):
    """Dispatch the current playbook to single- or multi-device handling."""
    # spawn job greenlets
    handler = JobHandler(
        self._logger, self._vnc_api, self.job_template,
        self.job_execution_id, self.job_data, self.job_utils,
        self.device_json, self.auth_token, self.contrail_cluster_id,
        self.api_server_host, self.job_log_utils, self.sandesh_args,
        self.fabric_fq_name, self.job_log_utils.args.playbook_timeout,
        self.playbook_seq, self.vnc_api_init_params, self._zk_client)

    # check if its a multi device playbook
    play_info = self.job_template.get_job_template_playbooks()\
        .playbook_info[self.playbook_seq]

    if not play_info.multi_device_playbook:
        # for fabric config push as part of delete workflow,
        # device json is not needed. There will be no performance
        # impact as fabric delete from DM will always have one prouter
        # uuid in the device_list.
        self.handle_single_job(handler, self.result_handler)
        return

    # multi-device playbooks cannot run without device json
    if not self.device_json:
        raise JobException(
            MsgBundle.getMessage(MsgBundle.DEVICE_JSON_NOT_FOUND),
            self.job_execution_id)
    self.handle_multi_device_job(handler, self.result_handler)
def send_job_log(self, job_template_fqname, job_execution_id,
                 fabric_fq_name, message, status, completion_percent=None,
                 result=None, timestamp=None, device_name=None,
                 details=None):
    """Send a sandesh JobLog entry for this execution.

    :param job_template_fqname: template fq-name (list or string; it is
        normalized by get_fq_name_log_str)
    :param timestamp: epoch millis; defaults to "now" when None
    :param details: optional dict, JSON-serialized before sending
    :raises JobException: wrapping any failure while building/sending
    """
    try:
        job_template_fqname = self.get_fq_name_log_str(job_template_fqname)
        if timestamp is None:
            timestamp = int(round(time.time() * 1000))
        # falsy details (None/{} ) are sent as None, not "null"
        details_str = json.dumps(details) if details else None
        job_log_entry = JobLogEntry(
            name=job_template_fqname, execution_id=job_execution_id,
            fabric_name=fabric_fq_name, timestamp=timestamp,
            message=message, status=status,
            percentage_completed=completion_percent, result=result,
            device_name=device_name, details=details_str)
        job_log = JobLog(log_entry=job_log_entry)
        job_log.send(sandesh=self.config_logger._sandesh)
        # NOTE(review): this debug format string lacks a separator between
        # "fabric_fq_name: %s" and "status:" — cosmetic only; left as-is
        # since it is runtime text.
        self.config_logger.debug("Created job log for job template: %s, "
                                 " execution id: %s, fabric_fq_name: %s"
                                 "status: %s, completion_percent %s, "
                                 "result: "
                                 "%s, message: %s" % (job_template_fqname,
                                                      job_execution_id,
                                                      fabric_fq_name,
                                                      status,
                                                      completion_percent,
                                                      result, message))
    except Exception as e:
        msg = MsgBundle.getMessage(MsgBundle.SEND_JOB_LOG_ERROR,
                                   job_template_fqname=job_template_fqname,
                                   job_execution_id=job_execution_id,
                                   fabric_name=fabric_fq_name,
                                   exc_msg=repr(e))
        raise JobException(msg, job_execution_id)
def send_job_execution_uve(
        self, fabric_fq_name, job_template_fqname, job_execution_id,
        timestamp=None, percentage_completed=None):
    """Publish a fabric-scoped job execution UVE marked IN_PROGRESS.

    NOTE(review): `timestamp` is accepted for interface compatibility but
    is not used by this method.

    :raises JobException: wrapping any failure while building/sending
    """
    try:
        # UVE key is "<fabric>:<template fq-name parts...>"
        name_parts = [fabric_fq_name] + list(job_template_fqname)
        fabric_job_uve_name = ':'.join(map(str, name_parts))

        job_exe_data = FabricJobExecution(
            name=fabric_job_uve_name,
            job_status='IN_PROGRESS',
            percentage_completed=percentage_completed)
        job_uve = FabricJobUve(data=job_exe_data,
                               sandesh=self.config_logger._sandesh)
        job_uve.send(sandesh=self.config_logger._sandesh)
    except Exception as exp:
        job_template_fqname = self.get_fq_name_log_str(job_template_fqname)
        msg = MsgBundle.getMessage(MsgBundle.SEND_JOB_EXC_UVE_ERROR,
                                   job_template_fqname=job_template_fqname,
                                   job_execution_id=job_execution_id,
                                   exc_msg=repr(exp))
        raise JobException(msg, job_execution_id)
def _validate_job_input(self, input_schema, ip_json):
    """Validate the job input document against the template's schema.

    :param input_schema: JSON schema, either as a dict or a JSON string
    :param ip_json: the job input to validate
    :raises JobException: when input is missing or fails validation
    """
    if ip_json is None:
        raise JobException(
            MsgBundle.getMessage(MsgBundle.INPUT_SCHEMA_INPUT_NOT_FOUND),
            self.job_execution_id)
    try:
        # the schema may arrive pre-parsed or as a JSON string
        if isinstance(input_schema, str):
            schema = json.loads(input_schema)
        else:
            schema = input_schema
        jsonschema.validate(ip_json, schema)
        self._logger.debug("Input Schema Validation Successful"
                           "for template %s" % self.job_template_id)
    except Exception as exp:
        raise JobException(
            MsgBundle.getMessage(MsgBundle.INVALID_SCHEMA,
                                 job_template_id=self.job_template_id,
                                 exc_obj=exp),
            self.job_execution_id)
def close_sandesh_connection(self):
    """Flush pending sandesh messages, then tear the connection down.

    uninit_sandesh() runs unconditionally (success or failure); a flush
    timeout is re-raised as a JobException with a close-specific message.
    """
    try:
        self.wait_for_msg_send()
    except JobException as exc:
        msg = MsgBundle.getMessage(MsgBundle.CLOSE_SANDESH_EXCEPTION)
        self._logger.error(msg)
        # replace the message but preserve the original exception object
        exc.msg = msg
        raise exc
    finally:
        self.uninit_sandesh()
def run_playbook(self, playbook_info, percentage_completed):
    """Run one playbook in a child process and return its parsed output.

    :param playbook_info: dict with 'uri' and 'extra_vars.playbook_input'
    :param percentage_completed: progress value forwarded to the runner
    :return: the playbook output dict from run_playbook_process
    :raises JobException: if the playbook file is missing or any other
        error occurs (non-JobException errors are wrapped with traceback)
    """
    playbook_output = None
    try:
        # create job log to capture the start of the playbook
        pb_input = playbook_info['extra_vars']['playbook_input']
        device_name = pb_input.get('device_fqname')
        if device_name:
            device_name = device_name[-1]
        playbook_name = playbook_info['uri'].split('/')[-1]

        self._logger.debug(
            MsgBundle.getMessage(MsgBundle.START_EXE_PB_MSG,
                                 playbook_name=playbook_name))

        if not os.path.exists(playbook_info['uri']):
            raise JobException(
                MsgBundle.getMessage(MsgBundle.PLAYBOOK_NOT_FOUND,
                                     playbook_uri=playbook_info['uri']),
                self._execution_id)

        # Run playbook in a separate process. This is needed since
        # ansible cannot be used in a greenlet based patched environment
        playbook_output = self.run_playbook_process(playbook_info,
                                                    percentage_completed)

        # create job log to capture completion of the playbook execution
        self._logger.debug(
            MsgBundle.getMessage(MsgBundle.STOP_EXE_PB_MSG,
                                 playbook_name=playbook_name))
        return playbook_output
    except JobException:
        # already carries a proper message/execution id — pass through
        raise
    except Exception as exp:
        trace = traceback.format_exc()
        msg = MsgBundle.getMessage(MsgBundle.RUN_PLAYBOOK_ERROR,
                                   playbook_uri=playbook_info['uri'],
                                   exc_msg=repr(exp))
        raise JobException("%s\n%s" % (msg, trace), self._execution_id)
def send_prouter_object_log(self, prouter_fqname, job_execution_id,
                            job_input, job_template_fqname,
                            onboarding_state, os_version=None,
                            serial_num=None, timestamp=None):
    """Send a PRouterOnboardingLog entry for a physical router.

    :param prouter_fqname: router fq-name (normalized for logging)
    :param onboarding_state: current onboarding state string
    :param timestamp: epoch millis; defaults to "now" when None
    :raises JobException: wrapping any failure while building/sending
    """
    try:
        job_template_fqname = self.get_fq_name_log_str(job_template_fqname)
        prouter_fqname = self.get_fq_name_log_str(prouter_fqname)

        if timestamp is None:
            timestamp = int(round(time.time() * 1000))

        # create the prouter object log
        prouter_log_entry = PRouterOnboardingLogEntry(
            name=prouter_fqname,
            job_execution_id=job_execution_id,
            os_version=os_version,
            serial_num=serial_num,
            onboarding_state=onboarding_state,
            timestamp=timestamp,
            job_template_fqname=job_template_fqname,
            job_input=job_input)

        prouter_log = PRouterOnboardingLog(log_entry=prouter_log_entry)
        prouter_log.send(sandesh=self.config_logger._sandesh)
        self.config_logger.debug(
            "Created prouter object log for router: %s, "
            " execution id: %s, job_template: %s, os_version: "
            "%s, serial_num: %s, onboarding_state %s" %
            (prouter_fqname,
             job_execution_id,
             job_template_fqname,
             os_version,
             serial_num,
             onboarding_state))
    except Exception as exp:
        msg = MsgBundle.getMessage(MsgBundle.SEND_PROUTER_OBJECT_LOG_ERROR,
                                   prouter_fqname=prouter_fqname,
                                   job_execution_id=job_execution_id,
                                   exc_msg=repr(exp))
        raise JobException(msg, job_execution_id)
def start_job(self):
    """Spawn the job handler and route to single or multi device flow.

    Device json present and non-empty -> multi-device path;
    present but empty -> error; absent -> single-job path.
    """
    # spawn job greenlets
    handler = JobHandler(
        self._logger, self._vnc_api, self.job_template,
        self.job_execution_id, self.job_data, self.job_utils,
        self.device_json, self.auth_token, self.api_server_host,
        self.job_log_utils, self.sandesh_args, self.fabric_fq_name,
        self.job_log_utils.args.playbook_timeout, self.playbook_seq,
        self.vnc_api_init_params, self._zk_client,
        self.db_init_params, self.cluster_id)

    if self.device_json is None:
        self.handle_single_job(handler, self.result_handler)
    elif not self.device_json:
        # device json supplied but empty — nothing to run against
        raise JobException(
            MsgBundle.getMessage(MsgBundle.DEVICE_JSON_NOT_FOUND),
            self.job_execution_id)
    else:
        self.handle_multi_device_job(handler, self.result_handler)
def send_prouter_job_uve(
        self, job_template_fqname, fq_names, job_execution_id,
        prouter_state=None, job_status=None, percentage_completed=None,
        device_op_results="{}"):
    """Publish a per-physical-router job execution UVE.

    `prouter_state` is only attached to the UVE when supplied, so a None
    state does not clobber a previously published value.

    :raises JobException: wrapping any failure while building/sending
    """
    try:
        job_template_fqname = self.get_fq_name_log_str(job_template_fqname)

        uve_kwargs = dict(
            name=fq_names,
            execution_id=job_execution_id,
            job_status=job_status,
            percentage_completed=percentage_completed,
            device_op_results=device_op_results)
        if prouter_state is not None:
            uve_kwargs['prouter_state'] = prouter_state
        prouter_job_data = PhysicalRouterJobExecution(**uve_kwargs)

        prouter_job_uve = PhysicalRouterJobUve(
            data=prouter_job_data, sandesh=self.config_logger._sandesh)
        prouter_job_uve.send(sandesh=self.config_logger._sandesh)
    except Exception as exp:
        msg = MsgBundle.getMessage(MsgBundle.SEND_JOB_EXC_UVE_ERROR,
                                   job_template_fqname=job_template_fqname,
                                   job_execution_id=job_execution_id,
                                   exc_msg=repr(exp))
        raise JobException(msg, job_execution_id)
def initialize_sandesh_logger(self, config_args, sandesh=True,
                              sandesh_instance=None):
    """Parse logger config, build a JobLogger, and wait for sandesh.

    :param config_args: raw logger configuration passed to the parser
    :param sandesh: when False, skip waiting for sandesh connectivity
    :param sandesh_instance: reuse an existing sandesh instance if given
    :return: the initialized JobLogger
    :raises JobException: when sandesh connection setup times out
    """
    # parse the logger args
    args = self.parse_logger_args(config_args)
    args.random_collectors = args.collectors
    if args.collectors:
        # shuffle so load spreads across collectors
        args.random_collectors = random.sample(args.collectors,
                                               len(args.collectors))
    self.args = args

    # initialize logger
    logger = JobLogger(args=args,
                       sandesh_instance_id=self.sandesh_instance_id,
                       sandesh_instance=sandesh_instance)

    # only a freshly created sandesh instance needs a connection wait
    if sandesh and not sandesh_instance:
        try:
            SandeshUtils(logger).wait_for_connection_establish()
        except JobException:
            raise JobException(MsgBundle.getMessage(
                MsgBundle.SANDESH_INITIALIZATION_TIMEOUT_ERROR))

    logger.info("Sandesh is initialized. Config logger instance created.")
    return logger
sys.exit(msg) def parse_args(): parser = argparse.ArgumentParser(description='Ansible playbook input ' 'parameters') parser.add_argument('-i', '--playbook_input', nargs=1, help='Playbook input json') return parser.parse_args() if __name__ == "__main__": playbook_input_json = None try: playbook_params = parse_args() playbook_input_json = json.loads(playbook_params.playbook_input[0]) if playbook_input_json is None: sys.exit(MsgBundle.getMessage(MsgBundle.NO_PLAYBOOK_INPUT_DATA)) except Exception as exp: ERR_MSG = "Failed to start playbook due "\ "to Exception: %s" % traceback.print_stack() JM_LOGGER.error(ERR_MSG) sys.exit( MsgBundle.getMessage(MsgBundle.PLAYBOOK_INPUT_PARSING_ERROR, exc_msg=repr(exp))) playbook_helper = PlaybookHelper() playbook_helper.execute_playbook(playbook_input_json)
def start_job(self):
    """Top-level workflow driver for a chained-playbook job.

    Runs every playbook of the template in sequence; re-runs a playbook
    while it reports retry devices; honors abort requests; and on a
    fatal failure restarts the chain in cleanup mode so that only
    recovery playbooks execute. Exits the process with an error message
    when the overall job failed.
    """
    job_error_msg = None
    job_template = None
    try:
        # create job UVE and log
        job_template = self.job_utils.read_job_template()
        self.job_template = job_template
        self.job_description = self.job_template.display_name
        if not self.job_transaction_descr:
            self.job_transaction_descr = self._generate_transaction_descr()
        self.result_handler = JobResultHandler(
            self.job_template_id, self.job_execution_id,
            self.fabric_fq_name, self._logger, self.job_utils,
            self.job_log_utils, self.device_name, self.job_description,
            self.job_transaction_id, self.job_transaction_descr)

        msg = MsgBundle.getMessage(
            MsgBundle.START_JOB_MESSAGE,
            job_execution_id=self.job_execution_id,
            job_template_name=job_template.fq_name[-1])
        self._logger.debug(msg)
        timestamp = int(round(time.time() * 1000))
        self.job_log_utils.send_job_log(
            job_template.fq_name, self.job_execution_id,
            self.fabric_fq_name, msg, JobStatus.STARTING.value,
            timestamp=timestamp, device_name=self.device_name,
            description=self.job_description,
            transaction_id=self.job_transaction_id,
            transaction_descr=self.job_transaction_descr)

        # validate job input if required by job_template input_schema
        input_schema = job_template.get_job_template_input_schema()
        if input_schema:
            self._validate_job_input(input_schema, self.job_data)

        playbook_list = job_template.get_job_template_playbooks()\
            .get_playbook_info()
        job_percent = None
        # calculate job percentage for each playbook
        if len(playbook_list) > 1:
            task_weightage_array = [
                pb_info.job_completion_weightage
                for pb_info in playbook_list
            ]

        # cleanup_in_progress: restrict execution to recovery playbooks
        # cleanup_completed: at least one recovery playbook actually ran
        cleanup_in_progress = False
        cleanup_completed = False
        pb_idx = 0
        while pb_idx < len(playbook_list):
            # check if its a multi device playbook
            playbooks = job_template.get_job_template_playbooks()
            play_info = playbooks.playbook_info[pb_idx]
            multi_device_playbook = play_info.multi_device_playbook
            playbook_name = play_info.playbook_uri.split('/')[-1]

            if cleanup_in_progress:
                # If we need to cleanup due to a previous error, ignore
                # any playbooks that don't perform recovery
                if not play_info.recovery_playbook:
                    self._logger.info("Ignoring playbook %s since it "
                                      "does not perform recovery" %
                                      playbook_name)
                    pb_idx += 1
                    continue
                # If we are running a recovery playbook, then
                # cleanup_completed needs to be set irrespective of
                # a success or error in recovery playbook execution
                else:
                    self._logger.info("Running recovery playbook %s" %
                                      playbook_name)
                    cleanup_completed = True
            else:
                # Don't run a recovery playbook if we haven't hit an error
                if play_info.recovery_playbook:
                    self._logger.info(
                        "Ignoring recovery playbook %s since we "
                        "haven't hit an error" % playbook_name)
                    pb_idx += 1
                    continue

            if len(playbook_list) > 1:
                # get the job percentage based on weightage of each plabook
                # when they are chained
                job_percent = \
                    self.job_log_utils.calculate_job_percentage(
                        len(playbook_list), buffer_task_percent=True,
                        total_percent=100, task_seq_number=pb_idx + 1,
                        task_weightage_array=task_weightage_array)[0]
            else:
                job_percent = \
                    self.job_log_utils.calculate_job_percentage(
                        len(playbook_list), buffer_task_percent=True,
                        total_percent=100)[0]  # using equal weightage

            retry_devices = None
            while True:
                job_mgr = JobManager(self._logger, self._vnc_api,
                                     self.job_input, self.job_log_utils,
                                     job_template, self.result_handler,
                                     self.job_utils, pb_idx, job_percent,
                                     self._zk_client,
                                     self.job_description,
                                     self.job_transaction_id,
                                     self.job_transaction_descr)
                self.job_mgr = job_mgr
                job_mgr.start_job()

                # retry the playbook execution if retry_devices is added to
                # the playbook output
                job_status = self.result_handler.job_result_status
                retry_devices = self.result_handler.get_retry_devices()
                failed_device_list = self.result_handler\
                    .get_failed_device_list()
                if job_status == JobStatus.FAILURE or not retry_devices \
                        or self.abort_flag:
                    break
                self.job_input['device_json'] = retry_devices
                self.job_input['input']['failed_list'] = failed_device_list

            # update the job input with marked playbook output json
            pb_output = self.result_handler.playbook_output or {}
            if pb_output.get('early_exit'):
                break

            # stop the workflow if playbook failed
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                # If it is a single device job or
                # if it is a multi device playbook
                # and all the devices have failed some job execution,
                # declare it as failure, perform cleanup if possible
                # and then stop the workflow
                if not multi_device_playbook or \
                        (multi_device_playbook and
                         len(self.result_handler.failed_device_jobs) ==
                         len(self.job_input.get('device_json'))):
                    if not cleanup_in_progress:
                        # restart the chain from the top in cleanup mode
                        cleanup_in_progress = True
                        pb_idx = 0
                        self._logger.info("Stop the workflow on the failed"
                                          " Playbook and start cleanup")
                    else:
                        pb_idx += 1
                    continue
                elif not retry_devices:
                    # it is a multi device playbook but one of
                    # the device jobs have failed. This means we should
                    # still declare the operation as success. We declare
                    # workflow as success even if one of the devices has
                    # succeeded the job
                    self.result_handler.job_result_status =\
                        JobStatus.SUCCESS

            if self.abort_flag:
                err_msg = "ABORTING NOW..."
                self._logger.info(err_msg)
                self.result_handler.update_job_status(
                    JobStatus.FAILURE, err_msg)
                break

            # update the job input with marked playbook output json
            pb_output = self.result_handler.playbook_output or {}

            # read the device_data output of the playbook
            # and update the job input so that it can be used in next
            # iteration
            if not multi_device_playbook:
                device_json = pb_output.pop('device_json', None)
                self.job_input['device_json'] = device_json
            self.job_input.get('input', {}).update(pb_output)
            pb_idx += 1

        # A successful recovery playbook execution might
        # set JobStatus to success but this does not indicate a
        # success in the workflow. Set JobStatus to failure again.
        if cleanup_completed:
            err_msg = "Finished cleaning up after the error"
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            cleanup_completed = False
            cleanup_in_progress = False

        # create job completion log and update job UVE
        self.result_handler.create_job_summary_log(job_template.fq_name)
        # in case of failures, exit the job manager process with failure
        if self.result_handler.job_result_status == JobStatus.FAILURE:
            job_error_msg = self.result_handler.job_summary_message
    except JobException as exp:
        err_msg = "Job Exception recieved: %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        if job_template:
            self.result_handler.create_job_summary_log(
                job_template.fq_name)
        job_error_msg = err_msg
    except Exception as exp:
        err_msg = "Error while executing job %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        self.result_handler.create_job_summary_log(job_template.fq_name)
        job_error_msg = err_msg
    finally:
        # need to wait for the last job log and uve update to complete
        # via sandesh and then close sandesh connection
        sandesh_util = SandeshUtils(self._logger)
        sandesh_util.close_sandesh_connection()
        self._logger.info("Closed Sandesh connection")

    if job_error_msg is not None:
        sys.exit(job_error_msg)
def execute_playbook(self, playbook_info):
    """Execute one ansible playbook synchronously in-process.

    :param playbook_info: dict with 'uri' and 'extra_vars' (which must
        carry playbook_input.job_execution_id and .unique_pb_id)
    :return: the plugin output dict (must contain a non-"failure" status)
    On any error: logs, writes the (possibly None) output plus an END
    marker to the per-execution file so the listener stops, then exits
    the process with the error message.
    """
    output = None
    try:
        loader = DataLoader()
        inventory = InventoryManager(loader=loader, sources=['localhost'])
        variable_manager = VariableManager(loader=loader,
                                           inventory=inventory)

        # ansible's CLI option bag, reproduced as a namedtuple
        Options = namedtuple('Options',
                             ['listtags', 'listtasks', 'listhosts',
                              'syntax', 'connection', 'module_path',
                              'forks', 'remote_user', 'private_key_file',
                              'ssh_common_args', 'ssh_extra_args',
                              'sftp_extra_args', 'scp_extra_args',
                              'become', 'become_method', 'become_user',
                              'verbosity', 'check', 'diff'])
        options = Options(listtags=False, listtasks=False,
                          listhosts=False, syntax=False,
                          connection='ssh', module_path=None, forks=100,
                          remote_user=None, private_key_file=None,
                          ssh_common_args=None, ssh_extra_args=None,
                          sftp_extra_args=None, scp_extra_args=None,
                          become=None, become_method=None,
                          become_user=None, verbosity=None, check=False,
                          diff=False)
        variable_manager.extra_vars = playbook_info['extra_vars']

        pbex = PlaybookExecutor(playbooks=[playbook_info['uri']],
                                inventory=inventory,
                                variable_manager=variable_manager,
                                loader=loader, options=options,
                                passwords=None)
        ret_val = pbex.run()
        output = self.get_plugin_output(pbex)

        if ret_val != 0:
            msg = MsgBundle.getMessage(MsgBundle.
                                       PLAYBOOK_RETURN_WITH_ERROR)
            raise Exception(msg)
        if output is None or output.get('status') is None:
            msg = MsgBundle.getMessage(MsgBundle.
                                       PLAYBOOK_OUTPUT_MISSING)
            raise Exception(msg)
        if output.get('status').lower() == "failure":
            msg = MsgBundle.getMessage(MsgBundle.
                                       PLAYBOOK_STATUS_FAILED)
            raise Exception(msg)
        return output
    except Exception as exp:
        msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_EXECUTE_ERROR,
                                   playbook_uri=playbook_info['uri'],
                                   execution_id=playbook_info['extra_vars']
                                   ['playbook_input']['job_execution_id'],
                                   exc_msg=repr(exp))
        # BUG FIX: `exp.message` does not exist on Python 3 exceptions
        # (BaseException.message was removed), so accessing it raised
        # AttributeError inside this handler and masked the real error.
        # getattr preserves the Python 2 behavior and degrades safely.
        exp_message = getattr(exp, 'message', None)
        if exp_message:
            msg = msg + "\n" + exp_message
        JM_LOGGER.error(msg)

        # after handling exception, write an END
        # to stop listening to the file if created
        unique_pb_id = playbook_info['extra_vars'][
            'playbook_input']['unique_pb_id']
        exec_id = playbook_info['extra_vars']['playbook_input'][
            'job_execution_id']
        self._job_file_write.write_to_file(
            exec_id, unique_pb_id, JobFileWrite.PLAYBOOK_OUTPUT,
            json.dumps(output)
        )
        with open("/tmp/" + exec_id, "a") as f:
            f.write(unique_pb_id + 'END' + PLAYBOOK_EOL_PATTERN)
        sys.exit(msg)
def parse_args():
    """Parse the command line: -i/--playbook_input <json string>."""
    parser = argparse.ArgumentParser(description='Ansible playbook input '
                                                 'parameters')
    parser.add_argument('-i', '--playbook_input', nargs=1,
                        help='Playbook input json')
    return parser.parse_args()


if __name__ == "__main__":
    playbook_input_json = None
    try:
        playbook_params = parse_args()
        playbook_input_json = json.loads(playbook_params.playbook_input[0])
        if playbook_input_json is None:
            sys.exit(MsgBundle.getMessage(MsgBundle.NO_PLAYBOOK_INPUT_DATA))
    except Exception as exp:
        # BUG FIX: traceback.print_stack() prints to stderr and returns
        # None, so the logged message always ended in "... Exception: None".
        # format_exc() returns the actual traceback text of `exp`.
        ERR_MSG = "Failed to start playbook due "\
                  "to Exception: %s" % traceback.format_exc()
        JM_LOGGER.error(ERR_MSG)
        sys.exit(MsgBundle.getMessage(MsgBundle.PLAYBOOK_INPUT_PARSING_ERROR,
                                      exc_msg=repr(exp)))
    playbook_helper = PlaybookHelper()
    pb_output = playbook_helper.execute_playbook(playbook_input_json)

    # if it comes here, it implies the pb_output is of correct
    # format and is present with status Sucess. So derive the
    # playbook output to be written to file and finally write END to the file
    unique_pb_id = playbook_input_json['extra_vars'][
        'playbook_input']['unique_pb_id']
def start_job(self):
    """Run the template's executables one by one and report status.

    For each executable in the job template: fork the binary with the
    gathered job arguments, record its stdout/stderr into job logs, and
    mark overall SUCCESS/FAILURE from the process return code. Exits
    the process with an error message when the job failed.
    """
    self._logger.info("Starting Executable")
    job_error_msg = None
    job_template = self.job_template
    try:
        # create job UVE and log
        self.result_handler = JobResultHandler(
            self.job_template_id, self.job_execution_id,
            self.fabric_fq_name, self._logger, self.job_utils,
            self.job_log_utils, self.device_name, self.job_description,
            self.job_transaction_id, self.job_transaction_descr)

        msg = MsgBundle.getMessage(
            MsgBundle.START_JOB_MESSAGE,
            job_execution_id=self.job_execution_id,
            job_template_name=job_template.fq_name[-1])
        self._logger.debug(msg)
        timestamp = int(round(time.time() * 1000))
        self.job_log_utils.send_job_log(
            job_template.fq_name, self.job_execution_id,
            self.fabric_fq_name, msg, JobStatus.STARTING.value,
            timestamp=timestamp, description=self.job_description,
            transaction_id=self.job_transaction_id,
            transaction_descr=self.job_transaction_descr)

        # validate job input if required by job_template input_schema
        input_schema = job_template.get_job_template_input_schema()
        if input_schema:
            self._validate_job_input(input_schema, self.job_data)

        executable_list = job_template.get_job_template_executables()\
            .get_executable_info()
        for executable in executable_list:
            exec_path = executable.get_executable_path()
            # NOTE(review): return value of get_executable_args() is
            # discarded here — looks unused; confirm before removing.
            executable.get_executable_args()
            job_input_args = self.gather_job_args()
            try:
                exec_process = subprocess32.Popen(
                    [exec_path, "--job-input",
                     json.dumps(job_input_args)],
                    close_fds=True, cwd='/',
                    stdout=subprocess32.PIPE,
                    stderr=subprocess32.PIPE)
                self.job_file_write.write_to_file(
                    self.job_execution_id, "job_summary",
                    JobFileWrite.JOB_LOG,
                    {"job_status": JobStatus.IN_PROGRESS.value})
                msg = "Child process pid = " + str(exec_process.pid)
                self._logger.info(msg)
                # blocks until the child exits or the timeout fires
                (out, err) = exec_process.communicate(
                    timeout=self.executable_timeout)
                self._logger.notice(str(out))
                self._logger.notice(str(err))
                timestamp = int(round(time.time() * 1000))
                self.job_log_utils.send_job_log(
                    job_template.fq_name, self.job_execution_id,
                    self.fabric_fq_name, str(err),
                    JobStatus.IN_PROGRESS.value, timestamp=timestamp,
                    description=self.job_description,
                    transaction_id=self.job_transaction_id,
                    transaction_descr=self.job_transaction_descr)
            except subprocess32.TimeoutExpired as timeout_exp:
                if exec_process is not None:
                    # 9 == SIGKILL
                    os.kill(exec_process.pid, 9)
                msg = MsgBundle.getMessage(
                    MsgBundle.RUN_EXECUTABLE_PROCESS_TIMEOUT,
                    exec_path=exec_path,
                    exc_msg=repr(timeout_exp))
                raise JobException(msg, self.job_execution_id)

            self._logger.info(exec_process.returncode)
            self._logger.info("Executable Completed")
            if exec_process.returncode != 0:
                job_status = JobStatus.FAILURE.value
                msg = MsgBundle.getMessage(
                    MsgBundle.EXECUTABLE_RETURN_WITH_ERROR,
                    exec_uri=exec_path)
                self._logger.error(msg)
            else:
                job_status = JobStatus.SUCCESS.value
                msg = MsgBundle.getMessage(
                    MsgBundle.JOB_EXECUTION_COMPLETE,
                    job_execution_id=self.job_execution_id,
                    job_template_name=job_template.fq_name[-1])
            self.job_file_write.write_to_file(self.job_execution_id,
                                              "job_summary",
                                              JobFileWrite.JOB_LOG,
                                              {"job_status": job_status})
            self._logger.debug(msg)
            timestamp = int(round(time.time() * 1000))
            self.job_log_utils.send_job_log(
                job_template.fq_name, self.job_execution_id,
                self.fabric_fq_name, msg, job_status,
                timestamp=timestamp, description=self.job_description,
                transaction_id=self.job_transaction_id,
                transaction_descr=self.job_transaction_descr)
    except JobException as exp:
        err_msg = "Job Exception recieved: %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        if job_template:
            self.result_handler.create_job_summary_log(
                job_template.fq_name)
        job_error_msg = err_msg
    except Exception as exp:
        err_msg = "Error while executing job %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        self.result_handler.create_job_summary_log(job_template.fq_name)
        job_error_msg = err_msg
    finally:
        # need to wait for the last job log and uve update to complete
        # via sandesh and then close sandesh connection
        sandesh_util = SandeshUtils(self._logger)
        sandesh_util.close_sandesh_connection()
        self._logger.info("Closed Sandesh connection")

    if job_error_msg is not None:
        sys.exit(job_error_msg)
def start_job(self):
    """Run the job workflow: execute every playbook of the template in order.

    Reads the job template, validates the job input against the template's
    input schema, computes a completion percentage per playbook, then runs
    each playbook through a JobManager (re-running it while the playbook
    reports retry devices).  On completion it writes the job summary
    log/UVE; on failure the process exits with the summary message.
    """
    job_error_msg = None
    job_template = None
    try:
        # create job UVE and log
        self.result_handler = JobResultHandler(self.job_template_id,
                                               self.job_execution_id,
                                               self.fabric_fq_name,
                                               self._logger,
                                               self.job_utils,
                                               self.job_log_utils)
        job_template = self.job_utils.read_job_template()

        msg = MsgBundle.getMessage(MsgBundle.START_JOB_MESSAGE,
                                   job_execution_id=self.job_execution_id,
                                   job_template_name=\
                                       job_template.fq_name[-1])
        self._logger.debug(msg)

        timestamp = int(round(time.time() * 1000))
        self.job_log_utils.send_job_log(job_template.fq_name,
                                        self.job_execution_id,
                                        self.fabric_fq_name, msg,
                                        JobStatus.STARTING.value,
                                        timestamp=timestamp)

        # validate job input if required by job_template input_schema
        input_schema = job_template.get_job_template_input_schema()
        if input_schema:
            self._validate_job_input(input_schema, self.job_data)

        playbook_list = job_template.get_job_template_playbooks()\
            .get_playbook_info()

        job_percent = None
        # calculate job percentage for each playbook
        if len(playbook_list) > 1:
            # relative weightage of each playbook, used when chained
            task_weightage_array = [
                pb_info.job_completion_weightage
                for pb_info in playbook_list]

        for i in range(0, len(playbook_list)):
            if len(playbook_list) > 1:
                # get the job percentage based on weightage of each
                # playbook when they are chained
                job_percent = \
                    self.job_log_utils.calculate_job_percentage(
                        len(playbook_list), buffer_task_percent=True,
                        total_percent=100, task_seq_number=i + 1,
                        task_weightage_array=task_weightage_array)[0]
            else:
                job_percent = \
                    self.job_log_utils.calculate_job_percentage(
                        len(playbook_list), buffer_task_percent=True,
                        total_percent=100)[0]  # using equal weightage

            retry_devices = None
            while True:
                job_mgr = JobManager(self._logger, self._vnc_api,
                                     self.job_input, self.job_log_utils,
                                     job_template, self.result_handler,
                                     self.job_utils, i, job_percent,
                                     self._zk_client,
                                     self.db_init_params,
                                     self.cluster_id)
                job_mgr.start_job()

                # retry the playbook execution if retry_devices is added to
                # the playbook output
                job_status = self.result_handler.job_result_status
                retry_devices = self.result_handler.get_retry_devices()
                if job_status == JobStatus.FAILURE or not retry_devices:
                    break
                # retry only against the devices the playbook flagged
                self.job_input['device_json'] = retry_devices

            # stop the workflow if playbook failed
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                # stop workflow only if its a single device job or
                # it is a multi device playbook
                # and all the devices have failed some job execution
                # declare it as failure and the stop the workflow
                if self.job_input.get('device_json') is None or\
                        len(self.result_handler.failed_device_jobs)\
                        == len(self.job_input.get('device_json')):
                    self._logger.error(
                        "Stop the workflow on the failed Playbook.")
                    break
                elif not retry_devices:
                    # it is a multi device playbook but one of the device
                    # jobs have failed. This means we should still declare
                    # the operation as success. We declare workflow as
                    # success even if one of the devices has succeeded
                    # the job
                    self.result_handler.job_result_status = \
                        JobStatus.SUCCESS

            # update the job input with marked playbook output json
            pb_output = self.result_handler.playbook_output or {}

            # read the device_data output of the playbook
            # and update the job input so that it can be used in next
            # iteration
            if not self.job_input.get('device_json'):
                device_json = pb_output.pop('device_json', None)
                self.job_input['device_json'] = device_json

            self.job_input.get('input', {}).update(pb_output)

        # create job completion log and update job UVE
        self.result_handler.create_job_summary_log(
            job_template.fq_name)

        # in case of failures, exit the job manager process with failure
        if self.result_handler.job_result_status == JobStatus.FAILURE:
            job_error_msg = self.result_handler.job_summary_message

    except JobException as exp:
        err_msg = "Job Exception recieved: %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        # job_template may still be None if read_job_template() failed
        if job_template:
            self.result_handler.create_job_summary_log(
                job_template.fq_name)
        job_error_msg = err_msg
    except Exception as exp:
        err_msg = "Error while executing job %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        self.result_handler.create_job_summary_log(job_template.fq_name)
        job_error_msg = err_msg
    finally:
        # need to wait for the last job log and uve update to complete
        # via sandesh and then close sandesh connection
        sandesh_util = SandeshUtils(self._logger)
        sandesh_util.close_sandesh_connection()
        self._logger.info("Closed Sandesh connection")
    # exit after the finally block so the sandesh connection is closed first
    if job_error_msg is not None:
        sys.exit(job_error_msg)
logger = job_log_utils.config_logger except Exception as exp: print >> sys.stderr, "Failed to initialize logger due "\ "to Exception: %s" % traceback.format_exc() sys.exit("Exiting due to logger initialization error: %s" % repr(exp)) # initialize _vnc_api instance vnc_api = None try: auth_token = job_input_json['auth_token'] vnc_api = VncApi(auth_token=auth_token) logger.info("VNC api is initialized using the auth token passed.") except Exception as exp: logger.error( MsgBundle.getMessage(MsgBundle.VNC_INITIALIZATION_ERROR, exc_msg=traceback.format_exc())) msg = MsgBundle.getMessage(MsgBundle.VNC_INITIALIZATION_ERROR, exc_msg=repr(exp)) job_log_utils.send_job_log(job_input_json['job_template_fq_name'], job_input_json['job_execution_id'], job_input_json.get('fabric_fq_name'), msg, JobStatus.FAILURE) sys.exit(msg) # invoke job manager try: workflow_manager = WFManager(logger, vnc_api, job_input_json, job_log_utils) logger.info("Job Manager is initialized. Starting job.") workflow_manager.start_job() except Exception as exp:
def run_playbook_process(self, playbook_info, percentage_completed):
    """Execute a playbook in a child process via playbook_helper.py.

    The child writes marked JSON records to a shared file keyed by a
    per-run UUID; this method tails that file, extracts the playbook
    output, waits for the child (bounded by ``self._playbook_timeout``)
    and returns the parsed playbook output dict (or None).

    :raises JobException: on timeout, on any unexpected error, or when
        the child exits non-zero.
    """
    playbook_process = None
    playbook_output = None
    pr_uve_name = None
    self.current_percentage = percentage_completed
    try:
        playbook_exec_path = os.path.dirname(__file__) \
            + "/playbook_helper.py"

        # unique id marks this run's records in the shared output file
        unique_pb_id = str(uuid.uuid4())
        playbook_info['extra_vars']['playbook_input']['unique_pb_id']\
            = unique_pb_id
        exec_id =\
            playbook_info['extra_vars']['playbook_input'][
                'job_execution_id']

        # build the physical-router UVE name only when a device is targeted
        device_fqname = \
            playbook_info['extra_vars']['playbook_input'].get(
                'device_fqname')
        if device_fqname:
            pr_fqname = ':'.join(map(str, device_fqname))
            job_template_fq_name = ':'.join(
                map(str, self._job_template.fq_name))
            pr_uve_name = pr_fqname + ":" + \
                self._fabric_fq_name + ":" + job_template_fq_name

        pr_object_log_start_time = time.time()

        playbook_process = subprocess32.Popen([
            "python", playbook_exec_path, "-i",
            json.dumps(playbook_info)
        ], close_fds=True, cwd='/')
        # this is to yield the context to the playbooks so that
        # they start running concurrently
        gevent.sleep(0)
        marked_output = self.process_file_and_get_marked_output(
            unique_pb_id, exec_id, playbook_process, pr_uve_name)

        marked_jsons = self._extract_marked_json(marked_output)
        playbook_output = marked_jsons.get(PLAYBOOK_OUTPUT)
        playbook_process.wait(timeout=self._playbook_timeout)
        pr_object_log_end_time = time.time()

        # create prouter UVE in job_manager only if it is not a multi
        # device job template
        if not self._job_template.get_job_template_multi_device_job():
            self.send_prouter_uve(exec_id,
                                  pr_object_log_start_time,
                                  pr_object_log_end_time,
                                  playbook_process.returncode)
    except subprocess32.TimeoutExpired as timeout_exp:
        # hard-kill the stuck child (signal 9) before reporting
        if playbook_process is not None:
            os.kill(playbook_process.pid, 9)
        msg = MsgBundle.getMessage(MsgBundle.RUN_PLAYBOOK_PROCESS_TIMEOUT,
                                   playbook_uri=playbook_info['uri'],
                                   exc_msg=repr(timeout_exp))
        raise JobException(msg, self._execution_id)
    except Exception as exp:
        msg = MsgBundle.getMessage(MsgBundle.RUN_PLAYBOOK_PROCESS_ERROR,
                                   playbook_uri=playbook_info['uri'],
                                   exc_msg=repr(exp))
        raise JobException(msg, self._execution_id)

    # child exited: non-zero return code means the playbook failed
    if playbook_process.returncode != 0:
        msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_EXIT_WITH_ERROR,
                                   playbook_uri=playbook_info['uri'])
        if playbook_output:
            msg = msg + "\n Error Message from playbook: %s" % \
                playbook_output.get('message', "")
        raise JobException(msg, self._execution_id)

    return playbook_output
def execute_playbook(self, playbook_info):
    """Run an Ansible playbook in-process via PlaybookExecutor.

    Builds the pre-2.8 Ansible API objects (loader, inventory, variable
    manager and an Options namedtuple), injects ``extra_vars``, runs the
    playbook and returns the plugin-collected output dict.

    On any failure the process exits (``sys.exit``) with a composed
    error message.
    """
    try:
        loader = DataLoader()
        inventory = InventoryManager(loader=loader, sources=['localhost'])
        variable_manager = VariableManager(loader=loader,
                                           inventory=inventory)

        # pre-Ansible-2.8 API expects CLI options as a namedtuple
        Options = namedtuple('Options', [
            'listtags', 'listtasks', 'listhosts', 'syntax', 'connection',
            'module_path', 'forks', 'remote_user', 'private_key_file',
            'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args',
            'scp_extra_args', 'become', 'become_method', 'become_user',
            'verbosity', 'check', 'diff'
        ])
        # NOTE(review): remote_user='******' looks like a redacted or
        # placeholder value — confirm the intended user against the
        # original source.
        options = Options(listtags=False, listtasks=False, listhosts=False,
                          syntax=False, connection='ssh',
                          module_path=None, forks=100,
                          remote_user='******', private_key_file=None,
                          ssh_common_args=None, ssh_extra_args=None,
                          sftp_extra_args=None, scp_extra_args=None,
                          become=None, become_method=None,
                          become_user=None, verbosity=None, check=False,
                          diff=False)
        variable_manager.extra_vars = playbook_info['extra_vars']

        pbex = PlaybookExecutor(playbooks=[playbook_info['uri']],
                                inventory=inventory,
                                variable_manager=variable_manager,
                                loader=loader,
                                options=options, passwords=None)
        ret_val = pbex.run()
        if ret_val != 0:
            msg = MsgBundle.getMessage(
                MsgBundle.PLAYBOOK_RETURN_WITH_ERROR)
            raise Exception(msg)

        output = self.get_plugin_output(pbex)

        if output is None or output.get('status') is None:
            msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_OUTPUT_MISSING)
            raise Exception(msg)

        if output.get('status').lower() == "failure":
            msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_STATUS_FAILED)
            raise Exception(msg)

        return output
    except Exception as e:
        msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_EXECUTE_ERROR,
                                   playbook_uri=playbook_info['uri'],
                                   execution_id=playbook_info['extra_vars']
                                   ['playbook_input']['job_execution_id'],
                                   exc_msg=repr(e))
        # Python 2 only: Exception.message does not exist in Python 3
        if e.message:
            msg = msg + "\n" + e.message
        JM_LOGGER.error(msg)
        sys.exit(msg)
def execute_playbook(self, playbook_info):
    """Run an Ansible playbook in-process and return its plugin output.

    Same flow as the in-process executor: build the pre-2.8 Ansible API
    objects, inject ``extra_vars``, run the playbook and validate the
    plugin-collected output.  On any failure the accumulated error
    message is logged, an 'END' marker (plus any partial output) is
    appended to the per-execution file so the parent stops tailing it,
    and the process exits via ``sys.exit``.

    :param playbook_info: dict with 'uri' and 'extra_vars' (containing
        'playbook_input' with 'unique_pb_id' and 'job_execution_id').
    :return: the playbook output dict on success (never None).
    """
    output = None
    try:
        loader = DataLoader()
        inventory = InventoryManager(loader=loader, sources=['localhost'])
        variable_manager = VariableManager(loader=loader,
                                           inventory=inventory)

        # pre-Ansible-2.8 API expects CLI options as a namedtuple
        Options = namedtuple('Options', [
            'listtags', 'listtasks', 'listhosts', 'syntax', 'connection',
            'module_path', 'forks', 'remote_user', 'private_key_file',
            'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args',
            'scp_extra_args', 'become', 'become_method', 'become_user',
            'verbosity', 'check', 'diff'
        ])
        options = Options(listtags=False, listtasks=False, listhosts=False,
                          syntax=False, connection='ssh',
                          module_path=None, forks=100, remote_user=None,
                          private_key_file=None, ssh_common_args=None,
                          ssh_extra_args=None, sftp_extra_args=None,
                          scp_extra_args=None, become=None,
                          become_method=None, become_user=None,
                          verbosity=None, check=False, diff=False)
        variable_manager.extra_vars = playbook_info['extra_vars']

        pbex = PlaybookExecutor(playbooks=[playbook_info['uri']],
                                inventory=inventory,
                                variable_manager=variable_manager,
                                loader=loader,
                                options=options, passwords=None)
        ret_val = pbex.run()
        if ret_val != 0:
            msg = MsgBundle.getMessage(
                MsgBundle.PLAYBOOK_RETURN_WITH_ERROR)
            raise Exception(msg)

        output = self.get_plugin_output(pbex)

        if output is None or output.get('status') is None:
            msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_OUTPUT_MISSING)
            raise Exception(msg)

        if output.get('status').lower() == "failure":
            msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_STATUS_FAILED)
            raise Exception(msg)

        return output
    except Exception as exp:
        msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_EXECUTE_ERROR,
                                   playbook_uri=playbook_info['uri'],
                                   execution_id=playbook_info['extra_vars']
                                   ['playbook_input']['job_execution_id'],
                                   exc_msg=repr(exp))
        # Python 2 only: Exception.message does not exist in Python 3
        if exp.message:
            msg = msg + "\n" + exp.message
        JM_LOGGER.error(msg)

        # after handling exception, write an END
        # to stop listening to the file if created
        unique_pb_id = playbook_info['extra_vars']['playbook_input'][
            'unique_pb_id']
        exec_id = playbook_info['extra_vars']['playbook_input'][
            'job_execution_id']
        line_in_file = ""
        # fix: identity comparison per PEP 8 (was `output != None`)
        if output is not None:
            line_in_file = unique_pb_id + 'PLAYBOOK_OUTPUT##'\
                + json.dumps(output) + 'PLAYBOOK_OUTPUT##'\
                + '\n'
        with open("/tmp/" + exec_id, "a") as f:
            f.write(line_in_file + unique_pb_id + 'END' + '\n')
        sys.exit(msg)
def start_job(self):
    """Run the job workflow: execute every playbook of the template in order.

    Variant that inspects each playbook's ``multi_device_playbook`` flag
    to decide when a failure should stop the workflow versus be absorbed
    (multi-device jobs succeed if at least one device succeeds).  On
    completion it writes the job summary log/UVE; on failure the process
    exits with the summary message.
    """
    job_error_msg = None
    job_template = None
    try:
        # create job UVE and log
        self.result_handler = JobResultHandler(self.job_template_id,
                                               self.job_execution_id,
                                               self.fabric_fq_name,
                                               self._logger,
                                               self.job_utils,
                                               self.job_log_utils)
        job_template = self.job_utils.read_job_template()

        msg = MsgBundle.getMessage(MsgBundle.START_JOB_MESSAGE,
                                   job_execution_id=self.job_execution_id,
                                   job_template_name=\
                                       job_template.fq_name[-1])
        self._logger.debug(msg)

        timestamp = int(round(time.time() * 1000))
        self.job_log_utils.send_job_log(job_template.fq_name,
                                        self.job_execution_id,
                                        self.fabric_fq_name, msg,
                                        JobStatus.STARTING.value,
                                        timestamp=timestamp)

        # validate job input if required by job_template input_schema
        input_schema = job_template.get_job_template_input_schema()
        if input_schema:
            self._validate_job_input(input_schema, self.job_data)

        playbook_list = job_template.get_job_template_playbooks()\
            .get_playbook_info()

        job_percent = None
        # calculate job percentage for each playbook
        if len(playbook_list) > 1:
            # relative weightage of each playbook, used when chained
            task_weightage_array = [
                pb_info.job_completion_weightage
                for pb_info in playbook_list]

        for i in range(0, len(playbook_list)):
            # check if its a multi device playbook
            playbooks = job_template.get_job_template_playbooks()
            play_info = playbooks.playbook_info[i]
            multi_device_playbook = play_info.multi_device_playbook

            if len(playbook_list) > 1:
                # get the job percentage based on weightage of each
                # playbook when they are chained
                job_percent = \
                    self.job_log_utils.calculate_job_percentage(
                        len(playbook_list), buffer_task_percent=True,
                        total_percent=100, task_seq_number=i + 1,
                        task_weightage_array=task_weightage_array)[0]
            else:
                job_percent = \
                    self.job_log_utils.calculate_job_percentage(
                        len(playbook_list), buffer_task_percent=True,
                        total_percent=100)[0]  # using equal weightage

            retry_devices = None
            while True:
                job_mgr = JobManager(self._logger, self._vnc_api,
                                     self.job_input, self.job_log_utils,
                                     job_template, self.result_handler,
                                     self.job_utils, i, job_percent,
                                     self._zk_client)
                job_mgr.start_job()

                # retry the playbook execution if retry_devices is added to
                # the playbook output
                job_status = self.result_handler.job_result_status
                retry_devices = self.result_handler.get_retry_devices()
                if job_status == JobStatus.FAILURE or not retry_devices:
                    break
                # retry only against the devices the playbook flagged
                self.job_input['device_json'] = retry_devices

            # stop the workflow if playbook failed
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                # stop workflow only if its a single device job or
                # it is a multi device playbook
                # and all the devices have failed some job execution
                # declare it as failure and the stop the workflow
                if not multi_device_playbook or \
                        (multi_device_playbook and
                         len(self.result_handler.failed_device_jobs) == \
                         len(self.job_input.get('device_json'))):
                    self._logger.error(
                        "Stop the workflow on the failed Playbook.")
                    break
                elif not retry_devices:
                    # it is a multi device playbook but one of the device
                    # jobs have failed. This means we should still declare
                    # the operation as success. We declare workflow as
                    # success even if one of the devices has succeeded
                    # the job
                    self.result_handler.job_result_status = \
                        JobStatus.SUCCESS

            # update the job input with marked playbook output json
            pb_output = self.result_handler.playbook_output or {}

            # read the device_data output of the playbook
            # and update the job input so that it can be used in next
            # iteration
            if not multi_device_playbook:
                device_json = pb_output.pop('device_json', None)
                self.job_input['device_json'] = device_json

            self.job_input.get('input', {}).update(pb_output)

        # create job completion log and update job UVE
        self.result_handler.create_job_summary_log(
            job_template.fq_name)

        # in case of failures, exit the job manager process with failure
        if self.result_handler.job_result_status == JobStatus.FAILURE:
            job_error_msg = self.result_handler.job_summary_message

    except JobException as exp:
        err_msg = "Job Exception recieved: %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        # job_template may still be None if read_job_template() failed
        if job_template:
            self.result_handler.create_job_summary_log(
                job_template.fq_name)
        job_error_msg = err_msg
    except Exception as exp:
        err_msg = "Error while executing job %s " % repr(exp)
        self._logger.error(err_msg)
        self._logger.error("%s" % traceback.format_exc())
        self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
        self.result_handler.create_job_summary_log(job_template.fq_name)
        job_error_msg = err_msg
    finally:
        # need to wait for the last job log and uve update to complete
        # via sandesh and then close sandesh connection
        sandesh_util = SandeshUtils(self._logger)
        sandesh_util.close_sandesh_connection()
        self._logger.info("Closed Sandesh connection")
    # exit after the finally block so the sandesh connection is closed first
    if job_error_msg is not None:
        sys.exit(job_error_msg)
def execute_playbook(self, playbook_info):
    """Run an Ansible playbook and persist its marked output to a file.

    After the playbook ends, its output is written between
    'PLAYBOOK_OUTPUT##' markers followed by an 'END' marker to
    /tmp/<exec_id> so the parent job-manager process (which tails that
    file) can pick it up and stop listening.

    NOTE(review): in this excerpt the outer ``try:`` has no matching
    ``except``/``finally`` — the tail of this function appears truncated;
    confirm against the full source file.
    """
    try:
        loader = DataLoader()
        inventory = InventoryManager(loader=loader, sources=['localhost'])
        variable_manager = VariableManager(loader=loader,
                                           inventory=inventory)

        # pre-Ansible-2.8 API expects CLI options as a namedtuple
        Options = namedtuple('Options', [
            'listtags', 'listtasks', 'listhosts', 'syntax', 'connection',
            'module_path', 'forks', 'remote_user', 'private_key_file',
            'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args',
            'scp_extra_args', 'become', 'become_method', 'become_user',
            'verbosity', 'check', 'diff'
        ])
        options = Options(listtags=False, listtasks=False, listhosts=False,
                          syntax=False, connection='ssh',
                          module_path=None, forks=100, remote_user=None,
                          private_key_file=None, ssh_common_args=None,
                          ssh_extra_args=None, sftp_extra_args=None,
                          scp_extra_args=None, become=None,
                          become_method=None, become_user=None,
                          verbosity=None, check=False, diff=False)
        variable_manager.extra_vars = playbook_info['extra_vars']

        pbex = PlaybookExecutor(playbooks=[playbook_info['uri']],
                                inventory=inventory,
                                variable_manager=variable_manager,
                                loader=loader,
                                options=options, passwords=None)
        ret_val = pbex.run()
        if ret_val != 0:
            msg = MsgBundle.getMessage(
                MsgBundle.PLAYBOOK_RETURN_WITH_ERROR)
            raise Exception(msg)

        output = self.get_plugin_output(pbex)

        # if it comes here, it implies the pb has ended
        # So derive the playbook output to be
        # written to file and finally write END to the file
        try:
            # NOTE(review): reads the module-level 'playbook_input_json'
            # rather than the 'playbook_info' parameter — confirm this is
            # intentional and not a stale global reference.
            unique_pb_id = playbook_input_json['extra_vars'][
                'playbook_input']['unique_pb_id']
            exec_id = playbook_input_json['extra_vars']['playbook_input'][
                'job_execution_id']
            # messages to be given to next playbooks(s)
            JM_LOGGER.info("Printing pb output results "
                           "from pb_helper.py -->>>")
            JM_LOGGER.info(output)
            line_in_file = unique_pb_id + 'PLAYBOOK_OUTPUT##' + \
                json.dumps(output) + 'PLAYBOOK_OUTPUT##' + \
                '\n'
            with open("/tmp/" + exec_id, "a") as f:
                f.write(line_in_file + unique_pb_id + 'END' + '\n')
        # Python 2 except syntax; invalid in Python 3
        except Exception, exc:
            ERR_MSG = "Error while trying to parse output"\
                      " from playbook due to exception: %s"\
                      % str(exc)
            # still write END so the parent stops tailing the file
            with open("/tmp/" + exec_id, "a") as f:
                f.write(unique_pb_id + 'END' + '\n')
            JM_LOGGER.error(ERR_MSG)
            # not stopping execution just because of parsing error
            # no sys.exit therefore

        if output is None or output.get('status') is None:
            msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_OUTPUT_MISSING)
            raise Exception(msg)

        if output.get('status').lower() == "failure":
            msg = MsgBundle.getMessage(MsgBundle.PLAYBOOK_STATUS_FAILED)
            raise Exception(msg)

        return output
def get_playbook_info(self, job_percent_per_task, device_id=None): try: # create the cmd line param for the playbook extra_vars = { 'input': self._job_input, 'job_template_id': self._job_template.get_uuid(), 'job_template_fqname': self._job_template.fq_name, 'fabric_fq_name': self._fabric_fq_name, 'auth_token': self._auth_token, 'api_server_host': self._api_server_host, 'job_execution_id': self._execution_id, 'args': self._sandesh_args, 'vnc_api_init_params': self._vnc_api_init_params, 'playbook_job_percentage': job_percent_per_task } playbooks = self._job_template.get_job_template_playbooks() if device_id: if not self._device_json: msg = MsgBundle.getMessage(MsgBundle.DEVICE_JSON_NOT_FOUND) raise JobException(msg, self._execution_id) device_data = self._device_json.get(device_id) if not device_data: msg = MsgBundle.getMessage(MsgBundle.NO_DEVICE_DATA_FOUND, device_id=device_id) raise JobException(msg, self._execution_id) device_family = device_data.get('device_family') device_vendor = device_data.get('device_vendor') device_product = device_data.get('device_product') if not device_vendor or not device_family: msg = MsgBundle.getMessage( MsgBundle.DEVICE_VENDOR_FAMILY_MISSING, device_id=device_id) raise JobException(msg, self._execution_id) if not device_product: msg = MsgBundle.getMessage(MsgBundle.PRODUCT_NAME_MISSING, device_id=device_id) raise JobException(msg, self._execution_id) # check for credentials,required param; else playbooks # will fail device_username = device_data.get('device_username') device_password = device_data.get('device_password') if not device_username or not device_password: msg = MsgBundle.getMessage(MsgBundle.NO_CREDENTIALS_FOUND, device_id=device_id) raise JobException(msg, self._execution_id) # update extra-vars to reflect device-related params device_fqname = device_data.get('device_fqname') device_management_ip = device_data.get('device_management_ip') image_uuid = device_data.get('device_image_uuid') extra_vars.update({ 'device_id': 
device_id, 'device_fqname': device_fqname, 'device_management_ip': device_management_ip, 'vendor': device_vendor, 'device_family': device_family, 'device_username': device_username, 'device_password': device_password, 'product_name': device_product, 'device_image_uuid': image_uuid }) self._logger.debug("Passing the following device " "ip to playbook %s " % device_management_ip) # get the playbook uri from the job template play_info = playbooks.playbook_info[self._playbook_seq] playbook_input = {'playbook_input': extra_vars} playbook_info = dict() playbook_info['uri'] = play_info.playbook_uri playbook_info['extra_vars'] = playbook_input return playbook_info except JobException: raise except Exception as exp: msg = MsgBundle.getMessage( MsgBundle.GET_PLAYBOOK_INFO_ERROR, job_template_id=self._job_template.get_uuid(), exc_msg=repr(exp)) raise JobException(msg, self._execution_id)
def get_playbook_info(self, job_percent_per_task, device_id=None):
    """Build the invocation descriptor for the current playbook.

    Variant that also carries DB init parameters, the cluster id and the
    device's hitless-upgrade flag.  Assembles the ``extra_vars`` for the
    playbook command line and resolves the playbook URI for
    ``self._playbook_seq``; when ``device_id`` is given, validates and
    merges the device's details.

    :returns: dict with 'uri' and 'extra_vars' keys.
    :raises JobException: when device data is missing or incomplete, or
        on any unexpected error.
    """
    try:
        # base parameters common to every playbook run
        pb_vars = {
            'input': self._job_input,
            'job_template_id': self._job_template.get_uuid(),
            'job_template_fqname': self._job_template.fq_name,
            'fabric_fq_name': self._fabric_fq_name,
            'auth_token': self._auth_token,
            'api_server_host': self._api_server_host,
            'job_execution_id': self._execution_id,
            'args': self._sandesh_args,
            'vnc_api_init_params': self._vnc_api_init_params,
            'db_init_params': self._db_init_params,
            'cluster_id': self._cluster_id,
            'playbook_job_percentage': job_percent_per_task,
        }
        playbooks = self._job_template.get_job_template_playbooks()

        if device_id:
            if not self._device_json:
                raise JobException(
                    MsgBundle.getMessage(MsgBundle.DEVICE_JSON_NOT_FOUND),
                    self._execution_id)

            dev = self._device_json.get(device_id)
            if not dev:
                raise JobException(
                    MsgBundle.getMessage(MsgBundle.NO_DEVICE_DATA_FOUND,
                                         device_id=device_id),
                    self._execution_id)

            vendor = dev.get('device_vendor')
            family = dev.get('device_family')
            product = dev.get('device_product')
            if not vendor or not family:
                raise JobException(
                    MsgBundle.getMessage(
                        MsgBundle.DEVICE_VENDOR_FAMILY_MISSING,
                        device_id=device_id),
                    self._execution_id)
            if not product:
                raise JobException(
                    MsgBundle.getMessage(MsgBundle.PRODUCT_NAME_MISSING,
                                         device_id=device_id),
                    self._execution_id)

            # credentials are required, otherwise playbooks will fail
            username = dev.get('device_username')
            password = dev.get('device_password')
            if not username or not password:
                raise JobException(
                    MsgBundle.getMessage(MsgBundle.NO_CREDENTIALS_FOUND,
                                         device_id=device_id),
                    self._execution_id)

            # merge device-specific parameters into the vars
            mgmt_ip = dev.get('device_management_ip')
            pb_vars.update({
                'device_id': device_id,
                'device_fqname': dev.get('device_fqname'),
                'device_management_ip': mgmt_ip,
                'vendor': vendor,
                'device_family': family,
                'device_username': username,
                'device_password': password,
                'product_name': product,
                'device_image_uuid': dev.get('device_image_uuid'),
                'device_hitless_upgrade':
                    dev.get('device_hitless_upgrade'),
            })
            self._logger.debug("Passing the following device "
                               "ip to playbook %s " % mgmt_ip)

        # resolve the playbook uri for the current sequence number
        play_info = playbooks.playbook_info[self._playbook_seq]
        return {
            'uri': play_info.playbook_uri,
            'extra_vars': {'playbook_input': pb_vars},
        }
    except JobException:
        raise
    except Exception as exp:
        raise JobException(
            MsgBundle.getMessage(
                MsgBundle.GET_PLAYBOOK_INFO_ERROR,
                job_template_id=self._job_template.get_uuid(),
                exc_msg=repr(exp)),
            self._execution_id)
def handle_job(self, result_handler, job_percent_per_task, device_id=None,
               device_name=None):
    """Execute one playbook run (optionally scoped to a single device).

    Acquires a per-device lock for multi-device jobs, resolves the
    playbook info, runs the playbook, records success/failure (and any
    'results' payload) on ``result_handler``, and emits the prouter job
    UVE for multi-device templates.  The device lock is always released
    in the ``finally`` block.
    """
    playbook_output = None
    playbook_info = None
    try:
        msg = "Starting playbook execution for job template %s with " \
              "execution id %s" % (self._job_template.get_uuid(),
                                   self._execution_id)
        self._logger.debug(msg)

        # Always acquire the lock while executing the multi device jobs
        if device_id is not None:
            if not self._acquire_device_lock(device_id):
                raise JobException(
                    MsgBundle.getMessage(MsgBundle.DEVICE_LOCK_FAILURE))

        # get the playbook information from the job template
        playbook_info = self.get_playbook_info(job_percent_per_task,
                                               device_id)

        # run the playbook and retrieve the playbook output if any
        playbook_output = self.run_playbook(
            playbook_info, result_handler.percentage_completed)

        # retrieve the device_op_results in case it was set for
        # generic device operations.
        playbook_output_results = None
        # fix: identity comparison per PEP 8 (was `playbook_output != None`)
        if playbook_output is not None:
            playbook_output_results = playbook_output.get('results')

        msg = MsgBundle.getMessage(
            MsgBundle.PLAYBOOK_EXECUTION_COMPLETE,
            job_template_name=self._job_template.get_fq_name()[-1],
            job_execution_id=self._execution_id)
        self._logger.debug(msg)
        result_handler.update_job_status(
            JobStatus.SUCCESS, msg, device_id, device_name,
            pb_results=playbook_output_results)

        if playbook_output:
            result_handler.update_playbook_output(playbook_output)
        self.check_and_send_prouter_job_uve_for_multidevice(
            playbook_info, JobStatus.SUCCESS.value,
            playbook_output_results)
        if self.current_percentage:
            result_handler.percentage_completed = self.current_percentage
    except JobException as job_exp:
        self._logger.error("%s" % job_exp.msg)
        self._logger.error("%s" % traceback.format_exc())
        result_handler.update_job_status(JobStatus.FAILURE, job_exp.msg,
                                         device_id, device_name)
        if playbook_info:
            self.check_and_send_prouter_job_uve_for_multidevice(
                playbook_info, JobStatus.FAILURE.value)
    except Exception as exp:
        self._logger.error("Error while executing job %s " % repr(exp))
        self._logger.error("%s" % traceback.format_exc())
        # NOTE: exp.message is Python 2 only
        result_handler.update_job_status(JobStatus.FAILURE, exp.message,
                                         device_id, device_name)
        if playbook_info:
            self.check_and_send_prouter_job_uve_for_multidevice(
                playbook_info, JobStatus.FAILURE.value)
    finally:
        # always release the device lock, whatever the outcome
        if device_id is not None:
            self._release_device_lock(device_id)
def handle_job(self, result_handler, job_percent_per_task, device_id=None,
               device_name=None):
    """Execute one playbook run (optionally scoped to a single device).

    Acquires a per-device lock for multi-device jobs, resolves the
    playbook info, runs the playbook, records success/failure (and any
    'results' payload) on ``result_handler``, and emits the prouter job
    UVE for multi-device templates.  The device lock is always released
    in the ``finally`` block.
    """
    playbook_output = None
    playbook_info = None
    try:
        msg = "Starting playbook execution for job template %s with " \
              "execution id %s" % (self._job_template.get_uuid(),
                                   self._execution_id)
        self._logger.debug(msg)

        # Always acquire the lock while executing the multi device jobs
        if device_id is not None:
            if not self._acquire_device_lock(device_id):
                raise JobException(MsgBundle.getMessage(
                    MsgBundle.DEVICE_LOCK_FAILURE))

        # get the playbook information from the job template
        playbook_info = self.get_playbook_info(job_percent_per_task,
                                               device_id)

        # run the playbook and retrieve the playbook output if any
        playbook_output = self.run_playbook(
            playbook_info, result_handler.percentage_completed)

        # retrieve the device_op_results in case it was set for
        # generic device operations.
        playbook_output_results = None
        # fix: identity comparison per PEP 8 (was `playbook_output != None`)
        if playbook_output is not None:
            playbook_output_results = playbook_output.get('results')

        msg = MsgBundle.getMessage(
            MsgBundle.PLAYBOOK_EXECUTION_COMPLETE,
            job_template_name=self._job_template.get_fq_name()[-1],
            job_execution_id=self._execution_id)
        self._logger.debug(msg)
        result_handler.update_job_status(
            JobStatus.SUCCESS, msg, device_id, device_name,
            pb_results=playbook_output_results)

        if playbook_output:
            result_handler.update_playbook_output(playbook_output)
        self.check_and_send_prouter_job_uve_for_multidevice(
            playbook_info, JobStatus.SUCCESS.value,
            playbook_output_results)
        if self.current_percentage:
            result_handler.percentage_completed = self.current_percentage
    except JobException as job_exp:
        self._logger.error("%s" % job_exp.msg)
        self._logger.error("%s" % traceback.format_exc())
        result_handler.update_job_status(JobStatus.FAILURE, job_exp.msg,
                                         device_id, device_name)
        if playbook_info:
            self.check_and_send_prouter_job_uve_for_multidevice(
                playbook_info, JobStatus.FAILURE.value)
    except Exception as exp:
        self._logger.error("Error while executing job %s " % repr(exp))
        self._logger.error("%s" % traceback.format_exc())
        # NOTE: exp.message is Python 2 only
        result_handler.update_job_status(JobStatus.FAILURE, exp.message,
                                         device_id, device_name)
        if playbook_info:
            self.check_and_send_prouter_job_uve_for_multidevice(
                playbook_info, JobStatus.FAILURE.value)
    finally:
        # always release the device lock, whatever the outcome
        if device_id is not None:
            self._release_device_lock(device_id)
def create_job_summary_message(self):
    """Compose the end-of-job summary for logging.

    Walks ``self.job_result`` to separate failed devices, per-device
    operation results and per-device warnings, and may downgrade a
    SUCCESS status to WARNING when some devices failed.

    :returns: tuple of (summary message string,
        list of device_op_result payloads,
        list of failed device names).
    """
    job_summary_message = MsgBundle.getMessage(
        MsgBundle.JOB_SUMMARY_MESSAGE_HDR)

    failed_device_jobs_len = len(self.failed_device_jobs)

    if self.job_result_status is None:
        job_summary_message += MsgBundle.getMessage(
            MsgBundle.JOB_RESULT_STATUS_NONE)
    elif self.job_result_status == JobStatus.FAILURE:
        if failed_device_jobs_len > 0:
            job_summary_message += MsgBundle.getMessage(
                MsgBundle.JOB_MULTI_DEVICE_FAILED_MESSAGE_HDR)
            for failed_device in self.failed_device_jobs:
                msg = failed_device + ','
                job_summary_message += msg
        else:
            job_summary_message += "Job failed. "
        job_summary_message += "\n"
    elif self.job_result_status == JobStatus.SUCCESS:
        # a "successful" multi-device job with failed devices is
        # downgraded to WARNING
        if failed_device_jobs_len > 0:
            self.job_result_status = JobStatus.WARNING
        job_summary_message += MsgBundle.getMessage(
            MsgBundle.JOB_EXECUTION_COMPLETE)

    device_job_result_len = len(self.job_result)
    if device_job_result_len > 0:
        job_summary_message += MsgBundle.getMessage(
            MsgBundle.PLAYBOOK_RESULTS_MESSAGE)
        job_summary_message += "Successfully completed "\
                               "job for %s devices.\n"\
                               % (device_job_result_len -
                                  failed_device_jobs_len)

    # result_summary would infact be the failed_devices
    # result summary
    # warning_summary is warning for multi device jobs
    result_summary = ""
    device_op_results = []
    failed_device_names = []
    warning_summary = ""

    for entry in self.job_result:
        if entry in self.failed_device_jobs:
            result_summary += \
                "%s:%s \n" % (self.job_result[entry]['device_name'],
                              self.job_result[entry]['message'])
            failed_device_names.append(
                self.job_result[entry]['device_name'])
        elif self.job_result[entry]['device_op_result']:
            # could be other device jobs such as device import, topology
            device_op_results.append(
                self.job_result[entry]['device_op_result'])
        if entry in self.warning_device_jobs:
            warning_summary += \
                "%s: %s \n" % (self.job_result[entry]['device_name'],
                               self.job_result[entry]['warning_message'])

    if result_summary != "":
        # reuse the cached count instead of recomputing len()
        failed_device_msg = "Job execution failed for %s devices.\n"\
            % failed_device_jobs_len
        result_summary = failed_device_msg + result_summary
        job_summary_message += result_summary

    if self.job_result_message is not None:
        job_summary_message += self.job_result_message

    if self.job_warning_message or self.warning_device_jobs:
        job_summary_message += "\nJob execution had the following" \
                               " warnings: \n"
        # guard: job_warning_message may be None/empty when only
        # per-device warnings exist; avoid str += None TypeError
        job_summary_message += self.job_warning_message or ""
        job_summary_message += warning_summary

    return job_summary_message, device_op_results, failed_device_names