def verify_settings(self):
    """Exit the process unless every setting the engine requires is present.

    Each required name is looked up with ``Setting.get_setting``; the first
    one that comes back falsy is logged as an error and the process exits
    with status 1.

    Raises:
        SystemExit: with status 1 when a required setting is missing.
    """
    required_settings = ['round_time_sleep', 'worker_refresh_time']
    for setting_name in required_settings:
        if not Setting.get_setting(setting_name):
            logger.error("Must have %s setting.", setting_name)
            # sys.exit rather than the bare exit() builtin: exit() is injected
            # by the site module for interactive use and is not guaranteed
            # to exist (e.g. under ``python -S``).
            sys.exit(1)
def run(self):
    """Run scoring rounds until ``is_last_round()`` says the engine is done.

    Each round:
      1. queues one check command per service (services in random order,
         each against a randomly chosen environment) on the service's
         worker queue,
      2. stores the queued task ids in a ``task_ids`` KB row so the web app
         can show progress,
      3. polls until no tasks are pending, sleeping ``worker_refresh_time``
         seconds between polls,
      4. grades every task result and saves a ``Round`` plus one ``Check``
         per task,
      5. logs per-team stats, refreshes the caches, and sleeps
         ``round_time_sleep`` seconds unless this was the last round.

    Raises:
        LookupError: if a service's ``check_name`` cannot be mapped to a
            check class.
        SystemExit: with status 1 if anything fails while waiting for or
            recording results; rows already written for the round are
            removed first on a best-effort basis.
    """

    def rollback_quietly():
        """Best-effort rollback so the session is usable for cleanup."""
        try:
            self.session.rollback()
        except Exception:
            logger.exception("Unable to roll back the db session")

    if self.total_rounds == 0:
        logger.info("Running engine for unlimited rounds")
    else:
        logger.info("Running engine for %s round(s)", self.total_rounds)

    while not self.is_last_round():
        self.current_round += 1
        logger.info("Running round: %s", self.current_round)
        self.round_running = True
        self.rounds_run += 1

        # Shuffle a copy so checks are not always queued in the same order.
        services = list(self.session.query(Service).all())
        random.shuffle(services)

        # Maps team name -> list of queued task ids for this round.
        task_ids = {}
        for service in services:
            check_class = self.check_name_to_obj(service.check_name)
            if check_class is None:
                raise LookupError("Unable to map service to check code for " + str(service.check_name))
            logger.debug("Adding %s - %s check to queue", service.team.name, service.name)
            environment = random.choice(service.environments)
            check_obj = check_class(environment)
            command_str = check_obj.command()
            job = Job(environment_id=environment.id, command=command_str)
            task = execute_command.apply_async(args=[job], queue=service.worker_queue)
            task_ids.setdefault(environment.service.team.name, []).append(task.id)

        # This list keeps track of all current round objects
        # in case we need to back out any changes to prevent
        # inconsistent check results
        cleanup_items = []

        try:
            # We store the list of tasks in the db, so that the web app
            # can consume them and can dynamically update a progress bar
            latest_kb = KB(name='task_ids', value=json.dumps(task_ids), round_num=self.current_round)
            cleanup_items.append(latest_kb)
            self.session.add(latest_kb)
            self.session.commit()

            pending_tasks = self.all_pending_tasks(task_ids)
            while pending_tasks:
                # Re-read the setting on every poll so a changed value is
                # picked up without restarting the engine.
                worker_refresh_time = int(Setting.get_setting('worker_refresh_time').value)
                logger.info(
                    "Waiting for all jobs to finish (sleeping %s seconds) %s left in queue.",
                    worker_refresh_time,
                    len(pending_tasks),
                )
                self.sleep(worker_refresh_time)
                pending_tasks = self.all_pending_tasks(task_ids)

            logger.info("All jobs have finished for this round")
            logger.info("Determining check results and saving to db")
            round_obj = Round(number=self.current_round)
            cleanup_items.append(round_obj)
            self.session.add(round_obj)
            self.session.commit()

            # We keep track of the number of passed and failed checks per round
            # so we can report a little bit at the end of each round
            teams = {}
            # Used so we import the finished checks at the end of the round
            finished_checks = []
            # A distinct loop name keeps the task_ids dict from being
            # rebound to one team's list while we iterate it.
            for team_task_ids in task_ids.values():
                for task_id in team_task_ids:
                    # Fetch the stored result once per task.
                    task_result = execute_command.AsyncResult(task_id).result
                    environment = self.session.query(Environment).get(task_result['environment_id'])
                    if task_result['errored_out']:
                        passed = False
                        reason = CHECK_TIMED_OUT_TEXT
                    elif re.search(environment.matching_content, task_result['output']):
                        passed = True
                        reason = CHECK_SUCCESS_TEXT
                    else:
                        passed = False
                        reason = CHECK_FAILURE_TEXT

                    service = environment.service
                    team_stats = teams.setdefault(service.team.name, {"Success": [], "Failed": []})
                    team_stats['Success' if passed else 'Failed'].append(service.name)

                    check = Check(service=service, round=round_obj)
                    # Grab the first 35,000 characters of output so it'll fit into our TEXT column,
                    # which maxes at 2^16 (65536) characters
                    check.finished(
                        result=passed,
                        reason=reason,
                        output=task_result['output'][:35000],
                        command=task_result['command'],
                    )
                    finished_checks.append(check)

            for finished_check in finished_checks:
                cleanup_items.append(finished_check)
                self.session.add(finished_check)
            self.session.commit()

        except Exception as e:
            # We got an error while writing to db (could be normal docker stop command)
            # but we gotta clean up any trace of the current round so when we startup
            # again, we're at a consistent state
            logger.error('Error received while writing check results to db')
            logger.exception(e)
            logger.error('Ending round and cleaning up the db')
            # A failed flush/commit leaves the session unusable until it is
            # rolled back; without this every delete below would fail too.
            rollback_quietly()
            for cleanup_item in cleanup_items:
                try:
                    self.session.delete(cleanup_item)
                    self.session.commit()
                except Exception:
                    # Still best effort (items that never reached the db
                    # cannot be deleted), but leave a trace in the log and
                    # reset the session for the next item.
                    logger.warning(
                        "Unable to remove %s during round cleanup",
                        type(cleanup_item).__name__,
                        exc_info=True,
                    )
                    rollback_quietly()
            sys.exit(1)

        logger.info("Finished Round %s", self.current_round)
        logger.info("Round Stats:")
        for team_name in sorted(teams):
            succeeded = teams[team_name]['Success']
            failed = teams[team_name]['Failed']
            stat_string = " " + team_name
            stat_string += " Success: " + str(len(succeeded))
            stat_string += ", Failed: " + str(len(failed))
            if failed:
                stat_string += ' (' + ', '.join(failed) + ')'
            logger.info(stat_string)

        logger.info("Updating Caches")
        update_all_cache()

        self.round_running = False

        if not self.is_last_round():
            round_time_sleep = int(Setting.get_setting('round_time_sleep').value)
            logger.info("Sleeping in between rounds (%s seconds)", round_time_sleep)
            self.sleep(round_time_sleep)

    logger.info("Engine finished running")
def verify_settings(self):
    """Exit the process unless every setting the engine requires is present.

    Each required name is looked up with ``Setting.get_setting``; the first
    one that comes back falsy is logged as an error and the process exits
    with status 1.

    Raises:
        SystemExit: with status 1 when a required setting is missing.
    """
    # NOTE(review): run() as visible in this file reads 'round_time_sleep',
    # which is not verified here - confirm which setting name is current.
    required_settings = ["target_round_time", "worker_refresh_time"]
    for setting_name in required_settings:
        if not Setting.get_setting(setting_name):
            logger.error("Must have %s setting.", setting_name)
            # sys.exit rather than the bare exit() builtin: exit() is injected
            # by the site module for interactive use and is not guaranteed
            # to exist (e.g. under ``python -S``).
            sys.exit(1)