def run_deviation_validation_hypothesis(
    experiment: Experiment,
    journal: Journal,
    configuration: Configuration,
    secrets: Secrets,
    event_registry: EventHandlerRegistry,
    dry: bool = False,
) -> Dict[str, Any]:
    """
    Evaluate the steady-state hypothesis once the method has been applied.

    The resulting state is stored under `journal["steady_states"]["after"]`
    and handed to the event registry. When a state was produced and its
    `"steady_state_met"` entry is falsy, the journal is flagged as deviated,
    its status becomes `"failed"` and the name of the last probe of the
    state is logged.

    Returns the state exactly as produced by `run_steady_state_hypothesis`.
    """
    logger.debug("Running steady-state hypothesis after the method")
    event_registry.start_hypothesis_after(experiment)

    outcome = run_steady_state_hypothesis(
        experiment, configuration, secrets, dry=dry
    )
    journal["steady_states"]["after"] = outcome
    event_registry.hypothesis_after_completed(experiment, outcome, journal)

    # nothing to report when no state was produced or the hypothesis held
    if outcome is None or outcome["steady_state_met"]:
        return outcome

    journal["deviated"] = True
    journal["status"] = "failed"

    # the last recorded probe is the one reported as out of tolerance
    last_probe = outcome["probes"][-1]
    probe_name = last_probe["activity"]["name"]
    logger.fatal(
        "Steady state probe '{p}' is not in the given tolerance so failing "
        "this experiment".format(p=probe_name)
    )
    return outcome
def run_gate_hypothesis(
    experiment: Experiment,
    journal: Journal,
    configuration: Configuration,
    secrets: Secrets,
    event_registry: EventHandlerRegistry,
    dry: bool = False,
) -> Dict[str, Any]:
    """
    Evaluate the steady-state hypothesis ahead of the method, acting as a
    gate for the rest of the execution.

    The state is stored under `journal["steady_states"]["before"]` and
    handed to the event registry. When a state was produced and its
    `"steady_state_met"` entry is falsy, the journal status becomes
    `"failed"`, the name of the last probe of the state is logged and
    `None` is returned. Otherwise the state itself is returned.
    """
    logger.debug("Running steady-state hypothesis before the method")
    event_registry.start_hypothesis_before(experiment)

    gate_state = run_steady_state_hypothesis(
        experiment, configuration, secrets, dry=dry
    )
    journal["steady_states"]["before"] = gate_state
    event_registry.hypothesis_before_completed(
        experiment, gate_state, journal
    )

    # the gate is open: no state was produced or the hypothesis was met
    if gate_state is None or gate_state["steady_state_met"]:
        return gate_state

    # stored once more after the event handlers have run (same value as
    # recorded above)
    journal["steady_states"]["before"] = gate_state
    journal["status"] = "failed"

    last_probe = gate_state["probes"][-1]
    probe_name = last_probe["activity"]["name"]
    logger.fatal(
        "Steady state probe '{p}' is not in the given tolerance so failing "
        "this experiment".format(p=probe_name)
    )

    # a failed gate yields no state to the caller
    return None
def run_hypothesis_continuously(
    event: threading.Event,
    schedule: Schedule,
    experiment: Experiment,
    journal: Journal,
    configuration: Configuration,
    secrets: Secrets,
    event_registry: EventHandlerRegistry,
    dry: bool = False,
):
    """
    Run the steady-state hypothesis repeatedly until `event` is set or the
    journal status is one of `"failed"`, `"interrupted"` or `"aborted"`.

    Each state is appended to `journal["steady_states"]["during"]` and
    reported to the event registry along with its iteration number
    (starting at 1). The loop waits on `event` for
    `schedule.continuous_hypothesis_frequency` seconds between iterations.

    When `schedule.fail_fast` is truthy and the percentage of deviating
    iterations reaches `schedule.fail_fast_ratio`, the journal status is set
    to `"failed"` and the loop stops right away.
    """
    pause = schedule.continuous_hypothesis_frequency
    ratio_threshold = schedule.fail_fast_ratio
    terminal_statuses = ("failed", "interrupted", "aborted")

    event_registry.start_continuous_hypothesis(pause)
    logger.info(
        "Executing the steady-state hypothesis continuously "
        "every {} seconds".format(pause)
    )

    deviations = 0
    deviation_pct = 0
    round_number = 1

    while not event.is_set():
        # already marked as terminated, let's exit now
        if journal["status"] in terminal_statuses:
            break

        current = run_steady_state_hypothesis(
            experiment, configuration, secrets, dry=dry
        )
        journal["steady_states"]["during"].append(current)
        event_registry.continuous_hypothesis_iteration(round_number, current)

        deviated = current is not None and not current["steady_state_met"]
        if deviated:
            deviations += 1
            # share of deviating iterations so far, as a percentage
            deviation_pct = (deviations * 100) / round_number

            last_probe = current["probes"][-1]
            logger.warning(
                "Continuous steady state probe '{p}' is not in the given "
                "tolerance".format(p=last_probe["activity"]["name"])
            )

            if schedule.fail_fast and deviation_pct >= ratio_threshold:
                message = "Terminating immediately the experiment"
                if deviation_pct != 0.0:
                    message = "{} after {:.1f}% hypothesis deviated".format(
                        message, deviation_pct
                    )
                logger.info(message)
                journal["status"] = "failed"
                break

        round_number += 1

        # the pause is not shortened by the time the probes took: we want
        # the full frequency between two iterations, not frequency as the
        # total duration of a single iteration
        event.wait(timeout=pause)
def run_experiment(experiment: Experiment,
                   settings: Settings = None) -> Journal:
    """
    Run the given `experiment` method step by step, in the following sequence:
    steady probe, action, close probe.

    Activities can be executed in background when they have the
    `"background"` property set to `true`. In that case, the activity is run in
    a thread. By the end of runs, those threads block until they are all
    complete.

    If the experiment has the `"dry"` property set to `True`, the experiment
    runs without actually executing the activities.

    NOTE: Tricky to make a decision whether we should rollback when exiting
    abnormally (Ctrl-C, SIGTERM...). After all, there is a chance we actually
    cannot afford to rollback properly. Better bailing to a conservative
    approach. This means we swallow :exc:`KeyboardInterrupt` and
    :exc:`SystemExit` and do not bubble it back up to the caller. When we
    were interrupted, we set the journal `"status"` to `"interrupted"` to
    notify the caller this was indeed not terminated properly, and rollbacks
    are not applied.

    Returns the journal of the run. Its `"status"` is one of `"failed"`
    (a steady-state hypothesis was not met), `"aborted"` (the activities
    raised an unexpected error), `"interrupted"`, or whatever status was
    already set, falling back to `"completed"`.
    """
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    started_at = time.time()
    settings = settings if settings is not None else get_loaded_settings()
    config = load_configuration(experiment.get("configuration", {}))
    secrets = load_secrets(experiment.get("secrets", {}), config)
    initialize_global_controls(experiment, config, secrets, settings)
    initialize_controls(experiment, config, secrets)
    activity_pool, rollback_pool = get_background_pools(experiment)

    control = Control()
    journal = initialize_run_journal(experiment)

    try:
        try:
            control.begin(
                "experiment", experiment, experiment, config, secrets)

            # bound up-front so the handler below can always record it, even
            # when ActivityFailed is raised before a state was returned
            state = None

            # this may fail the entire experiment right there if any of the
            # probes fail or fall out of their tolerance zone
            try:
                state = run_steady_state_hypothesis(
                    experiment, config, secrets, dry=dry)
                journal["steady_states"]["before"] = state
                if state is not None and not state["steady_state_met"]:
                    p = state["probes"][-1]
                    raise ActivityFailed(
                        "Steady state probe '{p}' is not in the given "
                        "tolerance so failing this experiment".format(
                            p=p["activity"]["name"]))
            except ActivityFailed as a:
                journal["steady_states"]["before"] = state
                journal["status"] = "failed"
                logger.fatal(str(a))
            else:
                # the hypothesis held (or there was none), apply the method
                try:
                    journal["run"] = apply_activities(
                        experiment, config, secrets, activity_pool, dry)
                except Exception:
                    journal["status"] = "aborted"
                    logger.fatal(
                        "Experiment ran into an un expected fatal error, "
                        "aborting now.", exc_info=True)
                else:
                    # the method went through, check the hypothesis again
                    try:
                        state = run_steady_state_hypothesis(
                            experiment, config, secrets, dry=dry)
                        journal["steady_states"]["after"] = state
                        if state is not None and \
                                not state["steady_state_met"]:
                            journal["deviated"] = True
                            p = state["probes"][-1]
                            raise ActivityFailed(
                                "Steady state probe '{p}' is not in the given "
                                "tolerance so failing this experiment".format(
                                    p=p["activity"]["name"]))
                    except ActivityFailed as a:
                        journal["status"] = "failed"
                        logger.fatal(str(a))
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
        except (KeyboardInterrupt, SystemExit):
            journal["status"] = "interrupted"
            logger.warning("Received an exit signal, "
                           "leaving without applying rollbacks.")
        else:
            # keep any status set above (failed/aborted); rollbacks are only
            # applied when the run was not interrupted
            journal["status"] = journal["status"] or "completed"
            journal["rollbacks"] = apply_rollbacks(
                experiment, config, secrets, rollback_pool, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        # a deviation takes precedence over the status in the final log line
        has_deviated = journal["deviated"]
        status = "deviated" if has_deviated else journal["status"]

        logger.info(
            "Experiment ended with status: {s}".format(s=status))

        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)

        # failing to close the controls must not fail the run itself
        try:
            control.end(
                "experiment", experiment, experiment, config, secrets)
        except ChaosException:
            logger.debug("Failed to close controls", exc_info=True)

    finally:
        cleanup_controls(experiment)
        cleanup_global_controls()

    return journal