def test_get_globally_loaded_controls_from_settings():
    """Controls declared in settings show up as global controls until
    they are cleaned up."""
    # nothing may be registered before the settings are loaded
    assert get_global_controls() == []

    provider = {"type": "python", "module": "fixtures.controls.dummy"}
    settings = {"controls": {"dummy": {"provider": provider}}}

    load_global_controls(settings)
    initialize_global_controls({}, {}, {}, settings)

    try:
        registered = get_global_controls()
        assert len(registered) == 1

        control = registered[0]
        assert control["name"] == "dummy"
        assert control["provider"]["type"] == "python"
        assert control["provider"]["module"] == "fixtures.controls.dummy"
    finally:
        # always unregister so other tests start from an empty registry
        cleanup_global_controls()
        assert get_global_controls() == []
def test_controls_on_loaded_experiment():
    """With the retitling fixture control enabled, the experiment returned
    by `load_experiment` carries the title set by that control."""
    provider = {
        "type": "python",
        "module": "fixtures.controls.dummy_retitle_experiment_on_loading"
    }
    settings = {"controls": {"dummy": {"provider": provider}}}

    load_global_controls(settings)
    initialize_global_controls({}, {}, {}, settings)

    with tempfile.NamedTemporaryFile(suffix=".json") as f:
        try:
            payload = json.dumps(experiments.ExperimentNoControls)
            f.write(payload.encode('utf-8'))
            # rewind before the file is re-opened by name below
            f.seek(0)

            loaded = load_experiment(f.name)
            assert loaded["title"] == "BOOM I changed it"
        finally:
            cleanup_global_controls()
def test_load_global_controls_from_settings():
    """Activities are only marked by the dummy control once it has been
    registered globally from settings."""
    exp = deepcopy(experiments.ExperimentWithControls)

    # first run: no global control registered, so no markers are expected
    try:
        run_experiment(exp)
        for activity in get_all_activities(exp):
            if "controls" not in activity:
                continue
            assert "before_activity_control" not in activity
            assert "after_activity_control" not in activity
    finally:
        cleanup_global_controls()

    # NOTE(review): other call sites in this file pass
    # (experiment, configuration, secrets, settings) -- confirm this
    # single-argument form matches the chaoslib version under test.
    provider = {"type": "python", "module": "fixtures.controls.dummy"}
    initialize_global_controls({
        "dummy-key": "blah",
        "controls": {"dummy": {"provider": provider}}
    })

    # second run, same experiment object: the markers must now be set
    try:
        run_experiment(exp)
        for activity in get_all_activities(exp):
            if "controls" not in activity:
                continue
            assert activity["before_activity_control"] is True
            assert activity["after_activity_control"] is True
    finally:
        cleanup_global_controls()
def run(ctx: click.Context, source: str, journal_path: str = "./journal.json",
        dry: bool = False, no_validation: bool = False,
        no_exit: bool = False) -> Journal:
    """Run the experiment loaded from SOURCE, either a local file or a
    HTTP resource."""
    # Flow: load settings, register global controls, load (and optionally
    # validate) the experiment, run it, write the journal to `journal_path`,
    # send notifications, then exit with code 1 on failure/deviation unless
    # `no_exit` is set. Returns the journal produced by `run_experiment`.
    settings = load_settings(ctx.obj["settings_path"]) or {}
    # NOTE(review): other call sites in this file pass
    # (experiment, configuration, secrets, settings) -- confirm this
    # single-argument form matches the chaoslib version in use.
    initialize_global_controls(settings)
    has_deviated = False
    has_failed = False

    try:
        try:
            experiment = load_experiment(
                click.format_filename(source), settings)
        except InvalidSource as x:
            logger.error(str(x))
            logger.debug(x)
            ctx.exit(1)

        notify(settings, RunFlowEvent.RunStarted, experiment)

        if not no_validation:
            try:
                ensure_experiment_is_valid(experiment)
            except ChaosException as x:
                logger.error(str(x))
                logger.debug(x)
                ctx.exit(1)

        experiment["dry"] = dry

        journal = run_experiment(experiment)
        has_deviated = journal.get("deviated", False)
        has_failed = journal["status"] != "completed"

        # The journal is dumped with ensure_ascii=False, so non-ASCII
        # characters are written verbatim: pin the encoding to UTF-8 instead
        # of relying on the platform's locale encoding.
        with io.open(journal_path, "w", encoding="utf-8") as r:
            json.dump(
                journal, r, indent=2, ensure_ascii=False, default=encoder)

        if journal["status"] == "completed":
            notify(settings, RunFlowEvent.RunCompleted, journal)
        elif has_failed:
            notify(settings, RunFlowEvent.RunFailed, journal)

            if has_deviated:
                notify(settings, RunFlowEvent.RunDeviated, journal)
    finally:
        # global controls are released whatever happened above, including
        # the early exits triggered through ctx.exit()
        cleanup_global_controls()

    if (has_failed or has_deviated) and not no_exit:
        ctx.exit(1)

    return journal
def test_controls_on_loading_experiment():
    """With the failing fixture control enabled, `load_experiment` raises
    `InterruptExecution`."""
    provider = {
        "type": "python",
        "module": "fixtures.controls.dummy_fail_loading_experiment"
    }
    settings = {"controls": {"dummy": {"provider": provider}}}

    load_global_controls(settings)
    initialize_global_controls({}, {}, {}, settings)

    with tempfile.NamedTemporaryFile(suffix=".json") as f:
        try:
            with pytest.raises(InterruptExecution):
                load_experiment(f.name)
        finally:
            cleanup_global_controls()
def _run(self, strategy: Strategy, schedule: Schedule,  # noqa: C901
         experiment: Experiment, journal: Journal,
         configuration: Configuration, secrets: Secrets,
         settings: Settings,
         event_registry: EventHandlerRegistry) -> Journal:
    """
    Execute one experiment run end to end and return its journal.

    The title is substituted with `configuration`/`secrets`, the global and
    experiment-level controls are initialized, then, depending on `strategy`,
    the steady-state hypothesis is run before, during and/or after the
    method. Rollbacks are played afterwards unless an exit signal with code
    30 asked to skip them. Finally the journal is completed (`end`,
    `duration`, normalized `status`) and controls are cleaned up.

    `InterruptExecution`, `KeyboardInterrupt` and `SystemExit` are caught
    here and recorded as the `"interrupted"` status rather than propagated.
    `event_registry.finish(journal)` is always called, even when control
    cleanup fails.

    If `journal` is falsy, a fresh one is created with
    `initialize_run_journal`.
    """
    experiment["title"] = substitute(experiment["title"], configuration,
                                     secrets)
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    started_at = time.time()
    journal = journal or initialize_run_journal(experiment)
    event_registry.started(experiment, journal)

    control = Control()
    activity_pool, rollback_pool = get_background_pools(experiment)
    hypo_pool = get_hypothesis_pool()
    continous_hypo_event = threading.Event()

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    initialize_global_controls(experiment, configuration, secrets, settings)
    initialize_controls(experiment, configuration, secrets)

    logger.info("Steady-state strategy: {}".format(strategy.value))
    # read from settings["runtime"]["rollbacks"]["strategy"], falling back
    # to "default" when any level is missing
    rollback_strategy = settings.get("runtime", {}).get("rollbacks", {}).get(
        "strategy", "default")
    logger.info("Rollbacks strategy: {}".format(rollback_strategy))

    exit_gracefully_with_rollbacks = True
    with_ssh = has_steady_state_hypothesis_with_probes(experiment)
    if not with_ssh:
        logger.info("No steady state hypothesis defined. That's ok, just "
                    "exploring.")

    try:
        try:
            control.begin("experiment", experiment, experiment,
                          configuration, secrets)

            # non-None sentinel: the method below still runs when no gate
            # hypothesis is executed
            state = object()
            if with_ssh and should_run_before_method(strategy):
                state = run_gate_hypothesis(experiment, journal,
                                            configuration, secrets,
                                            event_registry, dry)

            # a None state from the gate hypothesis skips the method
            if state is not None:
                if with_ssh and should_run_during_method(strategy):
                    run_hypothesis_during_method(hypo_pool,
                                                 continous_hypo_event,
                                                 strategy, schedule,
                                                 experiment, journal,
                                                 configuration, secrets,
                                                 event_registry, dry)

                state = run_method(strategy, activity_pool, experiment,
                                   journal, configuration, secrets,
                                   event_registry, dry)

                # presumably tells the hypothesis started "during" the
                # method to stop -- confirm in run_hypothesis_during_method
                continous_hypo_event.set()
                if journal["status"] not in ["interrupted", "aborted"]:
                    if with_ssh and (state is not None) and \
                            should_run_after_method(strategy):
                        run_deviation_validation_hypothesis(
                            experiment, journal, configuration, secrets,
                            event_registry, dry)
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
            event_registry.interrupted(experiment, journal)
        except KeyboardInterrupt:
            journal["status"] = "interrupted"
            logger.warning("Received a termination signal (Ctrl-C)...")
            event_registry.signal_exit()
        except SystemExit as x:
            journal["status"] = "interrupted"
            logger.warning("Received the exit signal: {}".format(x.code))

            # exit code 30 is the request to leave without rollbacks
            exit_gracefully_with_rollbacks = x.code != 30
            if not exit_gracefully_with_rollbacks:
                logger.warning("Ignoring rollbacks as per signal")
            event_registry.signal_exit()
        finally:
            hypo_pool.shutdown(wait=True)

        # just in case a signal overrode everything else to tell us not to
        # play them anyway (see the exit.py module)
        if exit_gracefully_with_rollbacks:
            run_rollback(rollback_strategy, rollback_pool, experiment,
                         journal, configuration, secrets, event_registry,
                         dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        # the spec only allows these statuses, so if it's anything else
        # we override to "completed"
        if journal["status"] not in ("completed", "failed", "aborted",
                                     "interrupted"):
            journal["status"] = "completed"

        has_deviated = journal["deviated"]
        # "deviated" is only used for the log line below, the journal
        # status itself is left untouched
        status = "deviated" if has_deviated else journal["status"]
        logger.info("Experiment ended with status: {s}".format(s=status))
        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)
        try:
            control.end("experiment", experiment, experiment, configuration,
                        secrets)
        except ChaosException:
            # best effort: failing to close controls must not fail the run
            logger.debug("Failed to close controls", exc_info=True)
    finally:
        try:
            cleanup_controls(experiment)
            cleanup_global_controls()
        finally:
            # handlers are notified even if cleaning up controls raised
            event_registry.finish(journal)

    return journal
def run_experiment(experiment: Experiment,
                   settings: Settings = None) -> Journal:
    """
    Run the given `experiment` method step by step, in the following sequence:
    steady probe, action, close probe.

    Activities can be executed in background when they have the
    `"background"` property set to `true`. In that case, the activity is run in
    a thread. By the end of runs, those threads block until they are all
    complete.

    If the experiment has the `"dry"` property set to `true`, the experiment
    runs without actually executing the activities.

    When `settings` is `None`, the settings returned by
    `get_loaded_settings()` are used.

    NOTE: Tricky to make a decision whether we should rollback when exiting
    abnormally (Ctrl-C, SIGTERM...). Afterall, there is a chance we actually
    cannot afford to rollback properly. Better bailing to a conservative
    approach. This means we swallow :exc:`KeyboardInterrupt` and
    :exc:`SystemExit` and do not bubble it back up to the caller. When we
    were interrupted, we set the journal `status` to `"interrupted"` to notify
    the caller this was indeed not terminated properly, and rollbacks are not
    applied.

    Returns the run journal. Its `status` is one of `"completed"`,
    `"failed"`, `"aborted"` or `"interrupted"` as set below.
    """
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    started_at = time.time()
    settings = settings if settings is not None else get_loaded_settings()
    config = load_configuration(experiment.get("configuration", {}))
    secrets = load_secrets(experiment.get("secrets", {}), config)
    initialize_global_controls(experiment, config, secrets, settings)
    initialize_controls(experiment, config, secrets)
    activity_pool, rollback_pool = get_background_pools(experiment)

    control = Control()
    journal = initialize_run_journal(experiment)

    try:
        try:
            control.begin(
                "experiment", experiment, experiment, config, secrets)

            # Bound up front so the ActivityFailed handler below can always
            # record it, even when the hypothesis call itself raises before
            # returning a state.
            state = None

            # this may fail the entire experiment right there if any of the
            # probes fail or fall out of their tolerance zone
            try:
                state = run_steady_state_hypothesis(
                    experiment, config, secrets, dry=dry)
                journal["steady_states"]["before"] = state
                if state is not None and not state["steady_state_met"]:
                    p = state["probes"][-1]
                    raise ActivityFailed(
                        "Steady state probe '{p}' is not in the given "
                        "tolerance so failing this experiment".format(
                            p=p["activity"]["name"]))
            except ActivityFailed as a:
                journal["steady_states"]["before"] = state
                journal["status"] = "failed"
                logger.fatal(str(a))
            else:
                # the method only runs when the initial hypothesis held
                try:
                    journal["run"] = apply_activities(
                        experiment, config, secrets, activity_pool, dry)
                except Exception:
                    journal["status"] = "aborted"
                    logger.fatal(
                        "Experiment ran into an un expected fatal error, "
                        "aborting now.", exc_info=True)
                else:
                    # re-check the hypothesis to detect a deviation
                    try:
                        state = run_steady_state_hypothesis(
                            experiment, config, secrets, dry=dry)
                        journal["steady_states"]["after"] = state
                        if state is not None and \
                                not state["steady_state_met"]:
                            journal["deviated"] = True
                            p = state["probes"][-1]
                            raise ActivityFailed(
                                "Steady state probe '{p}' is not in the "
                                "given tolerance so failing this "
                                "experiment".format(
                                    p=p["activity"]["name"]))
                    except ActivityFailed as a:
                        journal["status"] = "failed"
                        logger.fatal(str(a))
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
        except (KeyboardInterrupt, SystemExit):
            journal["status"] = "interrupted"
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning("Received an exit signal, "
                           "leaving without applying rollbacks.")
        else:
            # rollbacks are only applied when the run was not interrupted
            journal["status"] = journal["status"] or "completed"
            journal["rollbacks"] = apply_rollbacks(
                experiment, config, secrets, rollback_pool, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        has_deviated = journal["deviated"]
        # "deviated" is only used for the log line, not stored as status
        status = "deviated" if has_deviated else journal["status"]

        logger.info(
            "Experiment ended with status: {s}".format(s=status))

        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)

        try:
            control.end(
                "experiment", experiment, experiment, config, secrets)
        except ChaosException:
            # best effort: failing to close controls must not fail the run
            logger.debug("Failed to close controls", exc_info=True)
    finally:
        cleanup_controls(experiment)
        cleanup_global_controls()

    return journal