Пример #1
0
def run_gate_hypothesis(experiment: Experiment,
                        journal: Journal,
                        configuration: Configuration,
                        secrets: Secrets,
                        event_registry: EventHandlerRegistry,
                        dry: bool = False) -> Dict[str, Any]:
    """
    Evaluate the steady-state hypothesis ahead of the method.

    The resulting state is stored in the journal under
    ``steady_states.before`` and handed to the event registry. When the
    steady state is not met, the journal status becomes ``"failed"`` and
    ``None`` is returned so the caller can bail out; otherwise the state is
    returned as-is.
    """
    logger.debug("Running steady-state hypothesis before the method")
    event_registry.start_hypothesis_before(experiment)

    state = run_steady_state_hypothesis(
        experiment, configuration, secrets, dry=dry)
    journal["steady_states"]["before"] = state
    event_registry.hypothesis_before_completed(experiment, state, journal)

    # Nothing to gate on, or the hypothesis held: hand the state back.
    if state is None or state["steady_state_met"]:
        return state

    # The journal was passed to the event handlers above, so the state is
    # written once more before flagging the failure.
    journal["steady_states"]["before"] = state
    journal["status"] = "failed"

    last_probe = state["probes"][-1]
    probe_name = last_probe["activity"]["name"]
    logger.fatal(
        "Steady state probe '{p}' is not in the given "
        "tolerance so failing this experiment".format(p=probe_name))
    return None
Пример #2
0
def run_method(strategy: Strategy,
               activity_pool: ThreadPoolExecutor,
               experiment: Experiment,
               journal: Journal,
               configuration: Configuration,
               secrets: Secrets,
               event_registry: EventHandlerRegistry,
               dry: bool = False) -> Optional[List[Run]]:
    """
    Apply the activities of the experiment's method.

    The event registry is told when the method starts and when it completes,
    whatever the outcome. An `InterruptExecution` is re-raised to the caller.
    Any other exception marks the journal as ``"aborted"``, is logged with
    its traceback and results in ``None`` being returned.
    """
    logger.info("Playing your experiment's method now...")
    event_registry.start_method(experiment)

    try:
        runs = apply_activities(
            experiment, configuration, secrets, activity_pool, journal, dry)
        event_registry.method_completed(experiment, runs)
    except InterruptExecution:
        # let the caller decide how to deal with the interruption
        event_registry.method_completed(experiment)
        raise
    except Exception:
        journal["status"] = "aborted"
        event_registry.method_completed(experiment)
        logger.fatal(
            "Experiment ran into an un expected fatal error, "
            "aborting now.",
            exc_info=True)
        return None
    else:
        return runs
Пример #3
0
def discover(
    ctx: click.Context,
    package: str,
    discovery_path: str = "./discovery.json",
    no_system_info: bool = False,
    no_install: bool = False,
) -> Discovery:
    """Discover capabilities and experiments."""
    # Flow: notify "started", run the discovery, dump the outcome as JSON
    # into `discovery_path`, notify "completed" and return the discovery.
    # A DiscoveryFailed error is notified and logged, and None is returned.
    settings = load_settings(ctx.obj["settings_path"])

    try:
        notify(settings, DiscoverFlowEvent.DiscoverStarted, package)
        # the `no_*` switches are negative, the discovery API is positive
        discovery = disco(
            package_name=package,
            discover_system=not no_system_info,
            download_and_install=not no_install,
        )
    except DiscoveryFailed as failure:
        notify(settings, DiscoverFlowEvent.DiscoverFailed, package, failure)
        logger.debug(f"Failed to discover {package}", exc_info=failure)
        logger.fatal(str(failure))
        return None

    with open(discovery_path, "w") as outfile:
        serialized = json.dumps(discovery, indent=2, default=encoder)
        outfile.write(serialized)
    logger.info(f"Discovery outcome saved in {discovery_path}")

    notify(settings, DiscoverFlowEvent.DiscoverCompleted, discovery)
    return discovery
Пример #4
0
    def changePassword(self, newPassword):
        """
        Replace the current password with ``newPassword``.

        The candidate is rejected when it verifies against the current
        password or when it is already present in the password history. On
        success its hash is stored in ``self.curPwd`` and the candidate is
        appended to the history; once the history holds
        ``MAX_HISTORICAL_PWDS`` entries or more, its first entry is dropped.

        Raises:
            PasswordMatchException: the candidate matches the current
                password.
            HistoricalPasswordMatchException: the candidate is already in the
                history.
        """
        # Password material (clear text, hashes, history contents) must never
        # reach the logs, so only non-sensitive facts are recorded here.
        logger.debug("Password change requested")
        # check against cur pwd
        if self._verify_password(self.getCurrentPassword(), str(newPassword)):
            logger.fatal("Passwords match, heading back.")
            raise PasswordMatchException("Matches Current Go Home")
        # check against historical pwds
        historicalPasswords = self.getPasswordHistory()
        if newPassword in historicalPasswords:
            logger.fatal("Password is in historical list, try again.")
            raise HistoricalPasswordMatchException("Found in historical list")
        logger.debug("Current historical list size: {}".format(
            len(historicalPasswords)))

        # got here, so lets update the pwd and add it to the history

        # the current password is only kept hashed
        self.curPwd = self._hash_password(str(newPassword))

        # NOTE: historical passwords are still stored in clear, this needs
        # to be checked/prevented
        historicalPasswords.append(newPassword)
        if len(historicalPasswords) >= MAX_HISTORICAL_PWDS:
            del historicalPasswords[0]
        logger.info("History: {}".format(len(historicalPasswords)))

        logger.debug("Password changed")
Пример #5
0
def run_deviation_validation_hypothesis(experiment: Experiment,
                                        journal: Journal,
                                        configuration: Configuration,
                                        secrets: Secrets,
                                        event_registry: EventHandlerRegistry,
                                        dry: bool = False) \
                                            -> Dict[str, Any]:
    """
    Evaluate the steady-state hypothesis once the method has been played.

    The state is stored in the journal under ``steady_states.after`` and
    handed to the event registry. When the steady state is not met, the
    journal is flagged as deviated and its status set to ``"failed"``. The
    state is returned in every case.
    """
    logger.debug("Running steady-state hypothesis after the method")
    event_registry.start_hypothesis_after(experiment)

    state = run_steady_state_hypothesis(
        experiment, configuration, secrets, dry=dry)
    journal["steady_states"]["after"] = state
    event_registry.hypothesis_after_completed(experiment, state, journal)

    # Nothing was evaluated, or the hypothesis still holds: no deviation.
    if state is None or state["steady_state_met"]:
        return state

    journal["deviated"] = True
    journal["status"] = "failed"

    probe_name = state["probes"][-1]["activity"]["name"]
    logger.fatal(
        "Steady state probe '{p}' is not in the given tolerance so failing "
        "this experiment".format(p=probe_name))
    return state
Пример #6
0
def create_repository():
    """
    Register the snapshot repository named by `settings.REPOSITORY_NAME`.

    A PUT request carrying `settings.REPOSITORY_SETTINGS` is sent to the
    `_snapshot` endpoint of `settings.ES_HOST`. Any error raised while
    preparing or sending the request is logged and re-raised. The outcome is
    announced and `True` is returned only for an HTTP 200 answer.
    """
    logger.info(f"creating repository {settings.REPOSITORY_NAME}")

    try:
        url = f"{settings.ES_HOST}/_snapshot/{settings.REPOSITORY_NAME}"

        logger.debug(
            f"request: url={url} data={settings.REPOSITORY_SETTINGS} timeout={settings.REQUEST_TIMEOUT_SECONDS}"
        )

        response = requests.put(
            url=url,
            data=settings.REPOSITORY_SETTINGS,
            timeout=settings.REQUEST_TIMEOUT_SECONDS,
        )
    except Exception as exc:
        logger.fatal(
            f"problem while creating repository on {settings.ES_HOST}: {exc!s}"
        )
        raise

    logger.debug(f"response: {response} {response.text}")

    created = response.status_code == 200
    if created:
        announce(f"repository {settings.REPOSITORY_NAME} created")
    else:
        announce(
            f"repository {settings.REPOSITORY_NAME} not created! reason: {response} {response.text}"
        )

    return created
Пример #7
0
def run_rollback(
    rollback_strategy: str,
    rollback_pool: ThreadPoolExecutor,
    experiment: Experiment,
    journal: Journal,
    configuration: Configuration,
    secrets: Secrets,
    event_registry: EventHandlerRegistry,
    dry: bool = False,
) -> None:
    """
    Decide whether the rollbacks must be played and, if so, apply them.

    The decision depends on `rollback_strategy`:

    * ``"always"``: rollbacks are played
    * ``"never"``: rollbacks are not played
    * ``"default"``: rollbacks are played unless the journal status is
      ``"failed"`` or ``"interrupted"``
    * ``"deviated"``: rollbacks are played only when the journal is flagged
      as deviated

    Any other value leaves the rollbacks unplayed.

    When played, the result is stored under ``journal["rollbacks"]``. An
    `InterruptExecution`, `KeyboardInterrupt` or `SystemExit` raised while
    applying them is swallowed and the journal status is set to
    ``"interrupted"``. The event registry is notified of the completion in
    every case.
    """
    has_deviated = journal["deviated"]
    journal_status = journal["status"]
    play_rollbacks = False
    if rollback_strategy == "always":
        logger.warning("Rollbacks were explicitly requested to be played")
        play_rollbacks = True
    elif rollback_strategy == "never":
        logger.warning("Rollbacks were explicitly requested to not be played")
        play_rollbacks = False
    elif rollback_strategy == "default" and journal_status not in [
        "failed",
        "interrupted",
    ]:
        play_rollbacks = True
    elif rollback_strategy == "deviated":
        if has_deviated:
            logger.warning(
                "Rollbacks will be played only because the experiment deviated"
            )
            play_rollbacks = True
        else:
            logger.warning(
                "Rollbacks were explicitely requested to be played "
                "only if the experiment deviated. Since this is not "
                "the case, we will not play them."
            )

    if play_rollbacks:
        event_registry.start_rollbacks(experiment)
        try:
            journal["rollbacks"] = apply_rollbacks(
                experiment, configuration, secrets, rollback_pool, dry
            )
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
        except (KeyboardInterrupt, SystemExit):
            journal["status"] = "interrupted"
            # the two sentences are separate literals: keep the space between
            # them so the message does not read "signal.Terminating"
            logger.warning(
                "Received an exit signal. "
                "Terminating now without running the "
                "remaining rollbacks."
            )
        finally:
            event_registry.rollbacks_completed(experiment, journal)
Пример #8
0
 def completed(f: Future):
     """
     Future callback run when the continuous hypothesis is over.

     The event registry is notified with the future's exception, if any. An
     `InterruptExecution` marks the journal as ``"interrupted"``, any other
     `Exception` marks it as ``"aborted"``; both are logged.
     """
     exc = f.exception()
     event_registry.continous_hypothesis_completed(experiment, journal, exc)

     # None (no failure) matches neither check and leaves the journal alone
     if isinstance(exc, InterruptExecution):
         status = "interrupted"
     elif isinstance(exc, Exception):
         status = "aborted"
     else:
         status = None

     if status is not None:
         journal["status"] = status
         logger.fatal(str(exc))

     logger.info("Continous steady state hypothesis terminated")
Пример #9
0
def create_snapshot():
    """
    Ask Elasticsearch to create a snapshot in the configured repository.

    The snapshot name is `settings.SNAPSHOT_PREFIX` followed by the current
    UTC timestamp (``YYYYmmddHHMMSS``). When
    `settings.SNAPSHOT_TIMEOUT_SECONDS` is not zero, the request waits for the
    snapshot to complete, using that value as the request timeout; otherwise
    the request returns as soon as it is accepted and
    `settings.REQUEST_TIMEOUT_SECONDS` is used instead.

    When `settings.INDEX_NAMES` is set, the snapshot is restricted to those
    indices. Any error raised while preparing or sending the request is
    logged and re-raised.

    Returns `True` only for an HTTP 200 answer.
    """
    timestamp = datetime.utcfromtimestamp(time.time()).strftime('%Y%m%d%H%M%S')
    snapshot_name = f"{settings.SNAPSHOT_PREFIX}{timestamp}"
    logger.info(
        f"creating snapshot {snapshot_name} in {settings.REPOSITORY_NAME}")

    wait_for_completion = (settings.SNAPSHOT_TIMEOUT_SECONDS != 0)

    if wait_for_completion:
        timeout = settings.SNAPSHOT_TIMEOUT_SECONDS
        logger.info(
            f"shutterbug will wait {settings.SNAPSHOT_TIMEOUT_SECONDS} second(s) maximum"
        )
    else:
        timeout = settings.REQUEST_TIMEOUT_SECONDS
        logger.info("shutterbug will not wait for snapshot to be complete")

    # named `payload` rather than `json` so the module name is not shadowed
    payload = {
        "ignore_unavailable": settings.IGNORE_UNAVAILABLE,
        "include_global_state": settings.INCLUDE_GLOBAL_STATE
    }

    if not settings.INDEX_NAMES:
        logger.info("all indices will be included")
    else:
        logger.info(
            f"only these indices will be included: {settings.INDEX_NAMES}")
        # the create-snapshot API reads the index filter from `indices`
        payload["indices"] = settings.INDEX_NAMES

    try:
        url = f"{settings.ES_HOST}/_snapshot/{settings.REPOSITORY_NAME}/{snapshot_name}?wait_for_completion={str(wait_for_completion).lower()}"

        logger.debug(f"request: url={url} data={payload} timeout={timeout}")

        r = requests.put(url=url, json=payload, timeout=timeout)
    except Exception as e:
        logger.fatal(
            f"problem while creating snapshot {snapshot_name} on {settings.ES_HOST}: {str(e)}"
        )
        raise

    logger.debug(f"response: {r} {r.text}")

    created = r.status_code == 200
    if not created:
        announce(f"snapshot {snapshot_name} not created! reason: {r} {r.text}")
    elif wait_for_completion:
        announce(f"snapshot {snapshot_name} created")
    else:
        announce(f"snapshot {snapshot_name} accepted")

    return created
Пример #10
0
def check_required():
    """
    Verify that every required configuration variable is set.

    Each key of `REQUIRED_CONFIG` is mapped to its environment variable name;
    every variable missing from the environment is reported, then the process
    exits with status 1 if at least one was missing.
    """
    missing_count = 0

    for key in REQUIRED_CONFIG:
        var = _key_to_var(key)
        if os.environ.get(var) is not None:
            continue

        missing_count += 1
        logger.fatal(
            "Please make sure that the required environment "
            "variable '{}' is set!".format(var))

    if missing_count:
        sys.exit(1)
Пример #11
0
def repository_exists():
    """
    Tell whether the snapshot repository is known to Elasticsearch.

    A GET request is sent to the `_snapshot` endpoint for
    `settings.REPOSITORY_NAME`. Any error raised by the request is logged and
    re-raised. Every answer other than HTTP 404 counts as "exists".
    """
    logger.info(f"checking if repository {settings.REPOSITORY_NAME} exists")

    try:
        response = requests.get(
            url=f"{settings.ES_HOST}/_snapshot/{settings.REPOSITORY_NAME}",
            timeout=settings.REQUEST_TIMEOUT_SECONDS,
        )
    except Exception as exc:
        logger.fatal(f"problem while contacting {settings.ES_HOST}: {exc!s}")
        raise

    logger.debug(f"response: {response} {response.text}")

    not_found = response.status_code == 404
    return not not_found
Пример #12
0
def main():
    """
    Idle until asked to quit, then optionally touch a semaphore file.

    With `settings.USE_ECS_TASK_STRATEGY`, the semaphore file path is derived
    from the task ARN found in the metadata served by
    `settings.ECS_TASK_STRATEGY_ENDPOINT`; the function gives up early when
    that endpoint is not configured. Otherwise `settings.SEMAPHORE_FILE` is
    used.

    With `settings.USE_SEMAPHORE_FILE_STRATEGY`, a leftover semaphore file is
    removed at startup (when `settings.SEMAPHORE_FILE_ENSURE_REMOVED` is set)
    and the file is touched once the quit request has been seen.

    While waiting, a "running..." message is logged once more than
    `settings.SAY_HELLO_SECONDS` have elapsed since the previous one.
    """
    logger.info("starting...")

    setup_signal_handling()

    last_hello_emitted = time.time()

    semaphore_file = settings.SEMAPHORE_FILE

    if settings.USE_ECS_TASK_STRATEGY:
        logger.info("using ECS Task strategy for semaphore token")

        # `len(...)` never returns None, so test for emptiness directly; this
        # rejects both None and "" before any request is attempted
        if not settings.ECS_TASK_STRATEGY_ENDPOINT:
            logger.fatal("ECS_TASK_STRATEGY_ENDPOINT was empty")
            return

        r = requests.get(settings.ECS_TASK_STRATEGY_ENDPOINT)
        metadata = r.json()
        logger.debug("metadata was: " + json.dumps(metadata))
        # the task id is the last path segment of the task ARN
        task_id = metadata["Labels"]["com.amazonaws.ecs.task-arn"].split(
            "/")[-1]
        logger.debug("task_id: " + task_id)
        semaphore_file = settings.ECS_TASK_STRATEGY_SEMAPHORE_FILE_TEMPLATE.replace(
            "##task_id##", task_id)
        logger.info("semaphore file set to " + semaphore_file)

    if settings.USE_SEMAPHORE_FILE_STRATEGY:
        if settings.SEMAPHORE_FILE_ENSURE_REMOVED:
            logger.info("ensuring semaphore file at " + semaphore_file +
                        " is removed")

            if os.path.isfile(semaphore_file):
                logger.info("semaphore file exists, removing")
                os.unlink(semaphore_file)

    while not requested_to_quit:
        age = int(time.time() - last_hello_emitted)
        if age > settings.SAY_HELLO_SECONDS:
            logger.info("running...")
            last_hello_emitted = time.time()

        time.sleep(settings.SLEEP_SECONDS)

    if requested_to_quit:
        if settings.USE_SEMAPHORE_FILE_STRATEGY:
            logger.info("touching semaphore file at " + semaphore_file)
            # append mode creates the file without truncating an existing one
            with open(semaphore_file, 'a'):
                pass

    logger.info("done")
Пример #13
0
def remove_snapshot(snapshot_name):
    """
    Delete the snapshot `snapshot_name` from the configured repository.

    Any error raised while preparing or sending the DELETE request is logged
    and re-raised. Returns `True` only for an HTTP 200 answer.
    """
    logger.info(f"remove_snapshot {snapshot_name}")

    try:
        url = f"{settings.ES_HOST}/_snapshot/{settings.REPOSITORY_NAME}/{snapshot_name}"
        response = requests.delete(
            url, timeout=settings.REQUEST_TIMEOUT_SECONDS)
    except Exception as exc:
        logger.fatal(
            f"problem during removal of snapshot {snapshot_name}: {exc!s}")
        raise

    logger.debug(f"response: {response} {response.text}")

    removed = response.status_code == 200
    return removed
Пример #14
0
def main():
    """
    Run one snapshot cycle.

    The repository is created when it does not exist yet, old snapshots are
    pruned when `settings.REMOVE_OLDER_THAN_DAYS` is positive, and a new
    snapshot is requested. Any failure along the way is logged and, when
    `settings.ENABLE_SLACK` is set, also announced on Slack.
    """
    announce("starting...")

    try:
        # creation is only attempted when the repository is missing
        if not repository_exists() and not create_repository():
            raise Exception("couldn't create repository")

        if settings.REMOVE_OLDER_THAN_DAYS > 0:
            remove_old_snapshots()

        if not create_snapshot():
            raise Exception("couldn't create snapshot")

    except Exception as failure:
        reason = str(failure)
        logger.fatal(reason)
        if settings.ENABLE_SLACK:
            slack_announce(message=reason)
Пример #15
0
def discover(package: str,
             discovery_path: str = "./discovery.json",
             no_system_info: bool = False,
             no_install: bool = False) -> Discovery:
    """Discover capabilities and experiments."""
    # The `no_*` switches are negative while the discovery API is positive.
    discover_system = not no_system_info
    download_and_install = not no_install

    try:
        discovery = disco(package_name=package,
                          discover_system=discover_system,
                          download_and_install=download_and_install)
    except DiscoveryFailed as failure:
        # full details at debug level, short message for the user
        logger.debug("Failed to discover {}".format(package),
                     exc_info=failure)
        logger.fatal(str(failure))
        return None

    with open(discovery_path, "w") as outfile:
        serialized = json.dumps(discovery, indent=2)
        outfile.write(serialized)
    logger.info("Discovery outcome saved in {p}".format(p=discovery_path))

    return discovery
Пример #16
0
def remove_old_snapshots():
    """
    Remove the repository snapshots older than the retention period.

    The list of snapshots is fetched from the `_all` endpoint of the
    configured repository. A snapshot is removed when
    `settings.REMOVE_OLDER_THAN_DAYS` is positive and the snapshot started
    more than that many whole days ago.

    Raises an `Exception` when the answer holds no ``snapshots`` property or
    when a removal fails; errors raised by the listing request are logged and
    re-raised.
    """
    logger.debug("remove_old_snapshots()")

    # get list of repository snapshots
    try:
        url = f"{settings.ES_HOST}/_snapshot/{settings.REPOSITORY_NAME}/_all"

        logger.debug(f"request: {url}")

        response = requests.get(
            url=url, timeout=settings.REQUEST_TIMEOUT_SECONDS)
    except Exception as exc:
        logger.fatal(f"problem while getting list of snapshots: {exc!s}")
        raise

    logger.debug(f"response: {response} {response.text}")

    listing = response.json()

    if "snapshots" not in listing:
        logger.fatal(
            f"couldn't find 'snapshots' property in response: {response} {response.text}")
        raise Exception(
            f"problem parsing reply from list of snapshots: {response} {response.text}")

    for entry in listing["snapshots"]:
        start_time = entry["start_time"]
        snapshot_name = entry["snapshot"]

        # start_time is in Zulu time - e.g. "start_time": "2019-02-23T19:39:28.043Z"
        # this is ISO-8601 compliant
        started_on = dateutil.parser.parse(start_time)
        age = datetime.now(timezone.utc) - started_on

        retention_days = settings.REMOVE_OLDER_THAN_DAYS
        if not (retention_days > 0 and age.days > retention_days):
            continue

        logger.info(f"snapshot {snapshot_name} is a candidate for removal")
        if not remove_snapshot(snapshot_name):
            raise Exception("snapshot removal failed")
        announce(f"snapshot {snapshot_name} has been removed")
Пример #17
0
    def changePassword(self, newPassword):
        """
        Replace the current password with ``newPassword``.

        The candidate is rejected when it equals the current password or when
        it is already present in the password history. On success it is
        stored in ``self.curPwd`` and appended to the history; once the
        history holds ``MAX_HISTORICAL_PWDS`` entries or more, its first
        entry is dropped.

        Raises:
            PasswordMatchException: the candidate equals the current password.
            HistoricalPasswordMatchException: the candidate is already in the
                history.
        """
        # Passwords (current, new or historical) must never reach the logs,
        # so only non-sensitive facts are recorded here.
        logger.debug("Password change requested")
        # check against cur pwd
        if newPassword == self.getCurrentPassword():
            logger.fatal("Passwords match, heading back.")
            raise PasswordMatchException("Matches Current Go Home")
        # check against historical pwds
        historicalPasswords = self.getPasswordHistory()
        if newPassword in historicalPasswords:
            logger.fatal("Password is in historical list, try again.")
            raise HistoricalPasswordMatchException("Found in historical list")
        logger.debug("Current historical list size: {}".format(
            len(historicalPasswords)))

        # got here, so lets update the pwd and add it to the history
        # NOTE: both the current password and the history are kept in clear
        self.curPwd = newPassword
        historicalPasswords.append(newPassword)
        if len(historicalPasswords) >= MAX_HISTORICAL_PWDS:
            del historicalPasswords[0]

        logger.debug("Password changed")
Пример #18
0
def run_experiment(experiment: Experiment,
                   settings: Settings = None) -> Journal:
    """
    Run the given `experiment` method step by step, in the following sequence:
    steady probe, action, close probe.

    Activities can be executed in background when they have the
    `"background"` property set to `true`. In that case, the activity is run in
    a thread. By the end of runs, those threads block until they are all
    complete.

    If the experiment has the `"dry"` property set to `True`, the experiment
    runs without actually executing the activities.

    The returned journal carries the steady states measured before and after
    the method, the runs, the rollbacks, the end time, the duration and a
    status: `"failed"` when a steady-state hypothesis is not met, `"aborted"`
    when the method raised an unexpected error, `"interrupted"` when the run
    was interrupted. When the status is still unset after an uninterrupted
    run, it becomes `"completed"`.

    NOTE: Tricky to make a decision whether we should rollback when exiting
    abnormally (Ctrl-C, SIGTERM...). Afterall, there is a chance we actually
    cannot afford to rollback properly. Better bailing to a conservative
    approach. This means we swallow :exc:`KeyboardInterrupt` and
    :exc:`SystemExit` and do not bubble it back up to the caller. When we were
    interrupted, the journal status is set to `"interrupted"` to notify the
    caller this was indeed not terminated properly, and rollbacks are not
    applied.
    """
    logger.info("Running experiment: {t}".format(t=experiment["title"]))

    dry = experiment.get("dry", False)
    if dry:
        logger.warning("Dry mode enabled")

    started_at = time.time()
    settings = settings if settings is not None else get_loaded_settings()
    config = load_configuration(experiment.get("configuration", {}))
    secrets = load_secrets(experiment.get("secrets", {}), config)
    initialize_global_controls(experiment, config, secrets, settings)
    initialize_controls(experiment, config, secrets)
    activity_pool, rollback_pool = get_background_pools(experiment)

    control = Control()
    journal = initialize_run_journal(experiment)

    try:
        try:
            control.begin("experiment", experiment, experiment, config,
                          secrets)
            # bound up-front so the failure handler below can record it even
            # when the hypothesis call itself raises before assigning it
            state = None
            # this may fail the entire experiment right there if any of the
            # probes fail or fall out of their tolerance zone
            try:
                state = run_steady_state_hypothesis(experiment,
                                                    config,
                                                    secrets,
                                                    dry=dry)
                journal["steady_states"]["before"] = state
                if state is not None and not state["steady_state_met"]:
                    p = state["probes"][-1]
                    raise ActivityFailed(
                        "Steady state probe '{p}' is not in the given "
                        "tolerance so failing this experiment".format(
                            p=p["activity"]["name"]))
            except ActivityFailed as a:
                journal["steady_states"]["before"] = state
                journal["status"] = "failed"
                logger.fatal(str(a))
            else:
                try:
                    journal["run"] = apply_activities(experiment, config,
                                                      secrets, activity_pool,
                                                      dry)
                except Exception:
                    journal["status"] = "aborted"
                    logger.fatal(
                        "Experiment ran into an un expected fatal error, "
                        "aborting now.",
                        exc_info=True)
                else:
                    # the method went through, check whether the system
                    # deviated from its steady state
                    try:
                        state = run_steady_state_hypothesis(experiment,
                                                            config,
                                                            secrets,
                                                            dry=dry)
                        journal["steady_states"]["after"] = state
                        if state is not None and not state["steady_state_met"]:
                            journal["deviated"] = True
                            p = state["probes"][-1]
                            raise ActivityFailed(
                                "Steady state probe '{p}' is not in the given "
                                "tolerance so failing this experiment".format(
                                    p=p["activity"]["name"]))
                    except ActivityFailed as a:
                        journal["status"] = "failed"
                        logger.fatal(str(a))
        except InterruptExecution as i:
            journal["status"] = "interrupted"
            logger.fatal(str(i))
        except (KeyboardInterrupt, SystemExit):
            journal["status"] = "interrupted"
            logger.warning("Received an exit signal, "
                           "leaving without applying rollbacks.")
        else:
            # rollbacks are only applied when the run was not interrupted
            journal["status"] = journal["status"] or "completed"
            journal["rollbacks"] = apply_rollbacks(experiment, config, secrets,
                                                   rollback_pool, dry)

        journal["end"] = datetime.utcnow().isoformat()
        journal["duration"] = time.time() - started_at

        has_deviated = journal["deviated"]
        status = "deviated" if has_deviated else journal["status"]

        logger.info("Experiment ended with status: {s}".format(s=status))

        if has_deviated:
            logger.info(
                "The steady-state has deviated, a weakness may have been "
                "discovered")

        control.with_state(journal)

        try:
            control.end("experiment", experiment, experiment, config, secrets)
        except ChaosException:
            logger.debug("Failed to close controls", exc_info=True)

    finally:
        cleanup_controls(experiment)
        cleanup_global_controls()

    return journal
Пример #19
0
def switch_team_during_verification_run(
        source: str,  # noqa: C901
        settings: Settings) -> bool:
    """
    Verification may be run in a different team than the active team the user
    selected. Rather than preventing the verification from running, try to
    switch to the appropriate team's context for the duration of this run.

    It's all in memory and not changed on disk.

    `source` is either an HTTP(S) URL or a local file path to the
    verification. Returns `True` when the run can proceed (switching the
    default team when needed) and `False`, after logging the reason, when it
    cannot: not signed in, no default team, no auth token, verification not
    retrievable, team identifier not found, team not accessible or team owned
    by another organization.

    Raises `InvalidSource` when `source` is a local path that does not exist.
    """
    if not has_chaosiq_extension_configured(settings):
        logger.fatal(
            "Please signin to ChaosIQ services first with `$ chaos signin`")
        return False

    base_url = get_endpoint_url(settings)
    verify_tls = get_verify_tls(settings)
    default_org = get_default_org(settings)
    team = get_default_team(default_org)
    if not team:
        logger.fatal("Please select a default team with `$chaos team`")
        return False
    team_id = team["id"]

    token = get_auth_token(settings, base_url)
    if not token:
        logger.fatal(
            "Please signin to ChaosIQ services first with `$ chaos signin`")
        # without a token every request below would be unauthenticated
        return False

    p = urlparse(source)
    if p.scheme.lower() in ["http", "https"]:
        r = requests.get(source,
                         headers={"Authorization": "Bearer {}".format(token)},
                         verify=verify_tls)
        if r.status_code != 200:
            logger.fatal("Failed to retrieve verification at '{}': {}".format(
                source, r.text))
            return False

        experiment = r.json()
    else:
        if not os.path.exists(p.path):
            raise InvalidSource('Path "{}" does not exist.'.format(source))
        experiment = parse_experiment_from_file(source)

    # the team declared by the verification, if any, wins over the default
    experiment_team_id = get_team_id(experiment)
    if experiment_team_id:
        team_id = experiment_team_id

    if not team_id:
        logger.fatal(
            "Failed to lookup the team identifier from the verification. "
            "Are you trying to run a verification using an experiment you "
            "created manually? This is not possible right now unfortunately.")
        return False

    if team["id"] != team_id:
        team_url = urls.team(urls.org(urls.base(base_url),
                                      organization_id=default_org["id"]),
                             team_id=team_id)

        r = request_team(team_url, token, verify_tls)
        if r.status_code != 200:
            logger.fatal("You cannot access the team owning this "
                         "verification. Please request them to join the "
                         "team.")
            return False

        team = r.json()
        if default_org["id"] != team["org_id"]:
            logger.fatal(
                "You must be signed in to the appropriate organization to run "
                "this verification. Please run `$ chaos signin`.")
            return False

        logger.debug("Running a verification in a team different from the "
                     "active one. Activating '{}' for this run.".format(
                         team["name"]))

        set_default_team(default_org, {
            "id": team_id,
            "default": True,
            "name": team["name"]
        })

    return True
Пример #20
0
    def _run(
            self,
            strategy: Strategy,
            schedule: Schedule,  # noqa: C901
            experiment: Experiment,
            journal: Journal,
            configuration: Configuration,
            secrets: Secrets,
            settings: Settings,
            event_registry: EventHandlerRegistry) -> Journal:
        """
        Run the experiment from start to finish and return its journal.

        The sequence is: begin the controls, optionally run the steady-state
        hypothesis before the method (as a gate), optionally start the
        hypothesis during the method, play the method, optionally run the
        hypothesis after the method, then play the rollbacks and close the
        controls. Which hypothesis runs are performed depends on `strategy`
        and on whether the experiment declares a steady-state hypothesis with
        probes.

        `InterruptExecution`, `KeyboardInterrupt` and `SystemExit` raised
        during the main phase are swallowed and the journal status is set to
        ``"interrupted"``. The final status is always one of ``"completed"``,
        ``"failed"``, ``"aborted"`` or ``"interrupted"``.

        The controls are cleaned up and the event registry is told the run is
        finished whatever happens.
        """
        experiment["title"] = substitute(experiment["title"], configuration,
                                         secrets)
        logger.info("Running experiment: {t}".format(t=experiment["title"]))

        started_at = time.time()
        # reuse the journal handed by the caller, start a fresh one otherwise
        journal = journal or initialize_run_journal(experiment)
        event_registry.started(experiment, journal)

        control = Control()
        activity_pool, rollback_pool = get_background_pools(experiment)
        hypo_pool = get_hypothesis_pool()
        continous_hypo_event = threading.Event()

        dry = experiment.get("dry", False)
        if dry:
            logger.warning("Dry mode enabled")

        initialize_global_controls(experiment, configuration, secrets,
                                   settings)
        initialize_controls(experiment, configuration, secrets)

        logger.info("Steady-state strategy: {}".format(strategy.value))
        # settings path: runtime -> rollbacks -> strategy, "default" if unset
        rollback_strategy = settings.get("runtime",
                                         {}).get("rollbacks", {}).get(
                                             "strategy", "default")
        logger.info("Rollbacks strategy: {}".format(rollback_strategy))

        exit_gracefully_with_rollbacks = True
        with_ssh = has_steady_state_hypothesis_with_probes(experiment)
        if not with_ssh:
            logger.info("No steady state hypothesis defined. That's ok, just "
                        "exploring.")

        try:
            try:
                control.begin("experiment", experiment, experiment,
                              configuration, secrets)

                # non-None placeholder so the method is still played when the
                # gate hypothesis is not run at all
                state = object()
                if with_ssh and should_run_before_method(strategy):
                    state = run_gate_hypothesis(experiment, journal,
                                                configuration, secrets,
                                                event_registry, dry)

                # a None state from the gate hypothesis skips the method
                if state is not None:
                    if with_ssh and should_run_during_method(strategy):
                        run_hypothesis_during_method(hypo_pool,
                                                     continous_hypo_event,
                                                     strategy, schedule,
                                                     experiment, journal,
                                                     configuration, secrets,
                                                     event_registry, dry)

                    state = run_method(strategy, activity_pool, experiment,
                                       journal, configuration, secrets,
                                       event_registry, dry)

                    # the method is over: set the event that was handed to the
                    # during-method hypothesis (presumably its stop signal —
                    # verify against run_hypothesis_during_method)
                    continous_hypo_event.set()
                    if journal["status"] not in ["interrupted", "aborted"]:
                        if with_ssh and (state is not None) and \
                                should_run_after_method(strategy):
                            run_deviation_validation_hypothesis(
                                experiment, journal, configuration, secrets,
                                event_registry, dry)
            except InterruptExecution as i:
                journal["status"] = "interrupted"
                logger.fatal(str(i))
                event_registry.interrupted(experiment, journal)
            except KeyboardInterrupt:
                journal["status"] = "interrupted"
                logger.warning("Received a termination signal (Ctrl-C)...")
                event_registry.signal_exit()
            except SystemExit as x:
                journal["status"] = "interrupted"
                logger.warning("Received the exit signal: {}".format(x.code))

                # exit code 30 asks for the rollbacks not to be played
                exit_gracefully_with_rollbacks = x.code != 30
                if not exit_gracefully_with_rollbacks:
                    logger.warning("Ignoring rollbacks as per signal")
                event_registry.signal_exit()
            finally:
                # wait for the hypothesis pool to be done before moving on
                hypo_pool.shutdown(wait=True)

            # just in case a signal overrode everything else to tell us not to
            # play them anyway (see the exit.py module)
            if exit_gracefully_with_rollbacks:
                run_rollback(rollback_strategy, rollback_pool, experiment,
                             journal, configuration, secrets, event_registry,
                             dry)

            journal["end"] = datetime.utcnow().isoformat()
            journal["duration"] = time.time() - started_at

            # the spec only allows these statuses, so if it's anything else
            # we override to "completed"
            if journal["status"] not in ("completed", "failed", "aborted",
                                         "interrupted"):
                journal["status"] = "completed"

            has_deviated = journal["deviated"]
            # "deviated" is only used for the log line below, the journal
            # status itself is left untouched
            status = "deviated" if has_deviated else journal["status"]
            logger.info("Experiment ended with status: {s}".format(s=status))
            if has_deviated:
                logger.info(
                    "The steady-state has deviated, a weakness may have been "
                    "discovered")

            control.with_state(journal)
            try:
                control.end("experiment", experiment, experiment,
                            configuration, secrets)
            except ChaosException:
                # failing to close the controls must not fail the run
                logger.debug("Failed to close controls", exc_info=True)
        finally:
            try:
                cleanup_controls(experiment)
                cleanup_global_controls()
            finally:
                # the registry is told the run is finished even when the
                # cleanup itself raises
                event_registry.finish(journal)

        return journal