def sm_map(func: Callable[[T1], T2],
           iter_: Iterable[T1],
           hostname: str = None) -> List[T2]:
    """Apply ``func`` to every element of ``iter_`` through sandman.

    ``func`` is wrapped once as a sandman operation; one operation call is
    built per input element and the whole batch is submitted in a single
    session. Should be obtained using get_sm_map to set hostname.

    Args:
        func: The callable to apply to each element.
        iter_: The inputs; consumed once, in order.
        hostname: Passed as ``host=`` to ``sm.Session``.

    Returns:
        Whatever ``Session.submit`` returns for the batch (annotated as a
        list of results).
    """
    operation = sm.operation(func, include_modules=True)

    # Build one operation call per input before opening the session.
    pending = []
    for item in iter_:
        pending.append(operation(item))

    with sm.Session(host=hostname) as session:
        submitted = session.submit(pending)
    return submitted
def rake(hostname=None, replications=4, summary: callable = None):
    # NOTE(review): the next six lines read as docstring text, but no opening/closing
    # triple quotes (or an "Args:" header) are present around them - confirm and restore.
    Uses the sandman2 api to run multiple replications of multiple configurations of the simulation.
    If hostname=None, runs locally. Otherwise, make sure environment variable SANDMAN_KEY_ID and SANDMAN_KEY_SECRET
    are set.
        hostname: The remote server to run the job on
        replications: The number of replications of each parameter set to run
        summary: The summary statistic (function) to apply to the results
    # Fail early if hickle is not importable.
    if importlib.util.find_spec("hickle") is None:
        raise ModuleNotFoundError(
            "hickle not found but required for saving logs")

    # Report where the ensemble will run.
    # NOTE(review): both prints sit under `if hostname is None`; an `else:` before the
    # second print looks to be missing - confirm.
    if hostname is None:
        print("Running ensemble locally")
        print(f"Running ensemble on {hostname}")
    """Configure the parameter sets to run"""
    default_parameters: Dict = isleconfig.simulation_parameters
    # NOTE(review): `Dict[str:Dict]` is a slice expression; `Dict[str, Dict]` was probably intended.
    parameter_sets: Dict[str:Dict] = {}

    # This section should be freely modified to determine the experiment
    # The keys of parameter_sets are the prefixes to save logs under, the values are the parameters to run
    # The keys should be strings

    for number_riskmodels in [1, 3]:
        # default_parameters is mutable, so should be copied
        new_parameters = default_parameters.copy()
        new_parameters["no_riskmodels"] = number_riskmodels
        parameter_sets["ensemble" + str(number_riskmodels)] = new_parameters


    print(f"Running {len(parameter_sets)} simulations of {replications} "
          f"replications of {default_parameters['max_time']} timesteps")
    # Prefixes are concatenated into file names further down, so reject non-string keys.
    for name in parameter_sets:
        if not isinstance(name, str):
            raise ValueError("Prefixes must be strings")
    """Sanity checks"""

    # Check that the necessary env variables are set
    # NOTE(review): the warning text below is a bare string expression, so nothing is
    # printed; a `print(` call looks to be missing - confirm.
    if hostname is not None:
        if not ("SANDMAN_KEY_ID" in os.environ
                and "SANDMAN_KEY_SECRET" in os.environ):
                "Warning: Sandman authentication not found in environment variables."

    max_time = isleconfig.simulation_parameters["max_time"]

    if not isleconfig.slim_log:
        # We can estimate the log size per experiment in GB (max_time is squared as number of insurance firms also
        # increases with time and per-firm logs are dominating in the limit). The 6 is empirical
        # TODO: Is this even vaguely correct? Who knows!
        estimated_log_size = max_time**2 * replications * 6 / (1000**3)
        # NOTE(review): as above, this message is a bare string and is never shown - confirm.
        if estimated_log_size > 1:
                "Uncompressed log size estimated to be above 1GB - consider using slim logs"

    # When a hostname is given, network display is switched off in isleconfig.
    if hostname is not None and isleconfig.show_network:
        print("Warning: can't show network on remote server")
        isleconfig.show_network = False
    """Configuration of the ensemble"""
    """Configure the return values and corresponding file suffixes where they should be saved"""
    # Maps each requested log name to the file suffix it is saved under.
    requested_logs = {
        "total_cash": "_cash.dat",
        "total_excess_capital": "_excess_capital.dat",
        "total_profitslosses": "_profitslosses.dat",
        "total_contracts": "_contracts.dat",
        "total_operational": "_operational.dat",
        "total_reincash": "_reincash.dat",
        "total_reinexcess_capital": "_reinexcess_capital.dat",
        "total_reinprofitslosses": "_reinprofitslosses.dat",
        "total_reincontracts": "_reincontracts.dat",
        "total_reinoperational": "_reinoperational.dat",
        "total_catbondsoperational": "_total_catbondsoperational.dat",
        "market_premium": "_premium.dat",
        "market_reinpremium": "_reinpremium.dat",
        "cumulative_bankruptcies": "_cumulative_bankruptcies.dat",
        "cumulative_market_exits": "_cumulative_market_exits.dat",
        "cumulative_unrecovered_claims": "_cumulative_unrecovered_claims.dat",
        "cumulative_claims": "_cumulative_claims.dat",
        "cumulative_bought_firms": "_cumulative_bought_firms.dat",
        "insurance_firms_cash": "_insurance_firms_cash.dat",
        "reinsurance_firms_cash": "_reinsurance_firms_cash.dat",
        "market_diffvar": "_market_diffvar.dat",
        "rc_event_schedule_initial": "_rc_event_schedule.dat",
        "rc_event_damage_initial": "_rc_event_damage.dat",
        "number_riskmodels": "_number_riskmodels.dat",
        "insurance_contracts": "_insurance_contracts.dat",
        "reinsurance_contracts": "_reinsurance_contracts.dat",
        "unweighted_network_data": "_unweighted_network_data.dat",
        "network_node_labels": "_network_node_labels.dat",
        "network_edge_labels": "_network_edge_labels.dat",
        "number_of_agents": "_number_of_agents",
    # NOTE(review): the closing `}` of requested_logs looks to be missing here - confirm.
    """Define the numpy types of the underlying data in each requested log"""
    # NOTE(review): "cumulative_nonregulation_firms" below has no counterpart in requested_logs.
    types = {
        "total_cash": np.float_,
        "total_excess_capital": np.float_,
        "total_profitslosses": np.float_,
        "total_contracts": np.int_,
        "total_operational": np.int_,
        "total_reincash": np.float_,
        "total_reinexcess_capital": np.float_,
        "total_reinprofitslosses": np.float_,
        "total_reincontracts": np.int_,
        "total_reinoperational": np.int_,
        "total_catbondsoperational": np.int_,
        "market_premium": np.float_,
        "market_reinpremium": np.float_,
        "cumulative_bankruptcies": np.int_,
        "cumulative_market_exits": np.int_,
        "cumulative_unrecovered_claims": np.float_,
        "cumulative_claims": np.float_,
        "cumulative_bought_firms": np.int_,
        "cumulative_nonregulation_firms": np.int_,
        "insurance_firms_cash": np.float_,
        "reinsurance_firms_cash": np.float_,
        "market_diffvar": np.float_,
        "rc_event_schedule_initial": np.int_,
        "rc_event_damage_initial": np.float_,
        "number_riskmodels": np.int_,
        "insurance_contracts": np.int_,
        "reinsurance_contracts": np.int_,
        "unweighted_network_data": np.float_,
        "network_node_labels": np.float_,
        "network_edge_labels": np.float_,
        "number_of_agents": np.int_,
    # NOTE(review): the closing `}` of types looks to be missing here - confirm.

    # Drop some entries from requested_logs depending on the slim_log / save_network settings.
    # NOTE(review): in both loops the list of names after `for name in [` is not present - confirm.
    if isleconfig.slim_log:
        for name in [
            del requested_logs[name]

    elif not isleconfig.save_network:
        for name in [
            del requested_logs[name]
    """Configure log directory and ensure that the directory exists"""
    dir_prefix = "/data/"
    directory = os.getcwd() + dir_prefix
    # A non-directory entry at ./data is an error.
    # NOTE(review): the `raise Exception(` call is never closed, and no code creating the
    # directory is visible - confirm.
    if not os.path.isdir(directory):
        if os.path.exists(directory.rstrip("/")):
            raise Exception(
                "./data exists as regular file. "
                "This filename is required for the logging and event schedule directory"
    """Clear old dict saving files (*_history_logs.dat)"""
    # NOTE(review): the `if os.path.exists(filename):` below has no body; the removal
    # step announced by the string above looks to be missing - confirm.
    for prefix in parameter_sets.keys():
        filename = os.getcwd(
        ) + dir_prefix + "full_" + prefix + "_history_logs.dat"
        if os.path.exists(filename):
    """Setup of the simulations"""
    # Here the setup for the simulation is done.
    # Since this script is used to carry out simulations in the cloud, it will usually have more than 1 replication.
    # We don't set filepath=, so the full set of events and seeds will be stored in data/risk_event_schedules.islestore
    # If we wished we could replicate by setting isleconfig.replicating = True.
    setup = setup_simulation.SetupSim()
    # NOTE(review): the unpacking targets before `]` are not present - confirm.
    ] = setup.obtain_ensemble(replications)

    # never save simulation state in ensemble runs (resuming is impossible anyway)
    save_iter = 0

    # Wrap start.main as a sandman operation.
    m = sm.operation(start.main, include_modules=True)

    # Both dicts are keyed by parameter-set prefix.
    jobs = {}
    position_maps = {}
    for prefix in parameter_sets:
        # In this loop the parameters, schedules and random seeds for every run are prepared. Different risk models will
        # be run with the same schedule, damage size and random seed for a fair comparison.

        simulation_parameters = parameter_sets[prefix]

        # Here is assembled each job with the corresponding: simulation parameters, time events, damage events, seeds,
        # simulation state save interval (never), and list of requested logs.
        # NOTE(review): the call that builds each element (and the closing `]`) is not present - confirm.
        job = [
            ) for x in range(replications)
        jobs[prefix] = job
        # Map each element's `id` to its index within the job list.
        position_maps[prefix] = {o.id: p for p, o in enumerate(job)}
    """Here the jobs are submitted"""
    print("Jobs constructed, submitting")
    with sm.Session(host=hostname, default_cb_to_stdout=True) as sess:
        # TODO: Allow for resuming a detached run with task = sess.get(job_id)
        tasks = {}
        for prefix, job in jobs.items():
            # If there are 4 parameter sets jobs will be a dict with 4 elements.
            """Run simulation and obtain result"""
            task = sess.submit_async(job)
            print(f"Started job, prefix {prefix}, given ID {task.id}")
            tasks[prefix] = task

        print("Now waiting for jobs to complete\033[5m...\033[0m")
        # Hand the async tasks to wait_for_tasks while the session is still open.
        wait_for_tasks(tasks, replications, position_maps, summary)

    print("Recieved all results and written all files, all finished.")
def rake(hostname=None, summary: callable = None, use_sandman: bool = False):
    Uses the sandman2 api to run multiple replications of multiple configurations of the simulation.
    If hostname=None, runs locally. Otherwise, make sure environment variable SANDMAN_KEY_ID and SANDMAN_KEY_SECRET
    are set.
        hostname: The remote server to run the job on
        summary: The summary statistic (function) to apply to the results
        use_sandman: if True, uses sandman, otherwise uses multiprocessing (faster if running very many simulations

    # TODO: RM

    if importlib.util.find_spec("hickle") is None:
        raise ModuleNotFoundError(
            "hickle not found but required for saving logs")

    if hostname is None:
        print("Running ensemble locally")
        if use_sandman:
            print(f"Running ensemble on {hostname}")
            raise ValueError("use_sandman is False, but hostname is given")
    """Configure the parameter sets to run"""
    default_parameters: Dict = isleconfig.simulation_parameters
    parameter_list = None

    # This section should be freely modified to determine the experiment
    # parameters should be a list of (hashable) labels for the settings; parameter_list should be the
    # corresponding list of settings (one parameter dict per label, in the same order).

    import SALib.util
    import SALib.sample.morris

    problem = SALib.util.read_param_file("isle_all_parameters.txt")
    param_values = SALib.sample.morris.sample(problem,
                                              N=problem["num_vars"] * 3)
    parameters = [tuple(row) for row in param_values]
    parameter_list = [{
        "max_time": 2000,
        **{problem["names"][i]: row[i]
           for i in range(len(row))},
    } for row in param_values]
    if parameter_list[1] == parameter_list[0]:
        raise RuntimeError("Parameter list appears to be homogenous!")


    max_time = parameter_list[0]["max_time"]

    print(f"Running {len(parameter_list)} simulations of {max_time} timesteps")
    """Sanity checks"""

    # Check that the necessary env variables are set
    if hostname is not None:
        if not ("SANDMAN_KEY_ID" in os.environ
                and "SANDMAN_KEY_SECRET" in os.environ):
                "Warning: Sandman authentication not found in environment variables."

    if hostname is not None and isleconfig.show_network:
        print("Warning: can't show network on remote server")
        isleconfig.show_network = False
    """Configuration of the ensemble"""
    """Configure the return values and corresponding file suffixes where they should be saved"""
    requested_logs = {
        "total_cash": "_cash.dat",
        "total_excess_capital": "_excess_capital.dat",
        "total_profitslosses": "_profitslosses.dat",
        "total_contracts": "_contracts.dat",
        "total_operational": "_operational.dat",
        "total_reincash": "_reincash.dat",
        "total_reinexcess_capital": "_reinexcess_capital.dat",
        "total_reinprofitslosses": "_reinprofitslosses.dat",
        "total_reincontracts": "_reincontracts.dat",
        "total_reinoperational": "_reinoperational.dat",
        "total_catbondsoperational": "_total_catbondsoperational.dat",
        "market_premium": "_premium.dat",
        "market_reinpremium": "_reinpremium.dat",
        "cumulative_bankruptcies": "_cumulative_bankruptcies.dat",
        "cumulative_market_exits": "_cumulative_market_exits.dat",
        "cumulative_unrecovered_claims": "_cumulative_unrecovered_claims.dat",
        "cumulative_claims": "_cumulative_claims.dat",
        "cumulative_bought_firms": "_cumulative_bought_firms.dat",
        "insurance_firms_cash": "_insurance_firms_cash.dat",
        "reinsurance_firms_cash": "_reinsurance_firms_cash.dat",
        "market_diffvar": "_market_diffvar.dat",
        "rc_event_schedule_initial": "_rc_event_schedule.dat",
        "rc_event_damage_initial": "_rc_event_damage.dat",
        "number_riskmodels": "_number_riskmodels.dat",
        "insurance_contracts": "_insurance_contracts.dat",
        "reinsurance_contracts": "_reinsurance_contracts.dat",
        "unweighted_network_data": "_unweighted_network_data.dat",
        "network_node_labels": "_network_node_labels.dat",
        "network_edge_labels": "_network_edge_labels.dat",
        "number_of_agents": "_number_of_agents",
    """Configure log directory and ensure that the directory exists"""
    dir_prefix = "/data/"
    directory = os.getcwd() + dir_prefix
    if not os.path.isdir(directory):
        if os.path.exists(directory.rstrip("/")):
            raise Exception(
                "./data exists as regular file. "
                "This filename is required for the logging and event schedule directory"
    """Setup of the simulations"""
    # Here the setup for the simulation is done.
    # Since this script is used to carry out simulations in the cloud, it will usually have more than 1 replication.
    # We don't set filepath=, so the full set of events and seeds will be stored in data/risk_event_schedules.islestore
    # If we wished we could replicate by setting isleconfig.replicating = True.
    setup = setup_simulation.SetupSim()
    print("Setting up simulation")
    ] = setup.obtain_ensemble(len(parameter_list))

    n = len(parameter_list)
    m_params = list(
            [0] * n,
            [0] * n,
            [None] * n,
            [False] * n,
            [summary] * n,

    if use_sandman:
        import sandman2.api as sm

        print("Constructing sandman operation")
        m = sm.operation(start.main, include_modules=True)
        print("Assembling jobs")

        # Here is assembled each job with the corresponding: simulation parameters, time events, damage events, seeds,
        # simulation state save interval (never), and list of requested logs.

        # This is actually quite slow for large sets of jobs. Can't use mp.Pool due to unpickleability
        # Could use pathos or similar if we actually end up caring
        job = list(map(m, m_params))
        # # Split up into chunks so sandman server doesn't blow up
        # max_size = 71
        # job_lists = []
        # while len(job) > 0:
        #     job_lists.append(job[: min(max_size, len(job))])
        #     job = job[min(max_size, len(job)) :]
        """Here the jobs are submitted"""
        print("Jobs created, submitting")
        with sm.Session(host=hostname, default_cb_to_stdout=True) as sess:
            print("Starting job")
            # result = []
            # for job in job_lists:
            #     result += sess.submit(job)

            # Submit async so we can reattach with sess.get if something goes wrong locally
            task = sess.submit_async(job)

            result = task.results
        # result = []
        # m_params.reverse()
        # for i, param_set in enumerate(m_params):
        #     result.append(start.main(param_set))
        import multiprocessing as mp

        print("Running multiprocessing pool")
        # set maxtasksperchild, otherwise it seems that garbage collection(?) misbehaves and we get huge memory usage
        with mp.Pool(maxtasksperchild=1) as pool:
            # Since the jobs are so big, chunksize=1 is best
            result = pool.map(start.main, m_params, chunksize=1)

    print("Job done, saving")
    result_dict = {t: r for t, r in zip(parameters, result)}
