Example #1
def gather_timing_data(
    times_per_step: List[Dict[str, float]],
    results: Dict[str, Any],
    comm: MPI.Comm,
    root: int = 0,
) -> Dict[str, Any]:
    """returns an updated version of  the results dictionary owned
    by the root node to hold data on the substeps as well as the main loop timers"""
    is_root = comm.Get_rank() == root
    keys = collect_keys_from_data(times_per_step)
    data: List[float] = []
    for timer_name in keys:
        data.clear()
        for data_point in times_per_step:
            if timer_name in data_point:
                data.append(data_point[timer_name])

        sendbuf = np.array(data)
        recvbuf = None
        if is_root:
            recvbuf = np.array([data] * comm.Get_size())
        comm.Gather(sendbuf, recvbuf, root=root)
        if is_root:
            results["times"][timer_name]["times"] = copy.deepcopy(
                recvbuf.tolist())
    return results
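
For context, a minimal, self-contained sketch of the Gather pattern used above; the `results` layout and the `collect_keys_from_data` helper belong to the source project, so this only shows how one timer's per-rank samples reach the root (run under e.g. mpiexec -n 4):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
root = 0

# pretend each rank timed three steps of a "mainloop" timer
local_times = np.array([0.1, 0.2, 0.3]) * (comm.Get_rank() + 1)

recvbuf = None
if comm.Get_rank() == root:
    # one row per rank, one column per step
    recvbuf = np.empty((comm.Get_size(), local_times.size), dtype=local_times.dtype)

comm.Gather(local_times, recvbuf, root=root)
if comm.Get_rank() == root:
    print(recvbuf.tolist())  # [[rank 0 times], [rank 1 times], ...]
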
Example #2
 def __init__(self, comm: MPI.Comm = MPI.COMM_WORLD):
     self.comm_ = comm.Dup()
     self.num_procs_ = comm.Get_size()
     self.rank_ = comm.Get_rank()
     self.targcov_ = 1.0
     self.learnlev_ = 1
     self.nchain_ = 1
     self.step_ = 1
     self.modelid_ = 0
     self.maxcount_ = 100
Example #3
    def create_shared(global_comm: MPI.Comm,
                      size: int,
                      n_params: int,
                      reporter: Optional[Reporter] = None,
                      seed=None) -> NoiseTable:
        """Shares a noise table across multiple nodes. Assumes that each node has at least 2 MPI processes"""
        local_comm: MPI.Comm = global_comm.Split_type(MPI.COMM_TYPE_SHARED)
        assert local_comm.size > 1

        n_nodes = global_comm.allreduce(1 if local_comm.rank == 0 else 0,
                                        MPI.SUM)

        shared_arr = create_shared_arr(local_comm, size)
        nt = NoiseTable(n_params, shared_arr)

        if global_comm.rank == 0:  # create and distribute seed
            seed = seed if seed is not None else np.random.randint(
                0, 1000000)  # create seed if one is not provided
            if reporter is not None: reporter.print(f'nt seed:{seed}')
            for i in range(n_nodes):
                global_rank_to_send = global_comm.recv(
                    source=MPI.ANY_SOURCE)  # recv global rank from each node's proc 1
                global_comm.send(seed,
                                 global_rank_to_send)  # send seed to that rank

        if local_comm.rank == 1:  # send rank, receive seed and populate shared mem with noise
            global_comm.send(global_comm.rank, 0)  # send local rank
            seed = global_comm.recv(source=0)  # receive noise seed
            shared_arr[:size] = NoiseTable.make_noise(
                size, seed)  # create arr values

        global_comm.Barrier()  # wait until all nodes have set the array values
        return nt
Example #4
def rna_dist_from_samples(comm: mpi.Comm, poisson_samples: np.ndarray,
                          nmax: int) -> np.ndarray:

    nsamples_local = len(poisson_samples)
    nsamples = comm.allreduce(nsamples_local, mpi.SUM)
    nmax = comm.allreduce(nmax, mpi.MAX)

    p_local = np.zeros((nmax + 1, ), dtype=float)
    x_eval = np.arange(0, nmax + 1, dtype=int)
    for j in range(0, len(poisson_samples)):
        p_local += poisson.pmf(x_eval, mu=poisson_samples[j])
    p_local = comm.allreduce(p_local, mpi.SUM)
    p_local = (1.0 / nsamples) * p_local
    return p_local
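
A possible driver for the function above, assuming numpy, scipy.stats.poisson, and mpi4py.MPI (as `mpi`) are imported as in the source file; each rank contributes its own local samples and the allreduce calls combine them:

comm = mpi.COMM_WORLD
rng = np.random.default_rng(comm.Get_rank())
local_rates = rng.uniform(1.0, 10.0, size=100)  # hypothetical per-rank samples of the Poisson rate
p = rna_dist_from_samples(comm, local_rates, nmax=50)
if comm.Get_rank() == 0:
    print("total probability mass:", p.sum())  # close to 1 once nmax is large enough
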
Example #5
def test_params(comm: MPI.Comm, n: int, policy: Policy, nt: NoiseTable, gen_obstat: ObStat,
                fit_fn: Callable[[Module], TrainingResult], rs: RandomState) \
        -> Tuple[np.ndarray, np.ndarray, np.ndarray, int]:
    """
    Tests `n` different perturbations of `policy`'s params and returns the positive and negative results
    (from all processes).

    Where positive_result[i] is the fitness when the noise at nt[noise_inds[i]] is added to policy.flat_params
    and negative_result[i] is when the same noise is subtracted

    :returns: tuple(positive results, negative results, noise inds, total steps)
    """
    results_pos, results_neg, inds = [], [], []
    for _ in range(n):
        idx, noise = nt.sample(rs)
        inds.append(idx)
        # for each noise ind sampled, both add and subtract the noise
        results_pos.append(fit_fn(policy.pheno(noise)))
        results_neg.append(fit_fn(policy.pheno(-noise)))
        gen_obstat.inc(*results_pos[-1].ob_sum_sq_cnt)
        gen_obstat.inc(*results_neg[-1].ob_sum_sq_cnt)

    n_objectives = len(results_pos[0].result)
    results = _share_results(comm, [tr.result for tr in results_pos],
                             [tr.result for tr in results_neg], inds)
    gen_obstat.mpi_inc(comm)
    steps = comm.allreduce(sum([tr.steps for tr in results_pos + results_neg]),
                           op=MPI.SUM)

    return (results[:, 0:n_objectives],
            results[:, n_objectives:2 * n_objectives],
            results[:, -1],
            steps)
Example #6
def comm_from_mpi4py(comm: __mpi4py_MPI.Comm) -> MPI_Communicator:
    """Converts a ``mpi4py`` communicator to a :py:class:`mpi4torch.MPI_Communicator`.
    """

    fortran_handle = comm.py2f()
    return MPI_Communicator(
        torch.ops.mpi4torch.comm_from_fortran(fortran_handle))
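
The Fortran-handle round trip relies on standard mpi4py API; a quick sanity check that needs neither torch nor mpi4torch:

from mpi4py import MPI

handle = MPI.COMM_WORLD.py2f()   # integer Fortran handle
comm2 = MPI.Comm.f2py(handle)    # back to an mpi4py communicator
assert comm2.Get_size() == MPI.COMM_WORLD.Get_size()
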
Example #7
def set_device(comm: MPI.Comm):
    """Set the GPU device ID for this rank when using CuPy backend.

    We try our best to make neighboring ranks use the same GPU.

    Arguments
    ---------
    comm : mpi4py.MPI.Comm
        The communicator.
    """

    # get number of GPUs on this particular compute node
    n_gpus = _nplike.cuda.runtime.getDeviceCount()

    # get the info of the processes on this compute node
    local_name = MPI.Get_processor_name()
    local_comm = comm.Split_type(MPI.COMM_TYPE_SHARED)
    local_size = local_comm.Get_size()
    local_rank = local_comm.Get_rank()

    # set the corresponding gpu id for this rank
    group_size = local_size // n_gpus
    remains = local_size % n_gpus

    if local_rank < (group_size + 1) * remains:  # the first `remains` groups take one extra rank
        my_gpu = local_rank // (group_size + 1)
    else:
        my_gpu = (local_rank - remains) // group_size

    _nplike.cuda.runtime.setDevice(my_gpu)

    _logger.debug("node name: %s; local size:%d; local rank: %d; gpu id: %d",
                  local_name, local_size, local_rank, my_gpu)
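
The rank-to-GPU arithmetic can be checked in isolation, without CUDA or MPI; the helper below is illustrative only and not part of the source module:

def gpu_for_rank(local_rank: int, local_size: int, n_gpus: int) -> int:
    group_size = local_size // n_gpus
    remains = local_size % n_gpus
    if local_rank < (group_size + 1) * remains:  # the first `remains` GPUs take one extra rank
        return local_rank // (group_size + 1)
    return (local_rank - remains) // group_size

# e.g. 7 ranks on a node with 2 GPUs -> ranks 0-3 share GPU 0, ranks 4-6 share GPU 1
print([gpu_for_rank(r, 7, 2) for r in range(7)])
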
Example #8
def setupSave(constants, foldername: str = None,
              comm: MPI.Comm = MPI.COMM_WORLD, root: int = 0):
    if comm.Get_rank() == root:
        if foldername is None:
            i = 0
            foldername = "simulation_{0}".format(i)
            while os.path.isdir(foldername):
                i += 1
                foldername = "simulation_{0}".format(i)
            os.mkdir(foldername)
            foldername = comm.bcast(foldername, root=root)
        elif not os.path.isdir(foldername):
            os.mkdir(foldername)
        filename = '{0}/initParams.json'.format(foldername)
        with open(filename, "w") as f:
            print(constants, file=f)
    else:
        if foldername is None:
            foldername = comm.bcast("", root=root)
    return foldername
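
A hedged usage sketch, assuming `os` and `MPI` are imported as in the source module: every rank passes foldername=None, the root creates the directory, and the broadcast guarantees all ranks return the same name.

from mpi4py import MPI

comm = MPI.COMM_WORLD
folder = setupSave({"dt": 0.1, "nsteps": 100}, foldername=None, comm=comm, root=0)
print(comm.Get_rank(), folder)  # every rank prints the same simulation_<i> directory
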
Example #9
def split_communicator(comm: MPI.Comm, splitting: int) -> Tuple[MPI.Comm, MPI.Comm]:
    """
    Creates new communicators for space & time parallelism by
    "splitting" the input communicator into two sub-communicators.

    :param comm: Communicator to be used as the basis for new communicators
    :param splitting: Splitting factor (number of processes for spatial parallelism)
    :return: Space and time communicator
    """

    # Determine color based on splitting factor
    # All processes with the same color will be assigned to the same communicator.
    rank = comm.Get_rank()
    x_color = rank // splitting
    t_color = rank % splitting

    # Split the communicator based on the color and key
    comm_x = comm.Split(color=x_color, key=rank)
    comm_t = comm.Split(color=t_color, key=rank)

    return comm_x, comm_t
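
Usage sketch: launched with 8 processes and splitting=4, comm_x groups ranks {0..3} and {4..7} for spatial parallelism, while comm_t groups ranks that share the same rank % 4 for temporal parallelism.

from mpi4py import MPI

comm = MPI.COMM_WORLD  # e.g. mpiexec -n 8 python split_demo.py
comm_x, comm_t = split_communicator(comm, splitting=4)
print(f"world rank {comm.Get_rank()}: "
      f"x {comm_x.Get_rank()}/{comm_x.Get_size()}, "
      f"t {comm_t.Get_rank()}/{comm_t.Get_size()}")
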
Example #10
def wait_and_notify(comm: MPI.Comm, service_name: str, port: Port,
                    message: str, message_per_rank: Optional[str] = None) \
        -> None:
    """Wait until a barrier is reached and then notify the user.

    """
    comm.barrier()

    rank = comm.Get_rank()
    size = comm.Get_size()

    data = service_name, port.port
    data = comm.gather(data, root=0)

    if rank == 0:
        for i in range(size):
            if message_per_rank:
                info = {'service_name': data[i][0],
                        'port': data[i][1]}
                print(message_per_rank.format(**info))
        print(message, flush=True)
Example #11
def joint_dist_from_poisson_parameters(comm: mpi.Comm,
                                       poisson_parameters: np.ndarray,
                                       nmax: int):
    nsamples_local = poisson_parameters.shape[0]
    nsamples = comm.allreduce(nsamples_local, mpi.SUM)
    nmax = comm.allreduce(nmax, mpi.MAX)

    p_local = np.zeros((nmax + 1, nmax + 1), dtype=float)

    x_eval = np.arange(0, nmax + 1, dtype=int)
    p_marginals = np.zeros((2, nmax + 1), dtype=float)

    for j in range(0, nsamples_local):
        for ispecies in range(0, 2):
            p_marginals[ispecies, :] = poisson.pmf(
                x_eval, mu=poisson_parameters[j, ispecies])
        p_local += np.kron(p_marginals[0, :], p_marginals[1, :]).reshape(
            (nmax + 1, nmax + 1))
    p_global = comm.allreduce(p_local, mpi.SUM)
    p_global = (1.0 / nsamples) * p_global
    return p_global
Example #12
def get_block(comm: _MPI.Comm, gnx: int, gny: int, ngh: int):
    """Get an instance of BloclMPI for the current MPI process.

    Arguments
    ---------
    comm : MPI.Comm
        The communicator.
    gnx, gny : int
        The global numbers of cells.
    ngh : int
        The number of ghost cells outside boundary. The "boundary" also includes the internal
        boundaries between two blocks. Required when exchanging data between blocks.

    Returns
    -------
    An instance of Block.
    """
    # pylint: disable=invalid-name

    data = {"ngh": ngh, "gnx": gnx, "gny": gny, "comm": comm}

    pnx, pny = cal_num_procs(comm.Get_size(), gnx, gny)

    pi, pj = cal_proc_loc_from_rank(pnx, comm.Get_rank())

    data["west"], data["east"], data["south"], data["north"] = \
        cal_neighbors(pnx, pny, pi, pj, comm.Get_rank())

    data["ibg"], data["ied"], data["jbg"], data["jed"] = \
        cal_local_cell_range(pnx, pny, pi, pj, gnx, gny)

    data["nx"], data[
        "ny"] = data["ied"] - data["ibg"], data["jed"] - data["jbg"]

    data["proc_shape"] = (pny, pnx)
    data["proc_loc"] = (pj, pi)

    return Block(**data)
Example #13
def _share_results(comm: MPI.Comm, fits_pos: List[List[float]],
                   fits_neg: List[List[float]], inds: List[int]) -> ndarray:
    """Share results and noise inds to all processes"""
    send_results = np.array(
        [fp + fn + [i] for fp, fn, i in zip(fits_pos, fits_neg, inds)] * comm.size,
        dtype=float)
    results = np.empty(send_results.shape)
    comm.Alltoall(send_results, results)

    objectives = len(fits_pos[0])

    return results.reshape((-1, 1 + 2 * objectives))  # flatten the process dimension
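
The replication before Alltoall makes it behave like an allgather: every rank sends an identical copy of its rows to every other rank, so afterwards each rank holds everyone's results. A standalone sketch of the same pattern (not from the source project):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
local_rows = np.full((2, 3), float(comm.rank))   # 2 results of 3 floats on each rank
send = np.tile(local_rows, (comm.size, 1))       # replicate once per destination rank
recv = np.empty_like(send)
comm.Alltoall(send, recv)                        # recv now stacks every rank's local_rows
all_rows = recv.reshape(-1, 3)
assert all_rows.shape[0] == 2 * comm.size
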
Example #14
 def __init__(self, eta_grid: list, bsplines: list, layouts: LayoutManager,
              chosenLayout: str, comm: MPI.Comm = MPI.COMM_WORLD, **kwargs):
     dtype = kwargs.pop('dtype', float)
     self.hasSaveMemory = kwargs.pop('allocateSaveMemory', False)
     
     # get MPI values
     self.global_comm = comm
     self.rank = comm.Get_rank()
     self.mpi_size = comm.Get_size()
     
     # remember layout
     self._layout_manager = layouts
     self._current_layout_name = chosenLayout
     self._layout = layouts.getLayout(chosenLayout)
     if self.hasSaveMemory:
         self._my_data = [np.empty(self._layout_manager.bufferSize,dtype=dtype),
                          np.empty(self._layout_manager.bufferSize,dtype=dtype),
                          np.empty(self._layout_manager.bufferSize,dtype=dtype)]
         self.notSaved = True
     else:
         self._my_data = [np.empty(self._layout_manager.bufferSize,dtype=dtype),
                          np.empty(self._layout_manager.bufferSize,dtype=dtype)]
     
     self._dataIdx = 0
     self._buffIdx = 1
     self._saveIdx = 2
     
     # Remember views on the data
     self._f = np.split(self._my_data[self._dataIdx],[self._layout.size])[0].reshape(self._layout.shape)
     
     # save coordinate information
     # saving in list allows simpler reordering of coordinates
     self._Vals = eta_grid
     self._splines = bsplines
     self._nDims = len(eta_grid)
     self._nGlobalCoords = [len(x) for x in eta_grid]
Example #15
def worker(comm: MPI.Comm, cluster_rank: int) -> None:
    """
    The behavior of a Cluster's worker process.

    <comm> is the MPI Comm that the Cluster and its workers use to communicate,
    and <cluster_rank> is the rank of the Cluster's process.
    """
    sess = tf.compat.v1.Session()
    device, start_num, end_num, vary_opts = comm.recv(source=cluster_rank)
    with tf.compat.v1.device(device):
        graphs = OrderedDict()
        for num in range(start_num, end_num):
            graphs[num] = ConvNet(num, sess, vary_opts)
        while True:
            data = comm.recv(source=cluster_rank)
            instruction = data[0]
            if instruction == Instruction.EXIT:
                break
            elif instruction == Instruction.INIT:
                for graph in graphs.values():
                    graph.initialize_variables()
            else:
                if instruction == Instruction.COPY_TRAIN_GET:
                    new_values = data[3]
                    for num, new_value in new_values.items():
                        graphs[num].set_value(new_value)
                        graphs[num].explore()
                    until_step_num = data[4]
                    for graph in graphs.values():
                        if graph.step_num < until_step_num:
                            graph.train()
                nums = data[1]
                attributes = data[2]
                attribute_getters = [GETTERS[attribute] for attribute in attributes]
                comm.send({num: tuple(getter(graphs[num]) for getter in attribute_getters) for num in nums},
                          dest=cluster_rank)
Example #16
def seed(comm: MPI.Comm, seed: list, env: Optional[gym.Env] = None) -> Tuple[np.random.RandomState, int, int]:
    """Seeds torch, the env and returns the seed and a random state"""
    if seed is not None and hasattr(seed, '__len__') and len(seed) == comm.size:
        my_seed = seed[comm.rank]
        rs = np.random.RandomState(my_seed)
    else:
        rs, my_seed = gym.utils.seeding.np_random(None)

    global_seed = comm.scatter([my_seed] * comm.size)  # scatter the root proc's `my_seed` for seeding torch
    torch.random.manual_seed(global_seed)  # This seed must be the same on each proc for generating initial params
    if env is not None:
        env.seed(my_seed)
        env.action_space.seed(my_seed)
        env.observation_space.seed(my_seed)

    return rs, my_seed, global_seed
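
Note that scattering `[my_seed] * comm.size` only uses the root's list, so every rank receives the root's `my_seed`; a plain bcast is equivalent, as this small check illustrates:

from mpi4py import MPI

comm = MPI.COMM_WORLD
my_seed = 1234 + comm.rank                           # per-rank seed (illustrative)
global_seed = comm.scatter([my_seed] * comm.size)    # all ranks receive rank 0's 1234
assert global_seed == comm.bcast(my_seed)            # same value via broadcast from rank 0
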
Example #17
def collect_data_and_write_to_file(args: Namespace, comm: MPI.Comm,
                                   hits_per_step, times_per_step,
                                   experiment_name) -> None:
    """
    Collect the gathered data from all ranks onto rank 0 and write the timing file.
    """
    is_root = comm.Get_rank() == 0
    results = None
    if is_root:
        print("Gathering Times")
        results = set_experiment_info(experiment_name, args.time_step,
                                      args.backend, args.hash)
        results = gather_hit_counts(hits_per_step, results)

    results = gather_timing_data(times_per_step, results, comm)

    if is_root:
        write_global_timings(results)
Example #18
    def __init__(self, pop_size: int, vary_opts: bool, comm: MPI.Comm, rank_devices: Dict[int, Device]) -> None:
        """
        Creates a new Cluster with <pop_size> ConvNets.

        If <vary_opts> is True, the TensorFlow Optimizers used by the ConvNets
        will be sampled at random and can be perturbed. Otherwise, they will
        always be AdamOptimizers.

        <comm> is the MPI Comm that this Cluster and its worker processes use
        to communicate. <rank_devices> is a dictionary in which each key is a
        worker's process rank and its corresponding value is the TensorFlow
        device on which that worker should create its assigned ConvNets.

        worker(<comm>, <rank>), where <rank> is the rank of this Cluster's
        process, must be called independently in each worker process.
        """
        print('Varying Optimizers:', vary_opts)
        self.sess = tf.compat.v1.Session()
        self.pop_size = pop_size
        self.vary_opts = vary_opts
        self.comm = comm
        self.rank_graphs = {rank: [] for rank in rank_devices.keys()}
        self.graph_ranks = []
        self.peak_metric = None
        self.peak_metric_value = None
        graphs_per_worker = pop_size / len(rank_devices)
        graph_num = 0
        graphs_to_make = 0
        reqs = []
        for rank, device in rank_devices.items():
            graphs_to_make += graphs_per_worker
            start_num = graph_num
            graph_num = min(graph_num + math.ceil(graphs_to_make), pop_size)
            self.rank_graphs[rank].extend(range(start_num, graph_num))
            self.graph_ranks.extend(rank for _ in range(start_num, graph_num))
            reqs.append(comm.isend((device, start_num, graph_num, vary_opts), dest=rank))
            graphs_to_make -= (graph_num - start_num)
        for req in reqs:
            req.wait()
Example #19
    def __init__(self, start_t: float, end_t: float, num_time_blocks: int,
                 comm: MPI.Comm):
        """
        This method sets up the coupling matrices and the structure for the kkt system

        Parameters
        ----------
        start_t: float
            The beginning of the time horizon
        end_t: float
            The end of the time horizon
        num_time_blocks: int
            The number of time blocks to split the time horizon into
        comm: MPI.Comm
            The MPI communicator
        """
        self._num_time_blocks: int = num_time_blocks
        self._num_states: Optional[int] = None
        # keys are the time block index (passed into the build_model_for_time_block method)
        self._nlps: Dict[int, InteriorPointInterface] = dict()
        # these get multiplied by the primal vars of the corresponding time block
        self._link_forward_matrices: Dict[int, coo_matrix] = dict()
        self._link_backward_matrices: Dict[int, coo_matrix] = dict()
        # these get multiplied by the coupling variables
        self._link_forward_coupling_matrices: Dict[int, coo_matrix] = dict()
        self._link_backward_coupling_matrices: Dict[int, coo_matrix] = dict()

        self._comm: MPI.Comm = comm
        self._rank: int = comm.Get_rank()
        self._size: int = comm.Get_size()

        if self._size > self._num_time_blocks:
            raise ValueError(
                'Cannot yet handle more processes than time blocks')

        self._local_block_indices: Sequence[int] = _distribute_blocks(
            num_time_blocks=num_time_blocks, rank=self._rank, size=self._size)
        self._ownership_map: Dict[int, int] = _get_ownership_map(
            num_time_blocks=num_time_blocks, size=self._size)

        self._primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._ineq_lb: MPIBlockVector = self._build_mpi_block_vector()
        self._ineq_ub: MPIBlockVector = self._build_mpi_block_vector()

        self._init_primals: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._primals: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._delta_primals: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._init_slacks: MPIBlockVector = self._build_mpi_block_vector()
        self._slacks: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_slacks: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_eq: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_eq: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_eq: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_ineq: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_ineq: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_ineq: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._duals_primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._delta_duals_primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._init_duals_primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._duals_primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._delta_duals_primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._init_duals_slacks_lb: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_slacks_lb: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_slacks_lb: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_slacks_ub: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_slacks_ub: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_slacks_ub: MPIBlockVector = self._build_mpi_block_vector()

        self._eq_resid: MPIBlockVector = self._build_mpi_block_vector()
        self._ineq_resid: MPIBlockVector = self._build_mpi_block_vector()
        self._grad_objective: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._jac_eq: MPIBlockMatrix = self._build_mpi_block_matrix(
            extra_row=False, extra_col=True)
        self._jac_ineq: MPIBlockMatrix = self._build_mpi_block_matrix(
            extra_row=False, extra_col=True)
        self._kkt: MPIBlockMatrix = self._build_mpi_block_matrix(
            extra_row=True, extra_col=True)
        self._rhs: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._setup(start_t=start_t, end_t=end_t)
        self._setup_block_vectors()
        self._setup_jacs()
        self._setup_kkt_and_rhs_structure()
        self._broadcast()
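
_distribute_blocks and _get_ownership_map are project helpers whose implementation is not shown here; purely as an illustration (an assumption, not necessarily the source's actual scheme), an even contiguous split of num_time_blocks over size ranks could look like:

def distribute_blocks_evenly(num_blocks: int, rank: int, size: int):
    # hypothetical helper: contiguous, near-equal ranges, earlier ranks take the remainder
    per_rank, extra = divmod(num_blocks, size)
    start = rank * per_rank + min(rank, extra)
    stop = start + per_rank + (1 if rank < extra else 0)
    return list(range(start, stop))

print(distribute_blocks_evenly(10, 1, 3))  # rank 1 of 3 owns blocks [4, 5, 6]
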
Example #20
def update_archive(comm: MPI.Comm, behaviour: Sequence[float],
                   archive: Optional[np.ndarray]) -> np.ndarray:
    behaviour = comm.scatter([behaviour] * comm.size)
    if archive is None:
        return np.array([behaviour])
    return np.concatenate((archive, [behaviour]))
Example #21
 def mpi_inc(self, comm: MPI.Comm):
     stat = comm.allreduce(self, op=sumobstat_op)
     self.sum = stat.sum
     self.sumsq = stat.sumsq
     self.count = stat.count
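
The `sumobstat_op` used above is presumably created elsewhere in the project with MPI.Op.Create; a minimal sketch of that pattern with illustrative names, reducing (sum, sumsq, count) triples across ranks:

import numpy as np
from mpi4py import MPI

def _combine(a, b, datatype=None):
    # elementwise combination of (sum, sumsq, count) triples; datatype is unused here
    return (a[0] + b[0], a[1] + b[1], a[2] + b[2])

combine_op = MPI.Op.Create(_combine, commute=True)

comm = MPI.COMM_WORLD
x = np.random.rand(5)
local = (x.sum(), (x ** 2).sum(), x.size)
total = comm.allreduce(local, op=combine_op)
if comm.rank == 0:
    print("global mean:", total[0] / total[2])
combine_op.Free()
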
Example #22
    def __init__(self,
                 scenarios: Sequence,
                 nonanticipative_var_identifiers: Sequence,
                 comm: MPI.Comm,
                 ownership_map: Optional[Dict] = None):
        """
        This method sets up the coupling matrices and the structure for the kkt system

        Parameters
        ----------
        scenarios: Sequence
            The scenarios for which subproblems need to be built
        nonanticipative_var_identifiers: Sequence
            Unique identifiers for the first stage variables. Every process should get the
            exact same list in the exact same order.
        comm: MPI.Comm
            The MPI communicator
        ownership_map: Dict
            A dictionary mapping scenario index (i.e., index into scenarios) to rank
        """
        self._num_scenarios: int = len(scenarios)
        self._num_first_stage_vars: int = len(nonanticipative_var_identifiers)
        self._first_stage_var_indices = {
            identifier: ndx
            for ndx, identifier in enumerate(nonanticipative_var_identifiers)
        }
        self._num_first_stage_vars_by_scenario: Dict[int, int] = dict()
        # keys are the scenario indices
        self._nlps: Dict[int, InteriorPointInterface] = dict()
        self._scenario_ndx_to_id = dict()
        self._scenario_id_to_ndx = dict()
        # these get multiplied by the primal vars of the corresponding scenario
        self._linking_matrices: Dict[int, coo_matrix] = dict()
        # these get multiplied by the coupling variables
        self._link_coupling_matrices: Dict[int, coo_matrix] = dict()

        self._comm: MPI.Comm = comm
        self._rank: int = comm.Get_rank()
        self._size: int = comm.Get_size()

        if self._size > self._num_scenarios:
            raise ValueError('Cannot yet handle more processes than scenarios')

        if ownership_map is None:
            self._local_block_indices: Sequence[int] = _distribute_blocks(
                num_blocks=self._num_scenarios,
                rank=self._rank,
                size=self._size)
            self._ownership_map: Dict[int, int] = _get_ownership_map(
                num_blocks=self._num_scenarios, size=self._size)
        else:
            self._ownership_map = dict(ownership_map)
            self._local_block_indices = list()
            for scenario_ndx, scenario in enumerate(scenarios):
                if self._ownership_map[scenario_ndx] == self._rank:
                    self._local_block_indices.append(scenario_ndx)

        self._primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._ineq_lb: MPIBlockVector = self._build_mpi_block_vector()
        self._ineq_ub: MPIBlockVector = self._build_mpi_block_vector()

        self._init_primals: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._primals: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._delta_primals: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._init_slacks: MPIBlockVector = self._build_mpi_block_vector()
        self._slacks: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_slacks: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_eq: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_eq: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_eq: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_ineq: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_ineq: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_ineq: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._duals_primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._delta_duals_primals_lb: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._init_duals_primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._duals_primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._delta_duals_primals_ub: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._init_duals_slacks_lb: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_slacks_lb: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_slacks_lb: MPIBlockVector = self._build_mpi_block_vector()

        self._init_duals_slacks_ub: MPIBlockVector = self._build_mpi_block_vector()
        self._duals_slacks_ub: MPIBlockVector = self._build_mpi_block_vector()
        self._delta_duals_slacks_ub: MPIBlockVector = self._build_mpi_block_vector()

        self._eq_resid: MPIBlockVector = self._build_mpi_block_vector()
        self._ineq_resid: MPIBlockVector = self._build_mpi_block_vector()
        self._grad_objective: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)
        self._jac_eq: MPIBlockMatrix = self._build_mpi_block_matrix(
            extra_row=False, extra_col=True)
        self._jac_ineq: MPIBlockMatrix = self._build_mpi_block_matrix(
            extra_row=False, extra_col=True)
        self._kkt: MPIBlockMatrix = self._build_mpi_block_matrix(
            extra_row=True, extra_col=True)
        self._rhs: MPIBlockVector = self._build_mpi_block_vector(
            extra_block=True)

        self._setup(scenarios=scenarios)
        self._setup_block_vectors()
        self._setup_jacs()
        self._setup_kkt_and_rhs_structure()