recv_buffer_shape = exchange_info[1]
send_buffer_shape = exchange_info[2]

x_local_shape = compute_subshape(P_x.shape, P_x.index, x_global_shape)

# Give each rank an easily recognizable fill value.
value = (1 + rank) * (10 ** rank)
a = np.full(shape=x_local_shape, fill_value=value, dtype=float)

forward_input_padnd_layer = PadNd(halo_shape.astype(int), value=0, partition=P_x)
adjoint_input_padnd_layer = PadNd(halo_shape.astype(int), value=value, partition=P_x)

t = torch.tensor(a, requires_grad=True)
t_forward_input = forward_input_padnd_layer.forward(t)
t_adjoint_input = adjoint_input_padnd_layer.forward(t)

# The halo exchange (and its adjoint in backward) is applied in place, so the
# exchanged values also appear in the buffers that were passed in.
halo_layer = HaloExchange(P_x, halo_shape, recv_buffer_shape, send_buffer_shape)

print_sequential(cart_comm, f'rank = {rank}, t_forward_input =\n{t_forward_input.int()}')

t_forward_exchanged = halo_layer(t_forward_input)

print_sequential(cart_comm, f'rank = {rank}, t_forward_exchanged =\n{t_forward_exchanged.int()}')

print_sequential(cart_comm, f'rank = {rank}, t_adjoint_input =\n{t_adjoint_input.int()}')

t_forward_exchanged.backward(t_adjoint_input)

# Because the adjoint exchange is in place, t_adjoint_input now holds the
# adjoint-exchanged values.
print_sequential(cart_comm, f'rank = {rank}, t_adjoint_exchanged =\n{t_adjoint_input.int()}')
def print_sequential(self, val):
    # Delegate to the print_sequential debug utility, but only on active ranks.
    if self.active:
        print_sequential(self._comm, val)
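All of these snippets lean on print_sequential for rank-ordered, non-interleaved output. As a point of reference, the following is a minimal sketch of how such a utility is typically built with mpi4py; the name print_sequential_sketch is illustrative, and this is not claimed to be distdl's actual implementation.

import sys

from mpi4py import MPI


def print_sequential_sketch(comm, val):
    # Each rank takes its turn: only the rank whose turn it is prints,
    # and a barrier after every turn keeps the output ordered by rank.
    for turn in range(comm.Get_size()):
        if comm.Get_rank() == turn:
            print(val)
            sys.stdout.flush()
        comm.Barrier()

Used in place of print_sequential, a call such as print_sequential_sketch(MPI.COMM_WORLD, f'rank = {MPI.COMM_WORLD.Get_rank()}') would print one line per rank, in rank order.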
use_ranks = ranks[:P_size]
P = P_world.create_subpartition(use_ranks)
P_x = P.create_cartesian_subpartition(shape)
rank = P_x.rank
cart_comm = P_x.comm

layer = MockPoolLayer()

if P_x.active:
    x_global_shape = np.array([1, 1, 10])
    kernel_size = np.array([2])
    stride = np.array([2])
    padding = np.array([0])
    dilation = np.array([1])

    halo_shape, recv_buffer_shape, send_buffer_shape, needed_ranges = \
        layer._compute_exchange_info(x_global_shape,
                                     kernel_size,
                                     stride,
                                     padding,
                                     dilation,
                                     P_x.active,
                                     P_x.shape,
                                     P_x.index)

    print_sequential(cart_comm,
                     f'rank = {rank}:\n'
                     f'halo_shape =\n{halo_shape}\n'
                     f'recv_buffer_shape =\n{recv_buffer_shape}\n'
                     f'send_buffer_shape =\n{send_buffer_shape}\n'
                     f'needed_ranges =\n{needed_ranges}')
import numpy as np
import torch
from mpi4py import MPI

from distdl.backends.mpi.partition import MPIPartition
from distdl.nn.pooling import DistributedAvgPool1d
from distdl.utilities.debug import print_sequential
from distdl.utilities.slicing import compute_subshape
from distdl.utilities.torch import zero_volume_tensor

P_world = MPIPartition(MPI.COMM_WORLD)
P_world.comm.Barrier()

# Use the first 3 ranks as a 1 x 1 x 3 Cartesian partition of the input.
P_x_base = P_world.create_partition_inclusive(np.arange(3))
P_x = P_x_base.create_cartesian_topology_partition([1, 1, 3])

x_global_shape = np.array([1, 1, 10])

layer = DistributedAvgPool1d(P_x, kernel_size=[2], stride=[2])

x = zero_volume_tensor()
if P_x.active:
    x_local_shape = compute_subshape(P_x.shape, P_x.index, x_global_shape)
    x = torch.tensor(np.ones(shape=x_local_shape) * (P_x.rank + 1), dtype=float)
x.requires_grad = True

print_sequential(P_world.comm, f'rank = {P_world.rank}, input =\n{x}')

y = layer(x)

print_sequential(P_world.comm, f'rank = {P_world.rank}, output =\n{y}')
P_y_base = P_world.create_partition_inclusive(
    np.arange(P_world.size - out_size, P_world.size))
P_y = P_y_base.create_cartesian_topology_partition(out_shape)

x_global_shape = np.array([7, 5])

layer = DistributedTranspose(P_x, P_y, preserve_batch=False)

x = zero_volume_tensor()
if P_x.active:
    x_local_shape = slicing.compute_subshape(P_x.shape, P_x.index, x_global_shape)
    x = np.zeros(x_local_shape) + P_x.rank + 1
    x = torch.from_numpy(x)
x.requires_grad = True

print_sequential(P_world.comm, f"x_{P_world.rank}: {x}")

y = layer(x)

print_sequential(P_world.comm, f"y_{P_world.rank}: {y}")

dy = zero_volume_tensor()
if P_y.active:
    y_local_shape = slicing.compute_subshape(P_y.shape, P_y.index, x_global_shape)
    dy = np.zeros(y_local_shape) + P_y.rank + 1
    dy = torch.from_numpy(dy)

print_sequential(P_world.comm, f"dy_{P_world.rank}: {dy}")

y.backward(dy)
dx = x.grad