def __init__(self):
    """Create ``lin1``-``lin3`` as bias-free 1x1 linears plus a ``Nested`` ``lin4``.

    Each linear layer is passed through ``_maybe_cuda`` together with the
    ``move_to_cuda`` flag taken from the enclosing scope.
    """
    super().__init__()
    # Attributes are assigned in the order lin1, lin2, lin3, lin4.
    for attr_name in ("lin1", "lin2", "lin3"):
        layer = nn.Linear(1, 1, bias=False)
        setattr(self, attr_name, _maybe_cuda(layer, move_to_cuda))
    self.lin4 = Nested()
def __init__(self, nested):
    """Build ``lin1``, ``lin2`` and ``lin3``, each involving an FSDP wrapper.

    Args:
        nested: When truthy, ``lin1`` is an ``nn.Sequential`` holding a
            plain linear layer followed by an FSDP-wrapped one, so the
            FSDP instance sits one level below ``lin1``. Otherwise
            ``lin1`` is itself the FSDP wrapper.
    """
    super().__init__()
    # TODO: test the various init modes.
    move_to_cuda = fsdp_init_mode == FSDPInitMode.CUDA_BEFORE

    def _new_linear():
        # A fresh 1x1 layer from the enclosing ``fn_self``, passed through
        # ``_maybe_cuda`` with the flag computed above.
        return _maybe_cuda(fn_self._get_linear(1, 1), move_to_cuda)

    def _new_sharded_linear():
        return FSDP(_new_linear())

    if not nested:
        self.lin1 = _new_sharded_linear()
    else:
        # With nested=True the FSDP module is nested one layer deep, and
        # we should still pick that up. The plain layer is built first,
        # then the wrapped one.
        plain = _new_linear()
        sharded = _new_sharded_linear()
        self.lin1 = nn.Sequential(plain, sharded)
    self.lin2 = _new_sharded_linear()
    self.lin3 = _new_sharded_linear()
def __init__(self):
    """Create the single ``nested_lin`` submodule.

    The layer is a bias-free 1x1 ``nn.Linear`` passed through
    ``_maybe_cuda`` with the ``move_to_cuda`` flag from the enclosing scope.
    """
    super().__init__()
    inner_layer = nn.Linear(1, 1, bias=False)
    self.nested_lin = _maybe_cuda(inner_layer, move_to_cuda)