def __init__(self, base_kernel, device_ids, output_device=None, **kwargs):
    """Wrap ``base_kernel`` so its forward pass is split across multiple devices.

    Args:
        base_kernel: kernel module whose evaluation is parallelized.
        device_ids: devices to scatter the inputs across.
        output_device: device on which the gathered result is placed;
            defaults to ``device_ids[0]`` when not given.
        **kwargs: accepted for signature compatibility; unused here.
    """
    # NOTE(review): in the original file this __init__ is immediately
    # redefined by the following definition, so this version is dead code
    # as written — confirm which one is intended to survive.
    # dim=-2 scatters along the data-point (row) dimension of the inputs.
    DataParallel.__init__(
        self,
        module=base_kernel,
        device_ids=device_ids,
        output_device=output_device,
        dim=-2,
    )
    # Explicit None check: a device index of 0 is falsy, so the original
    # truthiness test (`if output_device`) wrongly fell back to
    # device_ids[0] when the caller explicitly asked for device 0.
    self.output_device = output_device if output_device is not None else device_ids[0]
    # Sentinel caches; real cached inputs are presumably assigned later
    # by other methods of this class (not visible here).
    self.__cached_x1 = torch.empty(1)
    self.__cached_x2 = torch.empty(1)
def __init__(self, base_kernel, device_ids, output_device=None, create_cuda_context=True, **kwargs):
    """Wrap ``base_kernel`` so its forward pass is split across multiple devices.

    Args:
        base_kernel: kernel module whose evaluation is parallelized.
        device_ids: devices to scatter the inputs across.
        output_device: device on which the gathered result is placed;
            defaults to ``device_ids[0]`` when not given.
        create_cuda_context: when True, touch each device up front to
            initialize its CUDA context (see comment below).
        **kwargs: accepted for signature compatibility; unused here.
    """
    # Need to warm up each GPU, otherwise scattering in forward will be
    # EXTREMELY slow. This memory is released as soon as we leave __init__.
    if create_cuda_context:
        for d in device_ids:
            _ = torch.tensor([], device=d)
    # dim=-2 scatters along the data-point (row) dimension of the inputs.
    DataParallel.__init__(
        self,
        module=base_kernel,
        device_ids=device_ids,
        output_device=output_device,
        dim=-2,
    )
    # Explicit None check: a device index of 0 is falsy, so the original
    # truthiness test (`if output_device`) wrongly fell back to
    # device_ids[0] when the caller explicitly asked for device 0.
    self.output_device = output_device if output_device is not None else device_ids[0]
    # Sentinel caches; real cached inputs are presumably assigned later
    # by other methods of this class (not visible here).
    self.__cached_x1 = torch.empty(1)
    self.__cached_x2 = torch.empty(1)