def forward(self, inputs, im_info, gt_boxes, num_boxes, Ms, Ns):
    # Split every input tensor across the available GPUs.
    inputs_multi = comm.scatter(inputs, self.device_ids)
    im_info = comm.scatter(im_info, self.device_ids)
    gt_boxes = comm.scatter(gt_boxes, self.device_ids)
    num_boxes = comm.scatter(num_boxes, self.device_ids)
    # Run one module replica per GPU on its chunk.
    tensors = parallel_apply(self.modules, [(v,) for v in inputs_multi],
                             devices=self.device_ids)
    out = []
    for i, tensor in enumerate(tensors):
        with torch.cuda.device(tensor.get_device()):
            # Fold dims 1 and 2 together, then crop the spatial dims to (Ms, Ns).
            tensors[i] = tensors[i].view(tensors[i].size(0),
                                         tensors[i].size(1) * tensors[i].size(2),
                                         tensors[i].size(3),
                                         tensors[i].size(4))
            tensors[i] = tensors[i][:, :, :Ms, :Ns].contiguous()
            tensors[i] = Variable(tensors[i])
            out.append([tensors[i], im_info[i].cuda(),
                        gt_boxes[i].cuda(), num_boxes[i].cuda()])
    return out
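# The snippet above hand-rolls the scatter -> parallel_apply half of a
# data-parallel forward pass. A minimal, self-contained sketch of that
# pattern (the Linear module, batch shape, and device ids are illustrative
# assumptions, and two visible GPUs are assumed):
import torch
import torch.cuda.comm as comm
from torch.nn.parallel import replicate, parallel_apply

device_ids = [0, 1]
module = torch.nn.Linear(8, 4).cuda(device_ids[0])
replicas = replicate(module, device_ids)                       # one copy per GPU
chunks = comm.scatter(torch.randn(6, 8).cuda(device_ids[0]), device_ids)
outputs = parallel_apply(replicas, [(x,) for x in chunks], devices=device_ids)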
def forward(self, inputs):
    inputs_multi = comm.scatter(inputs, self.device_ids)
    tensors = parallel_apply(self.modules, [(v,) for v in inputs_multi],
                             devices=self.device_ids)
    out = []
    for i, tensor in enumerate(tensors):
        with torch.cuda.device(tensor.get_device()):
            tensors[i] = torch.autograd.Variable(tensors[i])
            out.append([tensors[i]])
    return out
def _test_scatter(self, input, chunk_sizes=None, dim=0):
    if torch.cuda.device_count() < 2:
        raise unittest.SkipTest("only one GPU detected")
    result = comm.scatter(input, (0, 1), chunk_sizes, dim)
    self.assertEqual(len(result), 2)
    if chunk_sizes is None:
        chunk_sizes = tuple(repeat(input.size(dim) // 2, 2))
    # Each returned chunk must equal the corresponding slice of the input
    # along `dim`.
    chunk_start = 0
    for i, r in enumerate(result):
        chunk_end = chunk_start + chunk_sizes[i]
        index = [slice(None, None), slice(None, None)]
        index[dim] = slice(chunk_start, chunk_end)
        self.assertEqual(r, input[tuple(index)], 0)
        chunk_start = chunk_end
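# A minimal sketch of the contract the test above verifies (two visible GPUs
# assumed; the tensor values are illustrative):
import torch
import torch.cuda.comm as comm

x = torch.arange(12.).view(4, 3)      # CPU tensor, split along dim 0
chunks = comm.scatter(x, (0, 1))      # x[:2] -> cuda:0, x[2:] -> cuda:1
assert chunks[0].equal(x[:2].to('cuda:0'))
assert chunks[1].equal(x[2:].to('cuda:1'))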
def forward(self, input):
    self.input_device = input.get_device() if input.is_cuda else -1

    streams = None
    if self.input_device == -1:
        # Perform CPU to GPU copies in a background stream
        streams = [_get_stream(device) for device in self.target_gpus]

    outputs = comm.scatter(input, self.target_gpus, self.chunk_sizes,
                           self.dim, streams)

    # Synchronize with the copy stream
    if streams is not None:
        for i, output in enumerate(outputs):
            with torch.cuda.device(self.target_gpus[i]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[i])
                output.record_stream(main_stream)

    return outputs
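# Design note on the pattern above: when the input lives on the CPU,
# comm.scatter enqueues the host-to-device copies on background streams.
# Each destination GPU's main stream then wait_stream()s its copy stream
# before the chunk is consumed, and record_stream() tells the caching
# allocator that the chunk is also used on the main stream, so its memory
# cannot be freed and reused before that work completes.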
def forward(ctx, target_gpus, chunk_sizes, dim, input):
    target_gpus = [_get_device_index(x, True) for x in target_gpus]
    ctx.dim = dim
    ctx.input_device = input.get_device() if input.is_cuda else -1

    streams = None
    if ctx.input_device == -1:
        # Perform CPU to GPU copies in a background stream
        streams = [_get_stream(device) for device in target_gpus]

    outputs = comm.scatter(input, target_gpus, chunk_sizes, ctx.dim, streams)

    # Synchronize with the copy stream
    if streams is not None:
        for i, output in enumerate(outputs):
            with torch.cuda.device(target_gpus[i]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[i])
                output.record_stream(main_stream)

    return outputs
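# The matching backward of this autograd-style scatter gathers the per-GPU
# gradients back onto the original input device. A sketch of that pairing
# (Gather.apply is assumed to come from the same _functions module; treat
# this as illustrative rather than verbatim library source):
@staticmethod
def backward(ctx, *grad_output):
    # None for the three non-tensor forward args, gathered grad for `input`.
    return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output)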
def get_onehot_label(labels, num_gpus, num_classes, model_parallel=False, class_split=None):
    # Get one-hot labels
    labels = labels.view(-1, 1)
    labels_onehot = torch.zeros(len(labels), num_classes).cuda()
    labels_onehot.scatter_(1, labels, 1)
    if not model_parallel:
        return labels_onehot
    # Shard the one-hot matrix along the class dimension, one piece per GPU.
    label_tuple = comm.scatter(labels_onehot, range(num_gpus), class_split, dim=1)
    return label_tuple
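# Hypothetical usage of get_onehot_label above, splitting 10 classes 6/4
# across two GPUs for a model-parallel classifier head (all values are
# illustrative):
labels = torch.tensor([0, 3, 9]).cuda()
shards = get_onehot_label(labels, num_gpus=2, num_classes=10,
                          model_parallel=True, class_split=[6, 4])
# shards[0]: (3, 6) one-hot columns 0..5 on cuda:0
# shards[1]: (3, 4) one-hot columns 6..9 on cuda:1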
def forward(self, input):
    self.input_device = input.get_device() if input.is_cuda else -1
    return comm.scatter(input, self.target_gpus, self.chunk_sizes, self.dim)
def backward(self, grad_output):
    # Route the incoming gradient back to the GPUs the forward inputs
    # came from, restoring their original sizes along `dim`.
    return comm.scatter(grad_output, self.input_gpus, self.input_sizes, self.dim)
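# The backward above is the scatter half of a gather/scatter pair. A
# plausible matching forward, reconstructed under the same attribute names
# (illustrative, not verbatim library source):
def forward(self, *inputs):
    self.input_gpus = tuple(i.get_device() for i in inputs)
    self.input_sizes = tuple(i.size(self.dim) for i in inputs)
    return comm.gather(inputs, self.dim, self.destination)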