def test_cuda_small_tensors(self):
    # Check multiple small tensors which will likely use the same
    # underlying cached allocation
    ctx = mp.get_context('spawn')
    tensors = []
    for i in range(5):
        device = i % 2
        tensors += [torch.arange(i * 5, (i + 1) * 5).cuda(device)]

    inq = ctx.Queue()
    outq = ctx.Queue()
    inq.put(tensors)
    p = ctx.Process(target=sum_tensors, args=(inq, outq))
    p.start()

    results = []
    for i in range(5):
        results.append(outq.get())
    p.join()

    for i, tensor in enumerate(tensors):
        v, device, tensor_size, storage_size = results[i]
        self.assertEqual(v, torch.arange(i * 5, (i + 1) * 5).sum())
        self.assertEqual(device, i % 2)
        self.assertEqual(tensor_size, 5)
        self.assertEqual(storage_size, 5)
def __call__(self, grid):
    batch_size, _, grid_dimX, grid_dimY, grid_dimZ = grid.size()

    k = 1.0
    x_coords = 2.0 * k * torch.arange(grid_dimX, dtype=torch.float32).unsqueeze(1).unsqueeze(1).expand(
        grid_dimX, grid_dimY, grid_dimZ) / (grid_dimX - 1.0) - 1.0
    y_coords = 2.0 * k * torch.arange(grid_dimY, dtype=torch.float32).unsqueeze(1).unsqueeze(0).expand(
        grid_dimX, grid_dimY, grid_dimZ) / (grid_dimY - 1.0) - 1.0
    z_coords = 2.0 * k * torch.arange(grid_dimZ, dtype=torch.float32).unsqueeze(0).unsqueeze(0).expand(
        grid_dimX, grid_dimY, grid_dimZ) / (grid_dimZ - 1.0) - 1.0
    coords = torch.stack((x_coords, y_coords, z_coords), dim=0)

    if self.with_r:
        rs = ((x_coords ** 2) + (y_coords ** 2) + (z_coords ** 2)) ** 0.5
        rs = k * rs / torch.max(rs)
        rs = torch.unsqueeze(rs, dim=0)
        coords = torch.cat((coords, rs), dim=0)

    coords = torch.unsqueeze(coords, dim=0).repeat(batch_size, 1, 1, 1, 1)
    grid = torch.cat((coords.to(grid.device), grid), dim=1)
    return grid
def test_broadcast_subspace(self):
    a = zeros((100, 100))
    v = Variable(torch.arange(0, 100))[:, None]
    b = Variable(torch.arange(99, -1, -1).long())
    a[b] = v
    expected = b.double().unsqueeze(1).expand(100, 100)
    self.assertEqual(a, expected)
def __call__(self, spec_f):
    spec_f, is_variable = _check_is_variable(spec_f)
    n_fft = spec_f.size(2)

    m_min = 0. if self.f_min == 0 else 2595 * np.log10(1. + (self.f_min / 700))
    m_max = 2595 * np.log10(1. + (self.f_max / 700))

    m_pts = torch.linspace(m_min, m_max, self.n_mels + 2)
    f_pts = 700 * (10 ** (m_pts / 2595) - 1)

    bins = torch.floor(((n_fft - 1) * 2) * f_pts / self.sr).long()

    fb = torch.zeros(n_fft, self.n_mels)
    for m in range(1, self.n_mels + 1):
        f_m_minus = bins[m - 1].item()
        f_m = bins[m].item()
        f_m_plus = bins[m + 1].item()

        if f_m_minus != f_m:
            fb[f_m_minus:f_m, m - 1] = (torch.arange(f_m_minus, f_m) - f_m_minus) / (f_m - f_m_minus)
        if f_m != f_m_plus:
            fb[f_m:f_m_plus, m - 1] = (f_m_plus - torch.arange(f_m, f_m_plus)) / (f_m_plus - f_m)

    fb = Variable(fb)
    spec_m = torch.matmul(spec_f, fb)  # (c, l, n_fft) dot (n_fft, n_mels) -> (c, l, n_mels)
    return spec_m if is_variable else spec_m.data
def meshgrid(x, y, row_major=True):
    '''Return meshgrid in range x & y.

    Args:
      x: (int) first dim range.
      y: (int) second dim range.
      row_major: (bool) row major or column major.

    Returns:
      (tensor) meshgrid, sized [x*y,2]

    Example:
    >> meshgrid(3,2)
    0  0
    1  0
    2  0
    0  1
    1  1
    2  1
    [torch.FloatTensor of size 6x2]

    >> meshgrid(3,2,row_major=False)
    0  0
    0  1
    0  2
    1  0
    1  1
    1  2
    [torch.FloatTensor of size 6x2]
    '''
    a = torch.arange(0, x)
    b = torch.arange(0, y)
    xx = a.repeat(y).view(-1, 1)
    yy = b.view(-1, 1).repeat(1, x).view(-1, 1)
    return torch.cat([xx, yy], 1) if row_major else torch.cat([yy, xx], 1)
def make_positions(tensor, padding_idx, left_pad, onnx_trace=False):
    """Replace non-padding symbols with their position numbers.

    Position numbers begin at padding_idx+1. Padding symbols are ignored,
    but it is necessary to specify whether padding is added on the left
    side (left_pad=True) or right side (left_pad=False).
    """
    if onnx_trace:
        range_buf = torch._dim_arange(like=tensor, dim=1) + padding_idx + 1
        mask = tensor.ne(padding_idx)
        positions = range_buf.expand_as(tensor)
        if left_pad:
            positions = positions - mask.size(1) + mask.long().sum(dim=1).unsqueeze(1)
        # padding slots must keep the padding index (the flattened original had
        # `positions * (1 - mask.long())` here, which reduces to a no-op)
        return positions * mask.long() + padding_idx * (1 - mask.long())

    max_pos = padding_idx + 1 + tensor.size(1)
    if not hasattr(make_positions, 'range_buf'):
        make_positions.range_buf = tensor.new()
    make_positions.range_buf = make_positions.range_buf.type_as(tensor)
    if make_positions.range_buf.numel() < max_pos:
        torch.arange(padding_idx + 1, max_pos, out=make_positions.range_buf)
    mask = tensor.ne(padding_idx)
    positions = make_positions.range_buf[:tensor.size(1)].expand_as(tensor)
    if left_pad:
        positions = positions - mask.size(1) + mask.long().sum(dim=1).unsqueeze(1)
    return tensor.clone().masked_scatter_(mask, positions[mask])
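A minimal usage sketch for make_positions above, assuming padding_idx=1 and right padding; the tokens batch is hypothetical, and the expected output follows from the masked scatter (padding slots keep the padding index):

import torch

tokens = torch.tensor([[5, 6, 7, 1],
                       [8, 9, 1, 1]])  # hypothetical batch, 1 = padding_idx
positions = make_positions(tokens, padding_idx=1, left_pad=False)
# position numbering starts at padding_idx + 1 = 2:
# tensor([[2, 3, 4, 1],
#         [2, 3, 1, 1]])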
def test_int_assignment(self):
    x = Variable(torch.arange(0, 4).view(2, 2))
    x[1] = 5
    self.assertEqual(x.data.tolist(), [[0, 1], [5, 5]])

    x = Variable(torch.arange(0, 4).view(2, 2))
    x[1] = Variable(torch.arange(5, 7))
    self.assertEqual(x.data.tolist(), [[0, 1], [5, 6]])
def test_int_assignment(self):
    x = torch.arange(0, 4).view(2, 2)
    x[1] = 5
    self.assertEqual(x.tolist(), [[0, 1], [5, 5]])

    x = torch.arange(0, 4).view(2, 2)
    x[1] = torch.arange(5, 7)
    self.assertEqual(x.tolist(), [[0, 1], [5, 6]])
def test_byte_tensor_assignment(self):
    x = Variable(torch.arange(0, 16).view(4, 4))
    b = Variable(torch.ByteTensor([True, False, True, False]))
    value = Variable(torch.Tensor([3, 4, 5, 6]))
    x[b] = value
    self.assertEqual(x[0], value)
    self.assertEqual(x[1].data, torch.arange(4, 8))
    self.assertEqual(x[2], value)
    self.assertEqual(x[3].data, torch.arange(12, 16))
def test_byte_tensor_assignment(self):
    x = torch.arange(0., 16).view(4, 4)
    b = torch.ByteTensor([True, False, True, False])
    value = torch.tensor([3., 4., 5., 6.])
    x[b] = value
    self.assertEqual(x[0], value)
    self.assertEqual(x[1], torch.arange(4, 8))
    self.assertEqual(x[2], value)
    self.assertEqual(x[3], torch.arange(12, 16))
def enumerate_support(self):
    total_count = int(self.total_count.max())
    if not self.total_count.min() == total_count:
        raise NotImplementedError("Inhomogeneous total count not supported by `enumerate_support`.")
    values = self._new(1 + total_count,)
    torch.arange(1 + total_count, out=values)
    values = values.view((-1,) + (1,) * len(self._batch_shape))
    values = values.expand((-1,) + self._batch_shape)
    return values
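For intuition, a short sketch of what enumerate_support yields for a homogeneous Binomial, using the public torch.distributions API rather than the snippet's internal helpers:

import torch
from torch.distributions import Binomial

d = Binomial(total_count=3, probs=torch.tensor([0.2, 0.7]))
print(d.enumerate_support())
# tensor([[0., 0.],
#         [1., 1.],
#         [2., 2.],
#         [3., 3.]])  -- values 0..total_count, broadcast over the batch shape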
def __init__(self, train_size, batch_size):
    self.num_data = train_size
    self.num_per_batch = int(train_size / batch_size)
    self.batch_size = batch_size
    self.range = torch.arange(0, batch_size).view(1, batch_size).long()
    self.leftover_flag = False
    if train_size % batch_size:
        self.leftover = torch.arange(self.num_per_batch * batch_size, train_size).long()
        self.leftover_flag = True
def backward(ctx, grad_output):
    idx = grad_output.data.new().long()
    torch.arange(0, ctx.input_numel, out=idx)
    idx = idx.view(ctx.input_size)
    idx_unfolded = idx.unfold(ctx.dim, ctx.size, ctx.step)
    idx_unfolded = idx_unfolded.contiguous().view(-1)
    grad_input = Variable(grad_output.data.new(ctx.input_numel).zero_())
    grad_output = grad_output.contiguous().view(-1)
    grad_input = grad_input.index_add(0, Variable(idx_unfolded), grad_output)
    return grad_input.view(ctx.input_size), None, None, None
def __init__(self, d_model, dropout, max_len=5000):
    super(PositionalEncoding, self).__init__()
    self.dropout = torch.nn.Dropout(p=dropout)

    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0., max_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0., d_model, 2) * -(math.log(10000.0) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    pe = pe.unsqueeze(0)
    self.register_buffer("pe", pe)
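The constructor above only precomputes the pe buffer; a typical companion forward (a hedged sketch mirroring the usual annotated-Transformer pattern, not necessarily this class's original method) adds the first seq_len rows and applies dropout:

def forward(self, x):
    # x: (batch, seq_len, d_model); pe is (1, max_len, d_model)
    x = x + self.pe[:, :x.size(1)]
    return self.dropout(x)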
def findLR(self, model, optimizer, writer, start_lr=1e-7, end_lr=10, num_iters=50):
    model.train()

    losses = []
    lrs = np.logspace(np.log10(start_lr), np.log10(end_lr), num_iters)
    for lr in lrs:
        # Update LR
        for group in optimizer.param_groups:
            group['lr'] = lr

        batch = next(iter(self.data_loaders[0]))
        input_images, depthGT, maskGT = utils.unpack_batch_fixed(batch, self.cfg.device)

        # ------ define ground truth ------
        XGT, YGT = torch.meshgrid([torch.arange(self.cfg.outH),   # [H,W]
                                   torch.arange(self.cfg.outW)])  # [H,W]
        XGT, YGT = XGT.float(), YGT.float()
        XYGT = torch.cat([XGT.repeat([self.cfg.outViewN, 1, 1]),
                          YGT.repeat([self.cfg.outViewN, 1, 1])], dim=0)  # [2V,H,W]
        XYGT = XYGT.unsqueeze(dim=0).to(self.cfg.device)  # [1,2V,H,W]

        with torch.set_grad_enabled(True):
            optimizer.zero_grad()

            XYZ, maskLogit = model(input_images)
            XY = XYZ[:, :self.cfg.outViewN * 2, :, :]
            depth = XYZ[:, self.cfg.outViewN * 2:self.cfg.outViewN * 3, :, :]
            mask = (maskLogit > 0).byte()

            # ------ Compute loss ------
            loss_XYZ = self.l1(XY, XYGT)
            loss_XYZ += self.l1(depth.masked_select(mask), depthGT.masked_select(mask))
            loss_mask = self.sigmoid_bce(maskLogit, maskGT)
            loss = loss_mask + self.cfg.lambdaDepth * loss_XYZ

            # Update weights
            loss.backward()
            # True weight decay
            if self.cfg.trueWD is not None:
                for group in optimizer.param_groups:
                    for param in group['params']:
                        param.data = param.data.add(-self.cfg.trueWD * group['lr'], param.data)
            optimizer.step()

        losses.append(loss.item())

    fig, ax = plt.subplots()
    ax.plot(lrs, losses)
    ax.set_xlabel('learning rate')
    ax.set_ylabel('loss')
    ax.set_xscale('log')
    writer.add_figure('findLR', fig)
def __call__(self, image):
    x_coords = 2.0 * torch.arange(self.image_height).unsqueeze(1).expand(
        self.image_height, self.image_width) / 255.0 - 1.0
    y_coords = 2.0 * torch.arange(self.image_width).unsqueeze(0).expand(
        self.image_height, self.image_width) / 255.0 - 1.0
    coords = torch.stack((x_coords, y_coords), dim=0)
    image = torch.cat((coords, image), dim=0)
    return image
def __init__(self, input_dim: int, max_len: int = 5000) -> None:
    super().__init__()

    # Compute the positional encodings once in log space.
    positional_encoding = torch.zeros(max_len, input_dim, requires_grad=False)
    position = torch.arange(0, max_len).unsqueeze(1).float()
    div_term = torch.exp(torch.arange(0, input_dim, 2).float() * -(math.log(10000.0) / input_dim))
    positional_encoding[:, 0::2] = torch.sin(position * div_term)
    positional_encoding[:, 1::2] = torch.cos(position * div_term)
    positional_encoding = positional_encoding.unsqueeze(0)
    self.register_buffer('positional_encoding', positional_encoding)
def __init__(self, roi_size=128, n_segments=49):
    super().__init__()
    self.roi_size = roi_size
    self.n_segments = n_segments

    X_grid = torch.arange(0, roi_size).view(1, -1).expand(1, 1, roi_size, roi_size)
    Y_grid = torch.arange(0, roi_size).view(-1, 1).expand(1, 1, roi_size, roi_size)
    self.X_grid = nn.Parameter(X_grid.contiguous(), requires_grad=False)
    self.Y_grid = nn.Parameter(Y_grid.contiguous(), requires_grad=False)
def get_subtree(tree, actions, batch_size, num_actions):
    # gets the subtree corresponding to actions taken
    action_indices = actions[:, 0]
    output = []
    for i, x in enumerate(tree[1:]):
        batch_starts = cudify(torch.arange(0, batch_size) * x.size(0) / batch_size)
        indices = []
        for b in range(batch_size):
            indices.append(cudify(torch.arange(action_indices[b] * num_actions ** i,
                                               (action_indices[b] + 1) * num_actions ** i)) + batch_starts[b])
        indices = torch.cat(indices).long()
        output.append(x[indices])
    return output
def __init__(self, dropout, dim, max_len=5000):
    pe = torch.zeros(max_len, dim)
    position = torch.arange(0, max_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, dim, 2) * -(math.log(10000.0) / dim))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    pe = pe.unsqueeze(1)
    super(PositionalEncoding, self).__init__()
    self.register_buffer('pe', pe)
    self.dropout = nn.Dropout(p=dropout)
    self.dim = dim
def make_positions(tokens, padding_idx, left_pad, offset=0):
    seqlen = tokens.size(1)
    if not hasattr(make_positions, 'range'):
        make_positions.range = tokens.new()
    if make_positions.range.numel() < offset + seqlen:
        # offset positions by the padding index
        torch.arange(padding_idx + 1, padding_idx + 1 + offset + seqlen,
                     out=make_positions.range)
    mask = tokens.ne(padding_idx)
    positions = make_positions.range[offset:offset + seqlen].expand_as(tokens)
    if left_pad:
        positions = positions - mask.size(1) + mask.long().sum(dim=1).unsqueeze(1)
    return tokens.clone().masked_scatter_(mask, positions[mask])
def setUp(self):
    self.v = Variable(torch.Tensor([3]))
    self.vs = Variable(torch.Tensor([[0], [1], [2], [3]]))
    self.vs_expanded = self.vs.expand(4, 3)
    self.test_data = Variable(torch.Tensor([[3], [3], [3]]))
    self.batch_test_data_1 = Variable(torch.arange(0, 4).unsqueeze(1).expand(4, 3))
    self.batch_test_data_2 = Variable(torch.arange(4, 8).unsqueeze(1).expand(4, 3))
    self.batch_test_data_3 = Variable(torch.Tensor([[3], [3], [3], [3]]))
    self.expected_support = [[0], [1], [2], [3]]
    self.expected_support_non_vec = [3]
    self.analytic_mean = 3
    self.analytic_var = 0
    self.n_samples = 10
def __init__(self, beam_size, batch_size, pad, bos, eos, n_best, mb_device,
             global_scorer, min_length, max_length, return_attention,
             block_ngram_repeat, exclusion_tokens, memory_lengths,
             stepwise_penalty, ratio):
    super(BeamSearch, self).__init__(
        pad, bos, eos, batch_size, mb_device, beam_size, min_length,
        block_ngram_repeat, exclusion_tokens, return_attention, max_length)
    # beam parameters
    self.global_scorer = global_scorer
    self.beam_size = beam_size
    self.n_best = n_best
    self.batch_size = batch_size
    self.ratio = ratio

    # result caching
    self.hypotheses = [[] for _ in range(batch_size)]

    # beam state
    self.top_beam_finished = torch.zeros([batch_size], dtype=torch.uint8)
    self.best_scores = torch.full([batch_size], -1e10, dtype=torch.float,
                                  device=mb_device)

    self._batch_offset = torch.arange(batch_size, dtype=torch.long)
    self._beam_offset = torch.arange(
        0, batch_size * beam_size, step=beam_size, dtype=torch.long,
        device=mb_device)
    self.topk_log_probs = torch.tensor(
        [0.0] + [float("-inf")] * (beam_size - 1), device=mb_device
    ).repeat(batch_size)
    self.select_indices = None
    self._memory_lengths = memory_lengths

    # buffers for the topk scores and 'backpointer'
    self.topk_scores = torch.empty((batch_size, beam_size),
                                   dtype=torch.float, device=mb_device)
    self.topk_ids = torch.empty((batch_size, beam_size), dtype=torch.long,
                                device=mb_device)
    self._batch_index = torch.empty([batch_size, beam_size],
                                    dtype=torch.long, device=mb_device)
    self.done = False

    # "global state" of the old beam
    self._prev_penalty = None
    self._coverage = None

    self._stepwise_cov_pen = (
        stepwise_penalty and self.global_scorer.has_cov_pen)
    self._vanilla_cov_pen = (
        not stepwise_penalty and self.global_scorer.has_cov_pen)
    self._cov_pen = self.global_scorer.has_cov_pen
def updateGradInput(self, input, gradOutput):
    input, mask = input
    if input.type() == 'torch.cuda.FloatTensor':
        torch.arange(0, mask.nelement(), out=self._maskIndexBufferCPU).resize_(mask.size())
        self._maskIndexBuffer.resize_(self._maskIndexBufferCPU.size()).copy_(self._maskIndexBufferCPU)
    else:
        torch.arange(0, mask.nelement(), out=self._maskIndexBuffer).resize_(mask.size())

    torch.masked_select(self._maskIndexBuffer, mask, out=self._maskIndices)
    self._gradBuffer.resize_(input.nelement()).zero_()
    self._gradBuffer.scatter_(0, self._maskIndices, gradOutput)
    self._gradBuffer.resize_(input.size())
    self.gradInput = [self._gradBuffer, self._gradMask.resize_(mask.size()).fill_(0)]
    return self.gradInput
def test_inference_whiten_vsgp():
    N = 1000
    X = dist.Uniform(torch.zeros(N), torch.ones(N) * 5).sample()
    y = 0.5 * torch.sin(3 * X) + dist.Normal(torch.zeros(N), torch.ones(N) * 0.5).sample()
    kernel = RBF(input_dim=1)
    Xu = torch.arange(0, 5.5, 0.5)

    vsgp = VariationalSparseGP(X, y, kernel, Xu, Gaussian(), whiten=True)
    vsgp.optimize(optim.Adam({"lr": 0.01}), num_steps=1000)

    Xnew = torch.arange(0, 5.05, 0.05)
    loc, var = vsgp(Xnew, full_cov=False)
    target = 0.5 * torch.sin(3 * Xnew)

    assert_equal((loc - target).abs().mean().item(), 0, prec=0.07)
def test_inference_sgpr():
    N = 1000
    X = dist.Uniform(torch.zeros(N), torch.ones(N) * 5).sample()
    y = 0.5 * torch.sin(3 * X) + dist.Normal(torch.zeros(N), torch.ones(N) * 0.5).sample()
    kernel = RBF(input_dim=1)
    Xu = torch.arange(0, 5.5, 0.5)

    sgpr = SparseGPRegression(X, y, kernel, Xu)
    sgpr.optimize(optim.Adam({"lr": 0.01}), num_steps=1000)

    Xnew = torch.arange(0, 5.05, 0.05)
    loc, var = sgpr(Xnew, full_cov=False)
    target = 0.5 * torch.sin(3 * Xnew)

    assert_equal((loc - target).abs().mean().item(), 0, prec=0.07)
def test_step(self):
    v = Variable(torch.arange(10))
    self.assertEqual(v[::1], v)
    self.assertEqual(v[::2].data.tolist(), [0, 2, 4, 6, 8])
    self.assertEqual(v[::3].data.tolist(), [0, 3, 6, 9])
    self.assertEqual(v[::11].data.tolist(), [0])
    self.assertEqual(v[1:6:2].data.tolist(), [1, 3, 5])
def imgEncodeTorch(self, abimg):
    abimg = abimg.cuda()
    w, h = abimg.shape[1], abimg.shape[2]
    label = torch.zeros((w * h, 313))
    label = label.cuda()

    (dists, indexes) = self.nbrs.kneighbors(
        abimg.view(abimg.shape[0], -1).t(), self.NN)
    dists = torch.from_numpy(dists).float().cuda()
    indexes = torch.from_numpy(indexes).cuda()

    weights = torch.exp(-dists ** 2 / (2 * self.sigma ** 2)).cuda()
    weights = weights / torch.sum(weights, dim=1).view(-1, 1)

    pixel_indexes = torch.arange(start=0, end=abimg.shape[1] * abimg.shape[2]).long()[:, np.newaxis]
    pixel_indexes = pixel_indexes.cuda()
    label[pixel_indexes, indexes] = weights
    label = label.t().contiguous().view(313, w, h)

    rebal_indexes = indexes[:, 0]
    rebal_weights = self.weights[rebal_indexes]
    rebal_weights = rebal_weights.view(w, h)
    rebal_label = rebal_weights * label
    return rebal_label
def test_int_indices_broadcast(self):
    # From the NumPy indexing example
    x = Variable(torch.arange(0, 12).view(4, 3))
    rows = Variable(torch.LongTensor([0, 3]))
    columns = Variable(torch.LongTensor([0, 2]))
    result = x[rows[:, None], columns]
    self.assertEqual(result.data.tolist(), [[0, 2], [9, 11]])
def forward(self, x, labels):
    """
    Args:
        x: feature matrix with shape (batch_size, feat_dim).
        labels: ground truth labels with shape (batch_size,).
    """
    batch_size = x.size(0)
    distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
              torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
    distmat.addmm_(1, -2, x, self.centers.t())

    classes = torch.arange(self.num_classes).long()
    if self.use_gpu:
        classes = classes.cuda()
    labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
    mask = labels.eq(classes.expand(batch_size, self.num_classes))

    dist = []
    for i in range(batch_size):
        value = distmat[i][mask[i]]
        value = value.clamp(min=1e-12, max=1e+12)  # for numerical stability
        dist.append(value)
    dist = torch.cat(dist)
    loss = dist.mean()
    return loss
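Since mask has exactly one True per row (the ground-truth class), the per-sample Python loop above can be collapsed into a masked sum; a hedged vectorized equivalent:

# Hedged sketch: one positive entry per row, so a masked sum picks it out.
value = (distmat * mask.float()).sum(dim=1)      # (batch_size,)
loss = value.clamp(min=1e-12, max=1e+12).mean()  # same result as the loop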
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None,
            eval=False, flip_fms=None, extra={}):
    # input p2-p5
    pred_emd_pred_cls_0, pred_emd_pred_delta_0, pred_emd_pred_cls_1, pred_emd_pred_delta_1, \
        pred_ref_pred_cls_0, pred_ref_pred_delta_0, pred_ref_pred_cls_1, pred_ref_pred_delta_1, \
        pool_features, refine_features = self._half_forward(fpn_fms, rcnn_rois, keep_pool_feature=True)

    if self.training or eval:
        loss0 = emd_loss(
            pred_emd_pred_delta_0, pred_emd_pred_cls_0,
            pred_emd_pred_delta_1, pred_emd_pred_cls_1,
            bbox_targets, labels)
        loss1 = emd_loss(
            pred_emd_pred_delta_1, pred_emd_pred_cls_1,
            pred_emd_pred_delta_0, pred_emd_pred_cls_0,
            bbox_targets, labels)
        loss2 = emd_loss(
            pred_ref_pred_delta_0, pred_ref_pred_cls_0,
            pred_ref_pred_delta_1, pred_ref_pred_cls_1,
            bbox_targets, labels)
        loss3 = emd_loss(
            pred_ref_pred_delta_1, pred_ref_pred_cls_1,
            pred_ref_pred_delta_0, pred_ref_pred_cls_0,
            bbox_targets, labels)
        loss_rcnn = torch.cat([loss0, loss1], axis=1)
        loss_ref = torch.cat([loss2, loss3], axis=1)
        with torch.no_grad():
            _, min_indices_rcnn = loss_rcnn.min(axis=1)
            _, min_indices_ref = loss_ref.min(axis=1)
        loss_rcnn = loss_rcnn[torch.arange(loss_rcnn.shape[0]), min_indices_rcnn]
        loss_rcnn = loss_rcnn.sum() / loss_rcnn.shape[0]
        loss_ref = loss_ref[torch.arange(loss_ref.shape[0]), min_indices_ref]
        loss_ref = loss_ref.sum() / loss_ref.shape[0]
        # loss, _ = loss.min(axis=1)
        # loss_emd = loss.sum() / loss.shape[0]
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = loss_rcnn
        loss_dict['loss_ref_emd'] = loss_ref
        if self.args.flip_JSD:
            if self.args.flip_JSD_0g:
                with torch.no_grad():
                    _, _, _, _, \
                        f_pred_ref_pred_cls_0, _, \
                        f_pred_ref_pred_cls_1, _ = self._half_forward(flip_fms, rcnn_rois)
            else:
                _, _, _, _, \
                    f_pred_ref_pred_cls_0, _, \
                    f_pred_ref_pred_cls_1, _ = self._half_forward(flip_fms, rcnn_rois)
            loss_flip_JSD = _flip_loss_JSD(F.softmax(pred_ref_pred_cls_0, dim=-1),
                                           F.softmax(f_pred_ref_pred_cls_0, dim=-1))
            loss_flip_JSD += _flip_loss_JSD(F.softmax(pred_ref_pred_cls_1, dim=-1),
                                            F.softmax(f_pred_ref_pred_cls_1, dim=-1))
            loss_dict['loss_flip_JSD'] = loss_flip_JSD
        if self.args.diff_loss:
            loss_dict['diff_loss'] = _diff_loss(refine_features[0], refine_features[1])
        return loss_dict
    else:
        pred_ref_scores_0 = F.softmax(pred_ref_pred_cls_0, dim=-1)
        pred_ref_scores_1 = F.softmax(pred_ref_pred_cls_1, dim=-1)
        pred_bbox_0 = restore_bbox(rcnn_rois[:, 1:5], pred_ref_pred_delta_0, True)
        pred_bbox_1 = restore_bbox(rcnn_rois[:, 1:5], pred_ref_pred_delta_1, True)
        pred_bbox_0 = torch.cat([pred_bbox_0, pred_ref_scores_0[:, 1].reshape(-1, 1)], dim=1)
        pred_bbox_1 = torch.cat([pred_bbox_1, pred_ref_scores_1[:, 1].reshape(-1, 1)], dim=1)
        pred_bbox = torch.cat((pred_bbox_0, pred_bbox_1), dim=1).reshape(-1, 5)
        return pred_bbox
We will see what these mean in this tutorial.
"""

import torch
from torch import nn
import torch.nn.functional as F

## Tensors

# Tensors are the most basic data type in pytorch.
# They are very similar to numpy arrays in terms of the interface and supported functions.
# For example:
a = torch.tensor([1, 2, 3])         # numpy: a = np.array([1, 2, 3])
b = torch.arange(12).reshape(4, 3)  # numpy: b = np.arange(12).reshape((4, 3))
c = torch.full((2, 2), 7)           # numpy: c = np.full((2, 2), 7)
print(a + b)                        # numpy: a + b; note the broadcasting here
print(b.sum(dim=1))                 # numpy: b.sum(axis=1)
print(a.type(torch.float))          # equivalently a.float() or a.to(torch.float); numpy: a.astype(np.float)
print(b[1:3, 2])                    # numpy: b[1:3, 2]

# pytorch tensors are more powerful than numpy arrays. For example, you can move them to GPUs for
# faster computation (e.g., matrix multiplication).
b.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# They also support gradient tracking, as we will see below.

# You may ask, why did we introduce numpy first then? First of all, numpy is still very widely used
# beyond automatic differentiation. Even for ML/NLP, it is often used for data loading, metric
# calculation, etc. Second, since pytorch tensors have a very similar interface as numpy arrays,
# understanding the latter makes it easy to learn the former.
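# As promised above, tensors also support gradient tracking; a minimal sketch:
x = torch.tensor([1., 2., 3.], requires_grad=True)
y = (x ** 2).sum()  # y = 1 + 4 + 9 = 14
y.backward()        # populates x.grad with dy/dx = 2x
print(x.grad)       # tensor([2., 4., 6.])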
def forward(self, x, targets=None):
    nA = self.num_anchors
    nB = x.size(0)
    nG = x.size(2)
    stride = self.image_dim / nG

    # Tensors for cuda support
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
    LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
    ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

    prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]                  # Width
    h = prediction[..., 3]                  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # Calculate offsets for each grid
    grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
    grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
    scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
    anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
    anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

    # Add offset and scale with anchors
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + grid_x
    pred_boxes[..., 1] = y.data + grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

    # Training
    if targets is not None:
        if x.is_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()
            self.ce_loss = self.ce_loss.cuda()

        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes=pred_boxes.cpu().data,
            pred_conf=pred_conf.cpu().data,
            pred_cls=pred_cls.cpu().data,
            target=targets.cpu().data,
            anchors=scaled_anchors.cpu().data,
            num_anchors=nA,
            num_classes=self.num_classes,
            grid_size=nG,
            ignore_thres=self.ignore_thres,
            img_dim=self.image_dim,
        )

        nProposals = int((pred_conf > 0.5).sum().item())
        recall = float(nCorrect / nGT) if nGT else 1
        precision = float(nCorrect / nProposals)

        # Handle masks
        mask = Variable(mask.type(ByteTensor))
        conf_mask = Variable(conf_mask.type(ByteTensor))

        # Handle target variables
        tx = Variable(tx.type(FloatTensor), requires_grad=False)
        ty = Variable(ty.type(FloatTensor), requires_grad=False)
        tw = Variable(tw.type(FloatTensor), requires_grad=False)
        th = Variable(th.type(FloatTensor), requires_grad=False)
        tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
        tcls = Variable(tcls.type(LongTensor), requires_grad=False)

        # Get conf mask where gt and where there is no gt
        conf_mask_true = mask
        conf_mask_false = conf_mask - mask

        # Mask outputs to ignore non-existing objects
        loss_x = self.mse_loss(x[mask], tx[mask])
        loss_y = self.mse_loss(y[mask], ty[mask])
        loss_w = self.mse_loss(w[mask], tw[mask])
        loss_h = self.mse_loss(h[mask], th[mask])
        loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) \
            + self.bce_loss(pred_conf[conf_mask_true], tconf[conf_mask_true])
        loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
            loss_cls.item(),
            recall,
            precision,
        )
    else:
        # If not in training phase return predictions
        output = torch.cat(
            (
                pred_boxes.view(nB, -1, 4) * stride,
                pred_conf.view(nB, -1, 1),
                pred_cls.view(nB, -1, self.num_classes),
            ),
            -1,
        )
        return output
def fliphor(self, inputs):
    inv_idx = torch.arange(inputs.size(3) - 1, -1, -1).long()  # N x C x H x W
    return inputs.index_select(3, inv_idx)
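A quick hedged check that the reversed-arange index_select above matches torch.flip on the width axis:

import torch

x = torch.arange(24.).view(1, 2, 3, 4)  # N x C x H x W
inv_idx = torch.arange(x.size(3) - 1, -1, -1).long()
assert torch.equal(x.index_select(3, inv_idx), torch.flip(x, dims=[3]))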
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu_ids", default='0', type=str)
    parser.add_argument(
        "--bert_config_file",
        default='check_points/pretrain_models/bert_wwm_ext_base/bert_config.json',
        type=str,
        help="The config json file corresponding to the pre-trained BERT model. "
             "This specifies the model architecture.")
    parser.add_argument(
        "--vocab_file",
        default='check_points/pretrain_models/bert_wwm_ext_base/vocab.txt',
        type=str,
        help="The vocabulary file that the BERT model was trained on.")
    parser.add_argument(
        "--init_restore_dir",
        required=True,
        type=str,
        help="Initial checkpoint (usually from a pre-trained BERT model).")
    parser.add_argument("--input_dir", required=True, default='dataset/CHID')
    parser.add_argument(
        "--output_dir",
        required=True,
        type=str,
        help="The output directory where the model checkpoints and predictions will be written.")
    parser.add_argument(
        "--predict_file",
        required=True,
        type=str,
        help="The input file to run predictions on.")
    parser.add_argument('--output_file', type=str, default='predictions_test.json')

    ## Other parameters
    parser.add_argument(
        "--max_seq_length",
        default=64,
        type=int,
        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument(
        "--max_num_choices",
        default=10,
        type=int,
        help="The maximum number of candidate answers; anything shorter than this will be padded.")
    parser.add_argument("--predict_batch_size",
                        default=16,
                        type=int,
                        help="Total batch size for predictions.")
    parser.add_argument(
        "--do_lower_case",
        default=True,
        action='store_true',
        help="Whether to lower case the input text. True for uncased models, False for cased models.")
    parser.add_argument(
        '--fp16',
        default=True,
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")

    args = parser.parse_args()
    print(args)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    print("device: {}, distributed training: {}, 16-bits training: {}".format(
        device, bool(args.local_rank != -1), args.fp16))

    tokenizer = BertTokenizer(vocab_file=args.vocab_file,
                              do_lower_case=args.do_lower_case)

    test_example_file = os.path.join(
        args.input_dir, 'test_examples_{}.pkl'.format(str(args.max_seq_length)))
    test_feature_file = os.path.join(
        args.input_dir, 'test_features_{}.pkl'.format(str(args.max_seq_length)))

    eval_features = generate_input(args.predict_file,
                                   None,
                                   test_example_file,
                                   test_feature_file,
                                   tokenizer,
                                   max_seq_length=args.max_seq_length,
                                   max_num_choices=args.max_num_choices,
                                   is_training=False)

    # Prepare model
    if 'albert' in args.bert_config_file:
        bert_config = ALBertConfig.from_json_file(args.bert_config_file)
        model = ALBertForMultipleChoice(bert_config, num_choices=args.max_num_choices)
    else:
        bert_config = BertConfig.from_json_file(args.bert_config_file)
        model = BertForMultipleChoice(bert_config, num_choices=args.max_num_choices)
    model = model.to(device)

    if args.init_restore_dir.endswith('.pth') or \
            args.init_restore_dir.endswith('.pt') or \
            args.init_restore_dir.endswith('.bin'):
        pass
    else:
        args.init_restore_dir = glob(args.init_restore_dir + '*.pth')
        assert len(args.init_restore_dir) == 1
        args.init_restore_dir = args.init_restore_dir[0]
    torch_init_model(model, args.init_restore_dir)
    if args.fp16:
        model = model.half()

    print("***** Running predictions *****")
    print("Num split examples = %d" % len(eval_features))
    print("Batch size = %d" % args.predict_batch_size)

    all_example_ids = [f.example_id for f in eval_features]
    all_tags = [f.tag for f in eval_features]
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_masks = torch.tensor([f.input_masks for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_choice_masks = torch.tensor([f.choice_masks for f in eval_features], dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)

    eval_data = TensorDataset(all_input_ids, all_input_masks, all_segment_ids,
                              all_choice_masks, all_example_index)
    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data,
                                 sampler=eval_sampler,
                                 batch_size=args.predict_batch_size)

    model.eval()
    all_results = []
    print("Start evaluating")
    for input_ids, input_masks, segment_ids, choice_masks, example_indices in tqdm(
            eval_dataloader, desc="Evaluating", disable=None):
        if len(all_results) == 0:
            print('shape of input_ids: {}'.format(input_ids.shape))
        input_ids = input_ids.to(device)
        input_masks = input_masks.to(device)
        segment_ids = segment_ids.to(device)
        with torch.no_grad():
            batch_logits = model(input_ids=input_ids,
                                 token_type_ids=segment_ids,
                                 attention_mask=input_masks,
                                 labels=None)
        for i, example_index in enumerate(example_indices):
            logits = batch_logits[i].detach().cpu().tolist()
            eval_feature = eval_features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            all_results.append(
                RawResult(unique_id=unique_id,
                          example_id=all_example_ids[unique_id],
                          tag=all_tags[unique_id],
                          logit=logits))
    else:
        print("prediction is over")

    print('decoder raw results')
    tmp_predict_file = os.path.join(args.output_dir, "test_raw_predictions.pkl")
    output_prediction_file = os.path.join(args.output_dir, args.output_file)
    results = get_final_predictions(all_results, tmp_predict_file, g=True)
    write_predictions(results, output_prediction_file)
    print('predictions saved to {}'.format(output_prediction_file))
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels
            (absolute units wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the
            "NB: Legacy bug ..." comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(
        itertools.count(), proposals, pred_objectness_logits
    ):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], level_ids[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
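The commented-out topk call in the function above and the sort-then-slice that replaced it select the same values; a small hedged check (indices can differ only under ties):

import torch

logits = torch.randn(2, 10)
k = 4
topk_scores, _ = logits.topk(k, dim=1)  # sorted=True by default
sorted_logits, _ = logits.sort(descending=True, dim=1)
assert torch.equal(topk_scores, sorted_logits[:, :k])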
def mask_cross_entropy(pred, target, label):
    num_rois = pred.size()[0]
    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
    pred_slice = pred[inds, label].squeeze(1)
    return F.binary_cross_entropy_with_logits(
        pred_slice, target, reduction='elementwise_mean')[None]
def compute_log_R_O_nfac(log_p, so_perms=None):
    """
    Computes all first and second order log ratios by computing P(S) for all
    second order sets leaving two elements out of S, where the individual P(S)
    are computed by naive enumeration of all permutations.
    This is inefficient, especially for large sample sizes, but can be used to
    validate alternative implementations.
    """
    k = log_p.size(-1)
    if k == 1:
        # If k = 1, second order is not defined, and first order
        # P(S\{s}) / P(S) = P({}) / P({s}) = 1 / p_s
        # log (1 / p_s) = - log p_s
        return -log_p[...], None

    if so_perms is None:
        if k in SO_PERM_CACHE:
            so_perms = SO_PERM_CACHE[k]
        else:
            so_perms = all_2nd_order_perms(torch.arange(k, dtype=torch.long), device=log_p.device)
            SO_PERM_CACHE[k] = so_perms

    # perm_ids = all_perms(torch.arange(k - 2, dtype=torch.long), device=log_p.device)
    keys, rest = so_perms
    first, second = torch.unbind(keys, -1)

    norm1 = log1mexp(log_p[..., first])
    norm2 = norm1 + log1mexp(log_p[..., second] - norm1)

    # Second order leave-out log probabilities
    log_P2s = log_p.new_zeros(log_p.size(0), k, k)
    if k > 2:  # For k = 2, the remainder set is empty with log probability zero
        # Index to get (batch_size, num_second_orders, num_perms, rest=k-2)
        log_p_rest = log_p[..., rest] - norm2[..., None, None]

        # (batch_size, num_second_orders, num_perms)
        logprobs = log_pl_rec(log_p_rest, -1)

        # (batch_size, num_second_orders)
        log_P = logprobs.logsumexp(-1)
        log_P2s[:, first, second] = log_P
        log_P2s[:, second, first] = log_P

    # Compute first order log_P
    log_P1s = torch.zeros_like(log_p)
    for i in range(k):
        # P(S) = sum_{s in S} p(s) P^{D\s}(S\s)
        log_p_without_i = torch.cat((log_p[:, :i], log_p[:, i + 1:]), -1) - log1mexp(log_p[:, i, None])
        log_P2s_without_i = torch.cat((log_P2s[:, i, :i], log_P2s[:, i, i + 1:]), -1)
        log_P1s[:, i] = (log_p_without_i + log_P2s_without_i).logsumexp(-1)
        log_P2s[:, i, i] = log_P1s[:, i]

    log_P = (log_p + log_P1s).logsumexp(-1)

    # A bit hacky, but if (almost) all probability mass is on a few categories
    # we get numerical problems, since the probability for the other classes is
    # basically zero. In that case we can just compute an exact gradient instead.
    # We choose this where the probability mass > 1 - 1e-5, so approx logprob > -1e-5.
    is_exact = log_p.logsumexp(-1) > -1e-5

    log_R1 = log_P1s - log_P[..., None]
    log_R2 = log_P2s - log_P1s[..., None]

    log_R1[is_exact] = 0
    log_R2[is_exact] = 0

    assert not torch.isnan(log_R1).any()
    assert not torch.isnan(log_R2).any()

    return log_R1, log_R2
def test_step_result_preds(self, batch, batch_idx, optimizer_idx=None):
    x, y = batch
    x = x.view(x.size(0), -1)
    y_hat = self(x)
    loss_test = self.loss(y, y_hat)

    # acc
    labels_hat = torch.argmax(y_hat, dim=1)
    test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
    test_acc = torch.tensor(test_acc)
    test_acc = test_acc.type_as(x)

    # Do regular EvalResult Logging
    result = EvalResult(checkpoint_on=loss_test)
    result.log('test_loss', loss_test)
    result.log('test_acc', test_acc)

    batch_size = x.size(0)
    lst_of_str = [random.choice(['dog', 'cat']) for i in range(batch_size)]
    lst_of_int = [random.randint(500, 1000) for i in range(batch_size)]
    lst_of_lst = [[x] for x in lst_of_int]
    lst_of_dict = [{k: v} for k, v in zip(lst_of_str, lst_of_int)]

    # This is passed in from pytest via parameterization
    option = getattr(self, 'test_option', 0)
    prediction_file = getattr(self, 'prediction_file', 'predictions.pt')

    lazy_ids = torch.arange(batch_idx * self.batch_size,
                            batch_idx * self.batch_size + x.size(0))

    # Base
    if option == 0:
        result.write('idxs', lazy_ids, prediction_file)
        result.write('preds', labels_hat, prediction_file)

    # Check mismatching tensor len
    elif option == 1:
        result.write('idxs', torch.cat((lazy_ids, lazy_ids)), prediction_file)
        result.write('preds', labels_hat, prediction_file)

    # write multi-dimension
    elif option == 2:
        result.write('idxs', lazy_ids, prediction_file)
        result.write('preds', labels_hat, prediction_file)
        result.write('x', x, prediction_file)

    # write str list
    elif option == 3:
        result.write('idxs', lazy_ids, prediction_file)
        result.write('vals', lst_of_str, prediction_file)

    # write int list
    elif option == 4:
        result.write('idxs', lazy_ids, prediction_file)
        result.write('vals', lst_of_int, prediction_file)

    # write nested list
    elif option == 5:
        result.write('idxs', lazy_ids, prediction_file)
        result.write('vals', lst_of_lst, prediction_file)

    # write dict list
    elif option == 6:
        result.write('idxs', lazy_ids, prediction_file)
        result.write('vals', lst_of_dict, prediction_file)

    return result
def forward(self, features, labels=None, mask=None):
    """Compute loss for model. If both `labels` and `mask` are None,
    it degenerates to SimCLR unsupervised loss:
    https://arxiv.org/pdf/2002.05709.pdf

    Args:
        features: hidden vector of shape [bsz, n_views, ...].
        labels: ground truth of shape [bsz].
        mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
            has the same class as sample i. Can be asymmetric.
    Returns:
        A loss scalar.
    """
    device = (torch.device('cuda')
              if features.is_cuda
              else torch.device('cpu'))

    if len(features.shape) < 3:
        raise ValueError('`features` needs to be [bsz, n_views, ...],'
                         'at least 3 dimensions are required')
    if len(features.shape) > 3:
        features = features.view(features.shape[0], features.shape[1], -1)

    batch_size = features.shape[0]
    if labels is not None and mask is not None:
        raise ValueError('Cannot define both `labels` and `mask`')
    elif labels is None and mask is None:
        mask = torch.eye(batch_size, dtype=torch.float32).to(device)
    elif labels is not None:
        labels = labels.contiguous().view(-1, 1)
        if labels.shape[0] != batch_size:
            raise ValueError('Num of labels does not match num of features')
        mask = torch.eq(labels, labels.T).float().to(device)
    else:
        mask = mask.float().to(device)

    contrast_count = features.shape[1]
    contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
    if self.contrast_mode == 'one':
        anchor_feature = features[:, 0]
        anchor_count = 1
    elif self.contrast_mode == 'all':
        anchor_feature = contrast_feature
        anchor_count = contrast_count
    else:
        raise ValueError('Unknown mode: {}'.format(self.contrast_mode))

    # compute logits
    anchor_dot_contrast = torch.div(
        torch.matmul(anchor_feature, contrast_feature.T),
        self.temperature)
    # for numerical stability
    logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
    logits = anchor_dot_contrast - logits_max.detach()

    # tile mask
    mask = mask.repeat(anchor_count, contrast_count)
    # mask-out self-contrast cases
    logits_mask = torch.scatter(
        torch.ones_like(mask),
        1,
        torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
        0
    )
    mask = mask * logits_mask

    # compute log_prob
    exp_logits = torch.exp(logits) * logits_mask
    log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

    # compute mean of log-likelihood over positive
    mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)

    # loss
    loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
    loss = loss.view(anchor_count, batch_size).mean()

    return loss
def make_ix_like(input, dim=0):
    d = input.size(dim)
    rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype)
    view = [1] * input.dim()
    view[0] = -1
    return rho.view(view).transpose(0, dim)
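make_ix_like builds the 1..d ramp used by sparsemax-style thresholding, shaped so it broadcasts along dim; a small example:

import torch

x = torch.zeros(2, 5)
print(make_ix_like(x, dim=1))
# tensor([[1., 2., 3., 4., 5.]])  -- shape (1, 5), broadcasts against x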
node_embs = torch.randn((n_nodes, emb_dims), requires_grad=True)
attn_dist = None
node_attns = [None] * n_nodes
trans_attns = [dict() for _ in range(n_nodes)]
trans_norm_factors = [dict() for _ in range(n_nodes)]
node_outs = [None] * n_nodes
state_fn = torch.nn.Linear(emb_dims, emb_dims)
transform_fn = [torch.nn.Linear(emb_dims, emb_dims) for _ in range(n_nodes)]
a2w_fn = lambda x: x

# input
input = torch.randn((batch_size, emb_dims), requires_grad=True)

# for master input
node_outs[0] = (input, torch.arange(batch_size))
out_nei_ids = nodes[master_input]['out_neis']
out, subbat_idx = node_outs[0]
state = state_fn(out)
out_nei_embs = node_embs.index_select(0, torch.tensor(out_nei_ids))
transition = torch.tensordot(state, out_nei_embs, dims=([1], [1])).softmax(1)
attn_sent = transition
attn_dist = torch.zeros((subbat_idx.size(0), n_nodes)).index_copy_(
    1, torch.tensor(out_nei_ids), attn_sent.data)

V, I = attn_dist.topk(k)
mask = torch.zeros_like(attn_dist).scatter_(1, I, torch.ones(I.size()))
mask_gt = torch.gt(attn_dist, epsilon).float()
mask.mul_(mask_gt)
V_gt = torch.gt(V, epsilon).float()
V.mul_(V_gt)
def make_dxx_lut(layout, block, step, trans, device, transform=lambda idx: idx):
    # load-balancing
    _empty = torch.tensor([], dtype=torch.int64, device=layout.device)
    segments = _empty.clone()
    column = _empty.clone()
    depth = _empty.clone()
    lockid = _empty.clone()
    maxid = _empty.clone()
    offsets = _empty.clone()
    current_offset = 0
    current_maxid = 0
    for z in range(layout.size(0)):
        if trans:
            sizes = torch.sum(layout[z, :, :], 1)
        else:
            sizes = torch.sum(layout[z, :, :], 0)
        z_segments, z_column, z_lockid, z_maxid, z_offsets = _sparse_matmul.load_balance(sizes, block)
        z_depth = z * torch.ones_like(z_segments)
        z_lockid[z_lockid > 0] += current_maxid
        current_maxid = z_lockid.max()
        # concatenate depth
        segments = torch.cat((segments, z_segments))
        column = torch.cat((column, z_column))
        depth = torch.cat((depth, z_depth))
        maxid = torch.cat((maxid, z_maxid))
        offsets = torch.cat((offsets, current_offset + z_offsets))
        lockid = torch.cat((lockid, z_lockid))
        current_offset += layout[z, :, :].sum()
    segments *= step

    # pointer increments
    if trans:
        nnz = layout.nonzero()
    else:
        nnz = layout.transpose(1, 2).nonzero()
    num_blocks = nnz.size(0)
    offsets = torch.min(offsets, (num_blocks - 1) * torch.ones_like(offsets))
    idx = transform(nnz[:, 2] * block)
    xincs = idx.clone()
    xincs[1:] -= idx[:-1]
    # divide block into multiple steps
    div = block // step
    xincs = xincs.view(-1, 1).repeat(1, div)
    xincs[:, 1:] = step
    xincs[:, 0] -= (div - 1) * step
    # first increment for each reduction is actually the offset
    xincs[offsets[segments > 0], 0] = idx[offsets[segments > 0]]
    xincs = xincs.view(-1)

    # block-mode input increments
    if trans:
        widx = torch.arange(num_blocks)
    else:
        widx = _empty.clone()
        current_offset = 0
        for z in range(layout.size(0)):
            layoutw = layout[z, :, :].clone()
            msum = layoutw.sum()
            layoutw[layoutw > 0] = 1 + torch.arange(msum)
            widx = torch.cat((widx, current_offset + layoutw.T[layoutw.T > 0] - 1))
            current_offset += msum
    wincs = widx * block * block
    wincs[1:] -= widx[:-1] * block * block
    wincs = wincs.view(-1, 1).repeat(1, div)
    if trans:
        wincs[:, 1:] = step
        wincs[:, 0] -= (div - 1) * step
    else:
        wincs[:, 1:] = step * block
        wincs[:, 0] -= (div - 1) * step * block
    wincs[offsets[segments > 0], 0] = widx[offsets[segments > 0]]
    wincs = wincs.view(-1)

    # adjust offset and segment size
    offsets *= 2 * div
    segments *= div

    # create header
    width = column.size(0)
    offsets += 6 * width
    header = torch.stack((offsets, segments, column, depth, lockid, maxid), dim=1).view(-1).contiguous()
    incs = torch.stack((xincs, wincs), dim=1).view(-1).contiguous()
    incs = torch.cat((incs, torch.zeros(2, device=incs.device, dtype=incs.dtype)))

    # create lut
    lut = torch.cat((header, incs))
    lut = lut.type(torch.int32).to(device)

    # create locks
    num_locks = max(1, lockid.max())
    return lut, num_locks, width, None
def forward(self, pts, fts, qrs):
    N = pts.shape[0]          # batch size
    point_num = pts.shape[1]

    # xconv operation
    _, indices_dilated = self.knn_indices_general(qrs, pts, True)
    indices = indices_dilated[:, :, ::self.dilation, :]  # indices of K nearest (dilation: d) points
    indices = (indices.view(-1, 2)[:, 1].cpu() +
               torch.arange(0, N * point_num, point_num).view(-1, 1).repeat(
                   1, self.P * self.K).view(-1)).cpu().numpy()

    if self.sorting_method is not None:
        raise NotImplementedError

    nn_pts = (pts.contiguous().view(-1, 3))[indices].view(
        N, self.P, self.K, 3)                     # coordinates of nearest-neighbour points
    nn_pts_center = qrs.unsqueeze(dim=2)          # (N, P, 1, 3) coordinates of queries
    nn_pts_local_origin = nn_pts - nn_pts_center  # (N, P, K, 3) relative coordinates

    knn_pts_len = torch.norm(nn_pts_local_origin, dim=3, keepdim=False).detach()  # (N,P,K) stop gradient here!
    nn_pts_max_len = torch.unsqueeze(torch.mean(knn_pts_len, dim=-1, keepdim=True), dim=-1)  # (N,P,1,1)
    nn_pts_local = nn_pts_local_origin / nn_pts_max_len

    nn_fts_from_pts_0 = self._modules['BN1'].forward(
        self._modules['dense1'].forward(nn_pts_local.view(-1, 3))).view(
            N, self.P, self.K, self.C_pts_fts)
    nn_fts_from_pts = self._modules['BN2'].forward(
        self._modules['dense2'].forward(nn_fts_from_pts_0.view(-1, self.C_pts_fts))).view(
            N, self.P, self.K, self.C_pts_fts)  # shape: (N,P,K,C_pts_fts)

    if fts is None:
        nn_fts_input = nn_fts_from_pts  # no concat!
    else:
        nn_fts_from_prev = (fts.contiguous().view(N * point_num, -1))[indices].contiguous().view(
            N, self.P, self.K, -1)  # the F matrix
        nn_fts_input = torch.cat([nn_fts_from_pts, nn_fts_from_prev], dim=-1)

    if self.with_X_transformation:
        ######################## X-transformation #########################
        nn_pts_local = nn_pts_local.transpose(1, 3).transpose(2, 3)  # (N,3,P,K)
        X_0 = self._modules["x_trans_conv1"].forward(nn_pts_local)
        X_0_KK = X_0.view(N, self.K, self.K, self.P).transpose(1, 2).transpose(2, 3)  # (N,K,P,K)
        X_1 = self._modules['x_trans_depthConv1'].forward(X_0_KK)  # (N,K*K,P,1)
        X_1_KK = X_1.view(N, self.K, self.K, self.P).transpose(1, 2).transpose(2, 3)  # (N,K,P,K)
        X_2 = self._modules['x_trans_depthConv2'].forward(X_1_KK)  # (N,K*K,P,1)
        X_2_KK = X_2.view(N, self.K, self.K, self.P).transpose(1, 2).transpose(2, 3)  # (N,K,P,K)
        X_2_KK = X_2_KK.transpose(1, 2).transpose(2, 3)  # (N,P,K,K) output of Step 4 of algorithm 1
        fts_X = torch.matmul(X_2_KK, nn_fts_input)       # output of Step 5 of algorithm 1
        ###################################################################
    else:
        fts_X = nn_fts_input

    fts_conv_3d = self._modules['fts_conv'].forward(
        fts_X.transpose(1, 3).transpose(2, 3)).transpose(1, 2).contiguous().view(-1, self.C)
    fts_conv_3d = self._modules["fts_conv_BN"].forward(fts_conv_3d).view(N, self.P, self.C)  # (N,P,C)

    if self.late_bn:
        raise NotImplementedError

    if self.with_global:
        fts_global = self._modules['dense3'].forward(qrs)
        return torch.cat([fts_global, fts_conv_3d], dim=-1)
    else:
        return fts_conv_3d
def summarization_step(self, lambda_coeff):
    """
    Summarization step. Can also be used for denoising auto-encoding.
    """
    assert lambda_coeff >= 0
    if lambda_coeff == 0:
        return
    params = self.params
    self.encoder.train()
    self.decoder.train()

    (table_entities, table_types, table_values, table_feats,
     table_labels, summaries, summary_labels) = self.get_batch('sm')

    enc_x1, enc_xlen = table_entities
    enc_x2, _ = table_types
    enc_x3, _ = table_values
    enc_x4, _ = table_feats
    enc_label, _ = table_labels
    dec_x, dec_xlen = summaries

    seq_length, batch_size = dec_x.size()

    # target words to predict
    alen = torch.arange(dec_xlen.max(), dtype=torch.long, device=dec_xlen.device)
    pred_mask = alen[:, None] < dec_xlen[None] - 1  # do not predict anything given the last target word
    dec_y = dec_x[1:].masked_select(pred_mask[:-1])
    assert len(dec_y) == (dec_xlen - 1).sum().item()

    # cuda
    if params.cuda:
        enc_x1, enc_x2, enc_x3, enc_x4, enc_xlen = to_cuda(enc_x1, enc_x2, enc_x3, enc_x4, enc_xlen)
        dec_x, dec_xlen, dec_y = to_cuda(dec_x, dec_xlen, dec_y)

    # encode source sentence
    encoder_output = self.encoder('fwd', x1=enc_x1, x2=enc_x2, x3=enc_x3, x4=enc_x4, lengths=enc_xlen)
    if params.sm_step_with_cs_proba:
        scores = self.encoder('score', tensor=encoder_output)
        encoder_output = encoder_output * scores
    encoder_output = encoder_output.transpose(0, 1)

    # decode target sentence
    decoder_output = self.decoder('fwd', x=dec_x, lengths=dec_xlen, causal=True,
                                  src_enc=encoder_output, src_len=enc_xlen)
    _, loss = self.decoder('predict', tensor=decoder_output, pred_mask=pred_mask, y=dec_y)
    self.stats['sm'].append(loss.item())
    loss = lambda_coeff * loss

    # optimize
    self.optimize(loss, ['encoder', 'decoder'])

    # number of processed sentences / words
    self.n_sentences += params.batch_size
    self.stats['processed_s'] += dec_xlen.size(0)
    self.stats['processed_w'] += (dec_xlen - 1).sum().item()
    self.stats['lambda_sm'] = lambda_coeff
def kNN(args, C, model, average, trainloader, testloader, K, recompute_memory=0):
    model.eval()
    model_time = AverageMeter()
    cluster_time = AverageMeter()

    total = 0
    testsize = testloader.dataset.__len__()
    ndata = trainloader.dataset.__len__()

    if recompute_memory:
        trainFeatures = torch.zeros(ndata, args.low_dim).cuda()
    else:
        trainFeatures = average.memory  # (num_samples, low_dim)

    # this is cifar10
    # trainLabels = torch.tensor(trainloader.dataset.targets).long().cuda()
    # this is for UCF101 or Kinetics
    trainLabels = torch.tensor([sample['label'] for sample in trainloader.dataset.data]).long().cuda()

    if recompute_memory:
        print('\nRecomputing memory bank....')
        # use test transform to go through all train samples and retrieve features as memory
        # transform_bak = trainloader.dataset.transform
        # trainloader.dataset.transform = testloader.dataset.transform
        temploader = torch.utils.data.DataLoader(trainloader.dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.n_threads,
                                                 pin_memory=True)
        memory_idx = torch.arange(args.batch_size * args.clips_num).view(
            args.clips_num, args.batch_size).t().cuda()
        batchSize = args.batch_size
        with torch.no_grad():
            for batch_idx, (inputs, _, indices) in enumerate(temploader):
                inputs = torch.cat(inputs, dim=0)
                inputs = inputs.cuda()
                bs = inputs.size(0)
                _, _, features = model(inputs)
                if batch_idx == len(temploader) - 1:
                    batch_size = bs // args.clips_num
                    memory_idx = torch.arange(bs).view(args.clips_num, batch_size).t().cuda()
                    f_means = torch.mean(features[memory_idx], dim=1)
                    trainFeatures[batch_idx * batchSize:, :] = f_means
                else:
                    f_means = torch.mean(features[memory_idx], dim=1)  # (batchSize, dim)
                    trainFeatures[batch_idx * batchSize:(batch_idx + 1) * batchSize, :] = f_means
        # trainloader.dataset.transform = transform_bak
        print('Finished!')

    top1 = 0
    top5 = 0

    # save plt distribution
    # Yd = torch.zeros(testsize, K).cuda()
    # NN_labels = torch.zeros(testsize, K).long().cuda()
    # labels = torch.zeros(testsize).long().cuda()
    with torch.no_grad():
        retrieval_one_hot = torch.zeros(K, C).cuda()
        for batch_idx, (inputs, targets, _) in enumerate(testloader):
            end = time.time()
            targets = targets.cuda()
            inputs = inputs.cuda()
            batchSize = inputs.size(0)
            _, _, features = model(inputs)
            total += targets.size(0)

            model_time.update(time.time() - end)
            end = time.time()

            # dist = pearson_coefficient_bank(features, trainFeatures.t())
            dist = torch.mm(features, trainFeatures.t())
            yd, yi = dist.topk(K, dim=1, largest=True, sorted=True)
            candidates = trainLabels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)

            # if batch_idx == 0:
            #     show_distribution(yd.cpu(), retrieval.cpu(), targets.cpu())

            # if batch_idx < len(testloader)-1:
            #     Yd[batch_idx*batchSize : (batch_idx+1)*batchSize, :] = yd
            #     NN_labels[batch_idx*batchSize : (batch_idx+1)*batchSize, :] = retrieval
            #     labels[batch_idx*batchSize : (batch_idx+1)*batchSize] = targets
            # else:
            #     Yd[batch_idx*batchSize:, :] = yd
            #     NN_labels[batch_idx*batchSize:, :] = retrieval
            #     labels[batch_idx*batchSize:] = targets

            retrieval_one_hot.resize_(batchSize * K, C).zero_()
            retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)  # inverse operation of torch.gather
            yd_transform = torch.exp(torch.div(yd, args.nce_t))  # softmax with temperature for non-parametric logits
            probs = torch.sum(torch.mul(retrieval_one_hot.view(batchSize, -1, C),
                                        yd_transform.view(batchSize, -1, 1)), 1)
            _, predictions = probs.sort(1, True)

            # find which predictions match the target
            correct = predictions.eq(targets.view(-1, 1))
            cluster_time.update(time.time() - end)

            top1 = top1 + correct.narrow(1, 0, 1).sum().item()
            top5 = top5 + correct.narrow(1, 0, 5).sum().item()

            if (batch_idx + 1) % 100 == 0:
                print('Test [{}/{}]\t'
                      'Model time: {model_time.val:.3f} ({model_time.avg:.3f})\t'
                      'Cluster time: {cluster_time.val:.3f} ({cluster_time.avg:.3f})\t'
                      'Top1: {:.2f}  Top5: {:.2f}'.format(
                          batch_idx + 1, len(testloader),
                          top1 * 100. / total, top5 * 100. / total,
                          model_time=model_time, cluster_time=cluster_time))

    print(top1 * 100. / total)

    return top1 * 100. / total, top5 * 100. / total
def fit(self, training_data, loss_func='kl', p_ij=None, pretrain=False, epochs=10,
        verbose=False, optimizer=torch.optim.Adam, batch_size=500, learning_rate=0.01):
    assert training_data.shape[1] == self.input_dim, \
        "Input training data must be same shape as training `num_inputs`"

    self.p_ij = p_ij
    self._epochs = epochs

    if pretrain:
        self.pretrain(training_data, epochs=5, verbose=verbose, batch_size=batch_size)

    if self.p_ij is None:
        self.p_ij = p_ij_sym(training_data.detach().cpu().numpy(),
                             self.perplexity, verbose=verbose).toarray()

    dataset = torch.utils.data.TensorDataset(training_data,
                                             torch.arange(training_data.shape[0]))
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

    optim = optimizer(self.parameters(), lr=learning_rate)

    if verbose:
        print('{time}: Beginning training for {epochs} epochs'.format(
            time=datetime.datetime.now(), epochs=epochs))

    loss_func = {
        'kl': kullback_leibler_loss,
        'kl_rev': kullback_leibler_reverse_loss,
        'js': jensen_shannon_loss,
        'frob': frobenius_loss,
        # 'bat': bhattacharyya_loss,
        'tot': total_variational_loss
    }[loss_func]

    for epoch in range(epochs):
        running_loss = 0
        for batch, data in enumerate(dataloader):
            features, indices = data
            p = submatrix(self.p_ij, indices.numpy())
            p = p / p.sum()
            if epoch < 10:  # exaggeration test
                exaggeration = 10.
                p *= exaggeration
            if self.use_cuda:
                features = features.cuda()
                p = p.cuda()
            optim.zero_grad()

            q = q_ij(self(features), self.alpha)
            q = q / q.sum()

            loss = loss_func(p, q)
            if epoch < 10:  # exaggeration test
                loss = loss / exaggeration - np.log(exaggeration)
            loss.backward()
            optim.step()
            running_loss += loss.item()
        if verbose:
            print('{time}: Loss after epoch {ep}: {rloss}'.format(
                time=datetime.datetime.now(), ep=epoch + 1, rloss=running_loss))

    if verbose:
        print('{time}: Finished training'.format(time=datetime.datetime.now()))
def _get_indexes_ce(predictions, targets, current_max_prob): predictions = torch.nn.functional.softmax(predictions, dim=1) return predictions[torch.arange(predictions.size(0)), targets] < current_max_prob
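# _get_indexes_ce uses torch.arange for batched row selection:
# predictions[torch.arange(N), targets] is equivalent to
# [predictions[i, targets[i]] for i in range(N)]. A tiny illustration
# (made-up logits; 0.5 stands in for current_max_prob):
import torch

logits = torch.tensor([[2.0, 0.5, 0.1],
                       [0.2, 1.5, 0.3]])
targets = torch.tensor([0, 2])
probs = torch.nn.functional.softmax(logits, dim=1)
target_probs = probs[torch.arange(2), targets]  # p(target class) per row
mask = target_probs < 0.5                       # rows the caller would flag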
def forward(self, fpn_fms, rpn_rois, rpn_rois_inds=None, im_info=None,
            gt_boxes=None, isEval=False, flip_fms=None, extra={}):
    if self.training or isEval:
        with torch.no_grad():
            rcnn_rois, rcnn_labels, rcnn_bbox_targets = fpn_roi_target(
                rpn_rois, rpn_rois_inds, im_info, gt_boxes, top_k=2)
    else:
        rcnn_rois = rpn_rois
    pred_ref_pred_cls, pred_ref_pred_delta, pred_cls_unrefined, pred_delta_unrefined, \
        pool_features = self._recursive_forward(fpn_fms, rcnn_rois, keep_pool_feature=True)
    if self.training or isEval:
        #loss_rcnn = emd_loss_multi(pred_delta_unrefined, pred_cls_unrefined, rcnn_bbox_targets, rcnn_labels, top_k=2)
        #loss_ref = emd_loss_multi(pred_ref_pred_delta, pred_ref_pred_cls, rcnn_bbox_targets, rcnn_labels, top_k=2)
        loss0 = emd_loss(
            pred_delta_unrefined[0], pred_cls_unrefined[0],
            pred_delta_unrefined[1], pred_cls_unrefined[1],
            rcnn_bbox_targets, rcnn_labels)
        loss1 = emd_loss(
            pred_delta_unrefined[1], pred_cls_unrefined[1],
            pred_delta_unrefined[0], pred_cls_unrefined[0],
            rcnn_bbox_targets, rcnn_labels)
        loss2 = emd_loss(
            pred_ref_pred_delta[0], pred_ref_pred_cls[0],
            pred_ref_pred_delta[1], pred_ref_pred_cls[1],
            rcnn_bbox_targets, rcnn_labels)
        loss3 = emd_loss(
            pred_ref_pred_delta[1], pred_ref_pred_cls[1],
            pred_ref_pred_delta[0], pred_ref_pred_cls[0],
            rcnn_bbox_targets, rcnn_labels)
        loss_rcnn = torch.cat([loss0, loss1], axis=1)
        loss_ref = torch.cat([loss2, loss3], axis=1)
        with torch.no_grad():
            _, min_indices_rcnn = loss_rcnn.min(axis=1)
            _, min_indices_ref = loss_ref.min(axis=1)
        loss_rcnn = loss_rcnn[torch.arange(loss_rcnn.shape[0]), min_indices_rcnn]
        loss_rcnn = loss_rcnn.sum() / loss_rcnn.shape[0]
        loss_ref = loss_ref[torch.arange(loss_ref.shape[0]), min_indices_ref]
        loss_ref = loss_ref.sum() / loss_ref.shape[0]
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = loss_rcnn
        loss_dict['loss_ref_emd'] = loss_ref
        if self.args.flip_JSD:
            if self.args.flip_JSD_0g:
                with torch.no_grad():
                    f_pred_ref_pred_cls, _, _, _ = self._recursive_forward(flip_fms, rcnn_rois)
            else:
                f_pred_ref_pred_cls, _, _, _ = self._recursive_forward(flip_fms, rcnn_rois)
            loss_flip_JSD = _flip_loss_JSD(F.softmax(pred_ref_pred_cls[0], dim=-1),
                                           F.softmax(f_pred_ref_pred_cls[0], dim=-1))
            loss_flip_JSD += _flip_loss_JSD(F.softmax(pred_ref_pred_cls[1], dim=-1),
                                            F.softmax(f_pred_ref_pred_cls[1], dim=-1))
            loss_dict['loss_flip_JSD'] = loss_flip_JSD
        return loss_dict
    else:
        pred_bboxes = None
        for p_cls, p_delta in zip(pred_ref_pred_cls, pred_ref_pred_delta):
            pred_ref_scores = F.softmax(p_cls, dim=-1)
            pred_bbox = restore_bbox(rcnn_rois[:, 1:5], p_delta, True)
            if pred_bboxes is None:
                pred_bboxes = torch.cat([pred_bbox, pred_ref_scores[:, 1].reshape(-1, 1)], dim=1)
            else:
                pred_bbox = torch.cat([pred_bbox, pred_ref_scores[:, 1].reshape(-1, 1)], dim=1)
                pred_bboxes = torch.cat([pred_bboxes, pred_bbox], dim=0)
        #pred_bbox = torch.cat((pred_bbox_0, pred_bbox_1), dim=1).reshape(-1, 5)
        return pred_bboxes
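# The permutation-selection step above reduces, per proposal, to taking the
# cheaper of the two possible matchings. A dummy-data sketch of just that
# reduction (emd_loss itself is not reproduced; losses are random here):
import torch

loss_a = torch.rand(6, 1)                  # matching order (0, 1)
loss_b = torch.rand(6, 1)                  # matching order (1, 0)
loss = torch.cat([loss_a, loss_b], dim=1)  # (N, 2)
_, min_idx = loss.min(dim=1)
best = loss[torch.arange(loss.shape[0]), min_idx]  # cheaper matching per row
mean_loss = best.sum() / best.shape[0]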
def run(proc_id, n_gpus, args, devices, data): # Unpack data device = devices[proc_id] if n_gpus > 1: dist_init_method = 'tcp://{master_ip}:{master_port}'.format( master_ip='127.0.0.1', master_port='12345') world_size = n_gpus th.distributed.init_process_group(backend="nccl", init_method=dist_init_method, world_size=world_size, rank=proc_id) train_mask, val_mask, test_mask, n_classes, g = data nfeat = g.ndata.pop('feat') labels = g.ndata.pop('label') in_feats = nfeat.shape[1] train_nid = th.LongTensor(np.nonzero(train_mask)).squeeze() val_nid = th.LongTensor(np.nonzero(val_mask)).squeeze() test_nid = th.LongTensor(np.nonzero(test_mask)).squeeze() # Create PyTorch DataLoader for constructing blocks n_edges = g.num_edges() train_seeds = np.arange(n_edges) if n_gpus > 0: num_per_gpu = (train_seeds.shape[0] + n_gpus - 1) // n_gpus train_seeds = train_seeds[proc_id * num_per_gpu : (proc_id + 1) * num_per_gpu \ if (proc_id + 1) * num_per_gpu < train_seeds.shape[0] else train_seeds.shape[0]] # Create sampler sampler = dgl.dataloading.MultiLayerNeighborSampler( [int(fanout) for fanout in args.fan_out.split(',')]) dataloader = dgl.dataloading.EdgeDataLoader( g, train_seeds, sampler, exclude='reverse_id', # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2. reverse_eids=th.cat( [th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)]), negative_sampler=NegativeSampler(g, args.num_negs, args.neg_share), batch_size=args.batch_size, shuffle=True, drop_last=False, pin_memory=True, num_workers=args.num_workers) # Define model and optimizer model = SAGE(in_feats, args.num_hidden, args.num_hidden, args.num_layers, F.relu, args.dropout) model = model.to(device) if n_gpus > 1: model = DistributedDataParallel(model, device_ids=[device], output_device=device) loss_fcn = CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=args.lr) # Training loop avg = 0 iter_pos = [] iter_neg = [] iter_d = [] iter_t = [] best_eval_acc = 0 best_test_acc = 0 for epoch in range(args.num_epochs): tic = time.time() # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. 
tic_step = time.time() for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(dataloader): batch_inputs = nfeat[input_nodes].to(device) d_step = time.time() pos_graph = pos_graph.to(device) neg_graph = neg_graph.to(device) blocks = [block.int().to(device) for block in blocks] # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, pos_graph, neg_graph) optimizer.zero_grad() loss.backward() optimizer.step() t = time.time() pos_edges = pos_graph.num_edges() neg_edges = neg_graph.num_edges() iter_pos.append(pos_edges / (t - tic_step)) iter_neg.append(neg_edges / (t - tic_step)) iter_d.append(d_step - tic_step) iter_t.append(t - d_step) if step % args.log_every == 0: gpu_mem_alloc = th.cuda.max_memory_allocated( ) / 1000000 if th.cuda.is_available() else 0 print( '[{}]Epoch {:05d} | Step {:05d} | Loss {:.4f} | Speed (samples/sec) {:.4f}|{:.4f} | Load {:.4f}| train {:.4f} | GPU {:.1f} MB' .format(proc_id, epoch, step, loss.item(), np.mean(iter_pos[3:]), np.mean(iter_neg[3:]), np.mean(iter_d[3:]), np.mean(iter_t[3:]), gpu_mem_alloc)) tic_step = time.time() if step % args.eval_every == 0 and proc_id == 0: eval_acc, test_acc = evaluate(model, g, nfeat, labels, train_nid, val_nid, test_nid, device) print('Eval Acc {:.4f} Test Acc {:.4f}'.format( eval_acc, test_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc print('Best Eval Acc {:.4f} Test Acc {:.4f}'.format( best_eval_acc, best_test_acc)) toc = time.time() if proc_id == 0: print('Epoch Time(s): {:.4f}'.format(toc - tic)) if epoch >= 5: avg += toc - tic if n_gpus > 1: th.distributed.barrier() if proc_id == 0: print('Avg epoch time: {}'.format(avg / (epoch - 4)))
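# The reverse_eids tensor built in run() encodes the Reddit convention that
# edge e and e ± |E|/2 are each other's reverses. A quick check, with a toy
# edge count, that the arange-based mapping is an involution:
import torch as th

n_edges = 10
rev = th.cat([th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)])
assert (rev[rev] == th.arange(n_edges)).all()  # rev(rev(e)) == e
assert rev[0].item() == n_edges // 2           # edge 0 pairs with edge |E|/2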
def interpolate_bilinear(grid, query_points, name='interpolate_bilinear', indexing='ij'):
    """Similar to Matlab's interp2 function.

    Finds values for query points on a grid using bilinear interpolation.

    Args:
        grid: a 4-D float `Tensor` of shape `[batch, height, width, channels]`.
        query_points: a 3-D float `Tensor` of N points with shape `[batch, N, 2]`.
        name: a name for the operation (optional).
        indexing: whether the query points are specified as row and column (ij),
            or Cartesian coordinates (xy).

    Returns:
        values: a 3-D `Tensor` with shape `[batch, N, channels]`

    Raises:
        ValueError: if the indexing mode is invalid, or if the shape of the
            inputs is invalid.
    """
    if indexing != 'ij' and indexing != 'xy':
        raise ValueError('Indexing mode must be \'ij\' or \'xy\'')

    shape = grid.shape
    if len(shape) != 4:
        msg = 'Grid must be 4 dimensional. Received size: '
        raise ValueError(msg + str(grid.shape))

    batch_size, height, width, channels = grid.shape
    shape = [batch_size, height, width, channels]
    query_type = query_points.dtype
    grid_type = grid.dtype
    num_queries = query_points.shape[1]

    alphas = []
    floors = []
    ceils = []

    index_order = [0, 1] if indexing == 'ij' else [1, 0]
    unstacked_query_points = query_points.unbind(2)

    for dim in index_order:
        queries = unstacked_query_points[dim]
        size_in_indexing_dimension = shape[dim + 1]

        # max_floor is size_in_indexing_dimension - 2 so that max_floor + 1
        # is still a valid index into the grid.
        max_floor = torch.tensor(size_in_indexing_dimension - 2, dtype=query_type)
        min_floor = torch.tensor(0.0, dtype=query_type)
        maxx = torch.max(min_floor, torch.floor(queries))
        floor = torch.min(maxx, max_floor)
        int_floor = floor.long()
        floors.append(int_floor)
        ceil = int_floor + 1
        ceils.append(ceil)

        # alpha has the same type as the grid, as we will directly use alpha
        # when taking linear combinations of pixel values from the image.
        alpha = (queries - floor).to(grid_type)
        min_alpha = torch.tensor(0.0, dtype=grid_type)
        max_alpha = torch.tensor(1.0, dtype=grid_type)
        alpha = torch.min(torch.max(min_alpha, alpha), max_alpha)

        # Expand alpha to [b, n, 1] so we can use broadcasting
        # (since the alpha values don't depend on the channel).
        alpha = torch.unsqueeze(alpha, 2)
        alphas.append(alpha)

    flattened_grid = torch.reshape(grid, [batch_size * height * width, channels])
    batch_offsets = torch.reshape(
        torch.arange(batch_size) * height * width, [batch_size, 1])

    # This wraps array_ops.gather. We reshape the image data such that the
    # batch, y, and x coordinates are pulled into the first dimension.
    # Then we gather. Finally, we reshape the output back. It's possible this
    # code would be made simpler by using array_ops.gather_nd.
    def gather(y_coords, x_coords, name):
        linear_coordinates = batch_offsets + y_coords * width + x_coords
        # Index rows of the flattened grid directly; gathering along the
        # transposed grid, as before, mixed up the batch and channel dims.
        gathered_values = flattened_grid[linear_coordinates.view(-1)]
        return torch.reshape(gathered_values, [batch_size, num_queries, channels])

    # grab the pixel values in the 4 corners around each query point
    top_left = gather(floors[0], floors[1], 'top_left')
    top_right = gather(floors[0], ceils[1], 'top_right')
    bottom_left = gather(ceils[0], floors[1], 'bottom_left')
    bottom_right = gather(ceils[0], ceils[1], 'bottom_right')

    interp_top = alphas[1] * (top_right - top_left) + top_left
    interp_bottom = alphas[1] * (bottom_right - bottom_left) + bottom_left
    interp = alphas[0] * (interp_bottom - interp_top) + interp_top

    return interp
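# A quick sanity check for interpolate_bilinear (with the row-indexed gather
# above): the centre of a 2x2 grid should interpolate to the mean of the four
# corner values.
import torch

grid = torch.tensor([[0.0, 1.0], [2.0, 3.0]]).view(1, 2, 2, 1)  # [batch, H, W, C]
queries = torch.tensor([[[0.5, 0.5]]])                          # [batch, N, 2]
out = interpolate_bilinear(grid, queries)                       # -> tensor([[[1.5]]])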
def forward(self, x, scale, gt_bboxes, gt_labels, original_size=None): if self.training: img_size = tuple(x.shape[2:]) # Feature extractor from the base network(e.g. VGG16, ResNet-101) feature = self._extract_features(x) # Region Proposal Network rpn_result = self.rpn(feature, img_size, scale, gt_bboxes[0], gt_labels[0]) roi, gt_roi_loc, gt_roi_label, rpn_loc_loss, rpn_cls_loss = rpn_result # RoI Pooling Layer roi_pool_feat = self._roi_pool(feature, roi) # bbox regression & classification roi_loc, roi_score = self._bbox_regression_and_classification(roi_pool_feat) # Faster R-CNN loss n_sample = roi_loc.shape[0] roi_loc = roi_loc.view(n_sample, -1, 4) roi_loc = roi_loc[t.arange(0, n_sample).long().cuda(), at.totensor(gt_roi_label).long()] gt_roi_loc = at.totensor(gt_roi_loc) gt_roi_label = at.totensor(gt_roi_label).long() roi_loc_loss = _bbox_regression_loss( roi_loc.contiguous(), gt_roi_loc, gt_roi_label.data, self.roi_sigma ) roi_cls_loss = F.cross_entropy(roi_score, gt_roi_label.cuda()) # Stack losses losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss] losses = losses + [sum(losses)] return LossTuple(*losses) else: with t.no_grad(): x = at.totensor(x).float() img_size = tuple(x.shape[2:]) # Feature extractor from the base network(e.g. VGG16, ResNet) feature = self._extract_features(x) # Region Proposal Network roi = self.rpn(feature, img_size, scale, None, None) # RoI Pooling Layer roi_pool_feat = self._roi_pool(feature, roi) # bbox regression & classification roi_loc, roi_score = self._bbox_regression_and_classification(roi_pool_feat) roi_loc = roi_loc.data roi_score = roi_score.data roi = at.totensor(roi) / scale # Convert predictions to bounding boxes in image coordinates. # Bounding boxes are scaled to the scale of the input images. mean = t.tensor(self.loc_normalize_mean).cuda(). \ repeat(self.n_class)[None] std = t.tensor(self.loc_normalize_std).cuda(). \ repeat(self.n_class)[None] roi_loc = (roi_loc * std + mean) roi_loc = roi_loc.view(-1, self.n_class, 4) roi = roi.view(-1, 1, 4).expand_as(roi_loc) bbox = loc2bbox(at.tonumpy(roi).reshape(-1, 4), at.tonumpy(roi_loc).reshape(-1, 4)) bbox = at.totensor(bbox) bbox = bbox.view(-1, self.n_class * 4) # clip bbox bbox[:, 0::2] = bbox[:, 0::2].clamp(min=0, max=original_size[0]) bbox[:, 1::2] = bbox[:, 1::2].clamp(min=0, max=original_size[1]) prob = F.softmax(at.totensor(roi_score), dim=1) bbox, label, score = self._suppress(bbox, prob) return bbox, label, score
def __call__(self, X, y): if not len(X.shape) == 2: raise ValueError("Expected X to have two dimensions but found %d." % len(X.shape)) return torch.arange(0, X.shape[1])
def main(): parser = argparse.ArgumentParser() ## Required parameters parser.add_argument( "--bert_model", default=None, type=str, required=True, help="Bert pre-trained model selected in the list: bert-base-uncased, " "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese." ) parser.add_argument( "--output_dir", default=None, type=str, required=True, help= "The output directory where the model checkpoints and predictions will be written." ) ## Other parameters parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. E.g., train-v1.1.json") parser.add_argument( "--predict_file", default=None, type=str, help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json" ) parser.add_argument( "--max_seq_length", default=384, type=int, help= "The maximum total input sequence length after WordPiece tokenization. Sequences " "longer than this will be truncated, and sequences shorter than this will be padded." ) parser.add_argument( "--doc_stride", default=128, type=int, help= "When splitting up a long document into chunks, how much stride to take between chunks." ) parser.add_argument( "--max_query_length", default=64, type=int, help= "The maximum number of tokens for the question. Questions longer than this will " "be truncated to this length.") parser.add_argument("--do_train", default=False, action='store_true', help="Whether to run training.") parser.add_argument("--do_predict", default=False, action='store_true', help="Whether to run eval on the dev set.") parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.") parser.add_argument("--predict_batch_size", default=8, type=int, help="Total batch size for predictions.") parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.") parser.add_argument( "--warmup_proportion", default=0.1, type=float, help= "Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10% " "of training.") parser.add_argument( "--n_best_size", default=20, type=int, help= "The total number of n-best predictions to generate in the nbest_predictions.json " "output file.") parser.add_argument( "--max_answer_length", default=30, type=int, help= "The maximum length of an answer that can be generated. This is needed because the start " "and end predictions are not conditioned on one another.") parser.add_argument( "--verbose_logging", default=False, action='store_true', help= "If true, all of the warnings related to data processing will be printed. " "A number of warnings are expected for a normal SQuAD evaluation.") parser.add_argument("--no_cuda", default=False, action='store_true', help="Whether not to use CUDA when available") parser.add_argument('--seed', type=int, default=42, help="random seed for initialization") parser.add_argument( '--gradient_accumulation_steps', type=int, default=1, help= "Number of updates steps to accumulate before performing a backward/update pass." ) parser.add_argument( "--do_lower_case", action='store_true', help= "Whether to lower case the input text. True for uncased models, False for cased models." 
    )
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16',
        default=False,
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument(
        '--null_score_diff_threshold',
        type=float,
        default=0.0,
        help=
        "If null_score - best_non_null is greater than the threshold predict null."
    )
    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = int(args.train_batch_size /
                                args.gradient_accumulation_steps)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_predict:
        raise ValueError(
            "At least one of `do_train` or `do_predict` must be True.")

    if args.do_train:
        if not args.train_file:
            raise ValueError(
                "If `do_train` is True, then `train_file` must be specified.")
    if args.do_predict:
        if not args.predict_file:
            raise ValueError(
                "If `do_predict` is True, then `predict_file` must be specified."
            )

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(
                args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model)

    train_examples = None
    num_train_steps = None
    if args.do_train:
        train_examples = read_squad_examples(input_file=args.train_file,
                                             is_training=True)
        num_train_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps * args.num_train_epochs)

    # Prepare model
    model = BertForQuestionAnswering.from_pretrained(
        args.bert_model,
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
        'distributed_{}'.format(args.local_rank))

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )
        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())

    # hack to remove pooler, which is not used,
    # as it produces None grads that break apex
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    t_total = num_train_steps
    if args.local_rank != -1:
        t_total = t_total // torch.distributed.get_world_size()
    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )
        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer,
                                       static_loss_scale=args.loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=t_total)

    global_step = 0
    if args.do_train:
        cached_train_features_file = args.train_file + '_{0}_{1}_{2}_{3}'.format(
            args.bert_model, str(args.max_seq_length), str(args.doc_stride),
            str(args.max_query_length))
        train_features = None
        try:
            with open(cached_train_features_file, "rb") as reader:
                train_features = pickle.load(reader)
        except Exception:
            train_features = convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                doc_stride=args.doc_stride,
                max_query_length=args.max_query_length,
                is_training=True)
            if args.local_rank == -1 or torch.distributed.get_rank() == 0:
                logger.info("  Saving train features into cached file %s",
                            cached_train_features_file)
                with open(cached_train_features_file, "wb") as writer:
                    pickle.dump(train_features, writer)
        logger.info("***** Running training *****")
        logger.info("  Num orig examples = %d", len(train_examples))
        logger.info("  Num split examples = %d", len(train_features))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_start_positions = torch.tensor(
            [f.start_position for f in train_features], dtype=torch.long)
        all_end_positions = torch.tensor(
            [f.end_position for f in train_features], dtype=torch.long)
        all_is_impossibles = torch.tensor(
            [int(f.is_impossible) for f in train_features], dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_start_positions,
                                   all_end_positions, all_is_impossibles)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                if n_gpu == 1:
                    batch = tuple(
                        t.to(device)
                        for t in batch)  # multi-gpu does scattering itself
                input_ids, input_mask, segment_ids, start_positions, end_positions, _ = batch
                loss = model(input_ids, segment_ids, input_mask,
                             start_positions, end_positions)
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    # modify learning rate with special warm up BERT uses
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / t_total, args.warmup_proportion)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    global_step += 1

    # Save a trained model
    model_to_save = model.module if hasattr(
        model, 'module') else model  # Only save the model itself
    output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
    if args.do_train:
        torch.save(model_to_save.state_dict(), output_model_file)

    # Load a trained model that you have fine-tuned
    model_state_dict = torch.load(output_model_file)
    model = BertForQuestionAnswering.from_pretrained(
        args.bert_model, state_dict=model_state_dict)
    model.to(device)

    if args.do_predict and (args.local_rank == -1
                            or torch.distributed.get_rank() == 0):
        eval_examples = read_squad_examples(input_file=args.predict_file,
                                            is_training=False)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=False)

        logger.info("***** Running predictions *****")
        logger.info("  Num orig examples = %d", len(eval_examples))
        logger.info("  Num split examples = %d", len(eval_features))
        logger.info("  Batch size = %d", args.predict_batch_size)

        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0),
                                         dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_example_index)

        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.predict_batch_size)

        model.eval()
        all_results = []
        logger.info("Start evaluating")
        for input_ids, input_mask, segment_ids, example_indices in tqdm(
                eval_dataloader, desc="Evaluating"):
            if len(all_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(all_results)))
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(
                    input_ids, segment_ids, input_mask)
            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                eval_feature = eval_features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                all_results.append(
                    RawResult(unique_id=unique_id,
                              start_logits=start_logits,
                              end_logits=end_logits))
        output_prediction_file = os.path.join(args.output_dir,
                                              "predictions.json")
        output_nbest_file = os.path.join(args.output_dir,
                                         "nbest_predictions.json")
        output_null_log_odds_file = os.path.join(args.output_dir,
                                                 "null_odds.json")
        write_predictions(eval_examples, eval_features, all_results,
                          args.n_best_size,
args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, True, args.null_score_diff_threshold)
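# The all_example_index tensor above is a common bookkeeping trick: attach
# torch.arange(N) to the eval TensorDataset so each batch carries the global
# indices of its examples, which map model outputs back to eval_features.
# A minimal illustration with dummy features:
import torch
from torch.utils.data import TensorDataset, DataLoader

feats = torch.randn(10, 4)
index = torch.arange(feats.size(0), dtype=torch.long)
loader = DataLoader(TensorDataset(feats, index), batch_size=4)
for batch_feats, batch_index in loader:
    # batch_index tells us which original rows this batch came from
    assert torch.equal(feats[batch_index], batch_feats)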
def forward(self, x, x_mask, y, hidden):
    x_embedded = self.embed(x)
    y_embedded = self.embed(y)
    B, T = x.size()
    # indices that reverse each right-padded sequence in place
    rev_index = torch.arange(T - 1, -1, -1).view(1, -1).expand(B, T).contiguous().long()
    mask_length = torch.sum(1 - x_mask.data, 1, keepdim=True).long().expand_as(rev_index)
    rev_index = rev_index - mask_length
    rev_index[rev_index < 0] = 0
    rev_index = Variable(rev_index)
    x_backward = Variable(x.data.new(x.data.size()).fill_(0))
    x_backward.scatter_(1, rev_index, x)
    x_backward_embedded = self.embed(x_backward)
    # encoder
    f_h = hidden[0]
    b_h = hidden[1]
    f_hiddens = []
    b_hiddens = []
    f_cells = []
    b_cells = []
    for i in range(T):
        f_h = self.fencoder(x_embedded[:, i, :], f_h)
        b_h = self.bencoder(x_backward_embedded[:, i, :], b_h)
        f_hiddens.append(f_h[0][-1].unsqueeze(1))  # f_h[0][-1]: hidden state of the last layer
        b_hiddens.append(b_h[0][-1].unsqueeze(1))
        f_cells.append(f_h[1][-1].unsqueeze(1))
        b_cells.append(b_h[1][-1].unsqueeze(1))
    f_hiddens = torch.cat(f_hiddens, 1)
    b_hiddens = torch.cat(b_hiddens, 1)
    f_cells = torch.cat(f_cells, 1)
    b_cells = torch.cat(b_cells, 1)
    hiddens = torch.cat([f_hiddens, b_hiddens], 2)
    cells = torch.cat([f_cells, b_cells], 2)
    # decoder
    B_y, T_y = y.size()
    h_mean = torch.mean(hiddens, 1).squeeze(1)
    c_mean = torch.mean(cells, 1).squeeze(1)
    hx, cx = [], []
    for i in range(self.num_layers):
        hx.append(h_mean.clone())
        cx.append(c_mean.clone())
    context = h_mean
    out_hiddens = []
    for i in range(T_y):
        hx, cx = self.decoder(y_embedded[:, i, :], (hx, cx))
        att = self.att_layer(hx[-1].unsqueeze(1).expand_as(hiddens).contiguous(),
                             hiddens.contiguous())
        context = (hiddens * att.unsqueeze(2).expand_as(hiddens)).sum(1).squeeze(1)
        out_hiddens.append(torch.cat([hx[-1], context], 1).unsqueeze(1))
    out_hiddens = torch.cat(out_hiddens, 1)
    # output layer
    decoded = self.linear(
        out_hiddens.view(
            out_hiddens.size(0) * out_hiddens.size(1), out_hiddens.size(2)))
    decoded = F.log_softmax(decoded, dim=1)
    return decoded.view(out_hiddens.size(0), out_hiddens.size(1),
                        decoded.size(1)), out_hiddens
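# The scatter-based reversal above maps every padded position to index 0,
# which relies on the pad id being 0 and on scatter_'s behaviour with
# duplicate indices. A gather-based variant (a sketch, assuming pad id 0 and
# right padding) computes the same reversal without that hazard:
import torch

x = torch.tensor([[1, 2, 3, 0], [4, 5, 0, 0]])  # 0 = pad, right-padded
B, T = x.size()
lengths = (x != 0).sum(1, keepdim=True)         # valid lengths: [[3], [2]]
pos = torch.arange(T).view(1, -1).expand(B, T)
src = lengths - 1 - pos                         # reversed source positions
src = torch.where(src >= 0, src, pos)           # padding stays in place
x_backward = x.gather(1, src)
# x_backward -> tensor([[3, 2, 1, 0], [5, 4, 0, 0]])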
def test_neighbor_sampler_on_cora(get_dataset): dataset = get_dataset(name='Cora') data = dataset[0] batch = torch.arange(10) loader = NeighborSampler(data.edge_index, sizes=[-1, -1, -1], node_idx=batch, batch_size=10) class SAGE(torch.nn.Module): def __init__(self, in_channels, out_channels): super().__init__() self.convs = torch.nn.ModuleList() self.convs.append(SAGEConv(in_channels, 16)) self.convs.append(SAGEConv(16, 16)) self.convs.append(SAGEConv(16, out_channels)) def batch(self, x, adjs): for i, (edge_index, _, size) in enumerate(adjs): x_target = x[:size[1]] # Target nodes are always placed first. x = self.convs[i]((x, x_target), edge_index) return x def full(self, x, edge_index): for conv in self.convs: x = conv(x, edge_index) return x model = SAGE(dataset.num_features, dataset.num_classes) _, n_id, adjs = next(iter(loader)) out1 = model.batch(data.x[n_id], adjs) out2 = model.full(data.x, data.edge_index)[batch] assert torch.allclose(out1, out2, atol=1e-7) class GAT(torch.nn.Module): def __init__(self, in_channels, out_channels): super().__init__() self.convs = torch.nn.ModuleList() self.convs.append(GATConv(in_channels, 16, heads=2)) self.convs.append(GATConv(32, 16, heads=2)) self.convs.append(GATConv(32, out_channels, heads=2, concat=False)) def batch(self, x, adjs): for i, (edge_index, _, size) in enumerate(adjs): x_target = x[:size[1]] # Target nodes are always placed first. x = self.convs[i]((x, x_target), edge_index) return x def full(self, x, edge_index): for conv in self.convs: x = conv(x, edge_index) return x _, n_id, adjs = next(iter(loader)) out1 = model.batch(data.x[n_id], adjs) out2 = model.full(data.x, data.edge_index)[batch] assert torch.allclose(out1, out2, atol=1e-7)
def evaluate(self, data_loader, model, task=args.task):
    # reset metrics
    self._reset()
    # switch to evaluate mode
    model.eval()
    entity_ids = torch.arange(end=len(self.vocabs[ENTITY])).to(DEVICE)
    with torch.no_grad():
        for _, data in enumerate(data_loader):
            val_triples = data[TRIPLE]
            # get batch size
            batch_size = val_triples.shape[0]
            all_entities = entity_ids.repeat(batch_size, 1)
            heads, relations, tails = val_triples[:, 0], val_triples[:, 1], val_triples[:, 2]
            # expand for all entities
            expanded_heads = heads.reshape(-1, 1).repeat(
                1, all_entities.size()[1])
            expanded_relations = relations.reshape(-1, 1).repeat(
                1, all_entities.size()[1])
            expanded_triples = torch.stack(
                (expanded_heads, expanded_relations, all_entities),
                dim=2).reshape(-1, val_triples.shape[1])
            if args.demographic_aware:
                expanded_demographics = data[DEMOGRAPHIC].reshape(
                    -1, 1).repeat(1, all_entities.size()[1]).reshape(-1, 1).squeeze()
            if args.prob_embedding:
                expanded_probabilities = data[PROBABILITY].reshape(
                    -1, 1).repeat(1, all_entities.size()[1]).reshape(-1, 1).squeeze()
            # chunk data and predict results
            predicted_tails = []
            for i in range(0, len(expanded_triples), batch_size**2):
                model_data = {
                    TRIPLE: expanded_triples[i:i + batch_size**2]
                }
                if args.demographic_aware:
                    model_data.update({
                        DEMOGRAPHIC:
                        expanded_demographics[i:i + batch_size**2]
                    })
                if args.prob_embedding:
                    model_data.update({
                        PROBABILITY:
                        expanded_probabilities[i:i + batch_size**2]
                    })
                predicted_tails.append(model.predict(model_data))
            predicted_tails = torch.cat(predicted_tails,
                                        dim=0).reshape(batch_size, -1)
            # rank results
            self._rank(predicted_tails, tails, task)
    return self._results()
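# The head/relation expansion in evaluate() is just arange + repeat: every
# (head, relation) pair is paired with every candidate entity before scoring.
# A toy version with 5 entities and 2 made-up triples:
import torch

num_entities = 5
triples = torch.tensor([[0, 1, 2], [3, 0, 4]])  # (head, rel, tail)
entity_ids = torch.arange(num_entities)
all_entities = entity_ids.repeat(triples.size(0), 1)             # (B, E)
heads = triples[:, 0].reshape(-1, 1).repeat(1, num_entities)
rels = triples[:, 1].reshape(-1, 1).repeat(1, num_entities)
expanded = torch.stack((heads, rels, all_entities), dim=2).reshape(-1, 3)
# expanded enumerates (h, r, e) for every entity e, batch rows first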
def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: """ Extract predicted keypoint locations from heatmaps. Args: maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for each ROI and each keypoint. rois (Tensor): (#ROIs, 4). The box of each ROI. Returns: Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to (x, y, logit, score) for each keypoint. When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. """ # The decorator use of torch.no_grad() was not supported by torchscript. # https://github.com/pytorch/pytorch/issues/44768 maps = maps.detach() rois = rois.detach() offset_x = rois[:, 0] offset_y = rois[:, 1] widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) widths_ceil = widths.ceil() heights_ceil = heights.ceil() num_rois, num_keypoints = maps.shape[:2] xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) width_corrections = widths / widths_ceil height_corrections = heights / heights_ceil keypoints_idx = torch.arange(num_keypoints, device=maps.device) for i in range(num_rois): outsize = (int(heights_ceil[i]), int(widths_ceil[i])) roi_map = F.interpolate(maps[[i]], size=outsize, mode="bicubic", align_corners=False).squeeze( 0) # #keypoints x H x W # softmax over the spatial region max_score, _ = roi_map.view(num_keypoints, -1).max(1) max_score = max_score.view(num_keypoints, 1, 1) tmp_full_resolution = (roi_map - max_score).exp_() tmp_pool_resolution = (maps[i] - max_score).exp_() # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, # so that the scores of objects of different absolute sizes will be more comparable roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum( (1, 2), keepdim=True) w = roi_map.shape[2] pos = roi_map.view(num_keypoints, -1).argmax(1) x_int = pos % w y_int = (pos - x_int) // w assert (roi_map_scores[keypoints_idx, y_int, x_int] == roi_map_scores.view( num_keypoints, -1).max(1)[0]).all() x = (x_int.float() + 0.5) * width_corrections[i] y = (y_int.float() + 0.5) * height_corrections[i] xy_preds[i, :, 0] = x + offset_x[i] xy_preds[i, :, 1] = y + offset_y[i] xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int] xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] return xy_preds
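# The x_int / y_int arithmetic in heatmaps_to_keypoints decodes a flattened
# argmax back into 2-D coordinates, and keypoints_idx picks one maximum per
# keypoint row via advanced indexing. A toy check:
import torch

roi_map = torch.zeros(2, 3, 4)  # (#keypoints, H, W)
roi_map[0, 1, 2] = 1.0          # keypoint 0 peaks at (y=1, x=2)
roi_map[1, 2, 0] = 1.0          # keypoint 1 peaks at (y=2, x=0)
w = roi_map.shape[2]
pos = roi_map.view(2, -1).argmax(1)
x_int = pos % w
y_int = (pos - x_int) // w
keypoints_idx = torch.arange(2)
assert (roi_map[keypoints_idx, y_int, x_int] == 1.0).all()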
b_size = b.size()
b_size
b.numel()  # total number of elements in b: 2*3, equivalent to b.nelement()
# create a tensor with the same shape as b
c = t.Tensor(b_size)
# create a tensor with elements 2 and 3
d = t.Tensor((2, 3))
c, d
c.shape
t.ones(2, 3)
t.zeros(2, 3)
t.arange(1, 6, 2)
t.linspace(1, 10, 3)
t.randn(2, 3, device=t.device('cpu'))
t.randperm(5)
t.eye(2, 3, dtype=t.int)
scalar = t.tensor(3.14159)
print('scalar: %s, shape of scalar: %s' % (scalar, scalar.shape))
vector = t.tensor([1, 2])
print('vector: %s, shape of vector: %s' % (vector, vector.shape))
tensor = t.Tensor(1, 2)
tensor.shape
matrix = t.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]])
def _generate( self, model, sample, prefix_tokens=None, bos_token=None, **kwargs ): if not self.retain_dropout: model.eval() # model.forward normally channels prev_output_tokens into the decoder # separately, but SequenceGenerator directly calls model.encoder encoder_input = { k: v for k, v in sample['net_input'].items() if k != 'prev_output_tokens' } src_tokens = encoder_input['src_tokens'] src_lengths = (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) input_size = src_tokens.size() # batch dimension goes first followed by source lengths bsz = input_size[0] src_len = input_size[1] beam_size = self.beam_size if self.match_source_len: max_len = src_lengths.max().item() else: max_len = min( int(self.max_len_a * src_len + self.max_len_b), # exclude the EOS marker model.max_decoder_positions() - 1, ) # compute the encoder output for each beam encoder_outs = model.forward_encoder(encoder_input) new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) new_order = new_order.to(src_tokens.device).long() encoder_outs = model.reorder_encoder_out(encoder_outs, new_order) # initialize buffers scores = src_tokens.new(bsz * beam_size, max_len + 1).float().fill_(0) scores_buf = scores.clone() tokens = src_tokens.new(bsz * beam_size, max_len + 2).long().fill_(self.pad) tokens_buf = tokens.clone() tokens[:, 0] = self.eos if bos_token is None else bos_token attn, attn_buf = None, None # The blacklist indicates candidates that should be ignored. # For example, suppose we're sampling and have already finalized 2/5 # samples. Then the blacklist would mark 2 positions as being ignored, # so that we only finalize the remaining 3 samples. blacklist = src_tokens.new_zeros(bsz, beam_size).eq(-1) # forward and backward-compatible False mask # list of completed sentences finalized = [[] for i in range(bsz)] finished = [False for i in range(bsz)] num_remaining_sent = bsz # number of candidate hypos per step cand_size = 2 * beam_size # 2 x beam size in case half are EOS # offset arrays for converting between different indexing schemes bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens) cand_offsets = torch.arange(0, cand_size).type_as(tokens) # helper function for allocating buffers on the fly buffers = {} def buffer(name, type_of=tokens): # noqa if name not in buffers: buffers[name] = type_of.new() return buffers[name] def is_finished(sent, step, unfin_idx): """ Check whether we've finished generation for a given sentence, by comparing the worst score among finalized hypotheses to the best possible score among unfinalized hypotheses. """ assert len(finalized[sent]) <= beam_size if len(finalized[sent]) == beam_size: return True return False def finalize_hypos(step, bbsz_idx, eos_scores): """ Finalize the given hypotheses at this step, while keeping the total number of finalized hypotheses per sentence <= beam_size. Note: the input must be in the desired finalization order, so that hypotheses that appear earlier in the input are preferred to those that appear later. 
Args: step: current time step bbsz_idx: A vector of indices in the range [0, bsz*beam_size), indicating which hypotheses to finalize eos_scores: A vector of the same size as bbsz_idx containing scores for each hypothesis """ assert bbsz_idx.numel() == eos_scores.numel() # clone relevant token and attention tensors tokens_clone = tokens.index_select(0, bbsz_idx) tokens_clone = tokens_clone[:, 1:step + 2] # skip the first index, which is EOS assert not tokens_clone.eq(self.eos).any() tokens_clone[:, step] = self.eos attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step+2] if attn is not None else None # compute scores per token position pos_scores = scores.index_select(0, bbsz_idx)[:, :step+1] pos_scores[:, step] = eos_scores # convert from cumulative to per-position scores pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] # normalize sentence-level scores if self.normalize_scores: eos_scores /= (step + 1) ** self.len_penalty cum_unfin = [] prev = 0 for f in finished: if f: prev += 1 else: cum_unfin.append(prev) sents_seen = set() for i, (idx, score) in enumerate(zip(bbsz_idx.tolist(), eos_scores.tolist())): unfin_idx = idx // beam_size sent = unfin_idx + cum_unfin[unfin_idx] sents_seen.add((sent, unfin_idx)) if self.match_source_len and step > src_lengths[unfin_idx]: score = -math.inf def get_hypo(): if attn_clone is not None: # remove padding tokens from attn scores hypo_attn = attn_clone[i] else: hypo_attn = None return { 'tokens': tokens_clone[i], 'score': score, 'attention': hypo_attn, # src_len x tgt_len 'alignment': None, 'positional_scores': pos_scores[i], } if len(finalized[sent]) < beam_size: finalized[sent].append(get_hypo()) newly_finished = [] for sent, unfin_idx in sents_seen: # check termination conditions for this sentence if not finished[sent] and is_finished(sent, step, unfin_idx): finished[sent] = True newly_finished.append(unfin_idx) return newly_finished reorder_state = None batch_idxs = None for step in range(max_len + 1): # one extra step for EOS marker # reorder decoder internal states based on the prev choice of beams if reorder_state is not None: if batch_idxs is not None: # update beam indices to take into account removed sentences corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(batch_idxs) reorder_state.view(-1, beam_size).add_(corr.unsqueeze(-1) * beam_size) model.reorder_incremental_state(reorder_state) encoder_outs = model.reorder_encoder_out(encoder_outs, reorder_state) lprobs, avg_attn_scores = model.forward_decoder( tokens[:, :step + 1], encoder_outs, temperature=self.temperature, **kwargs ) lprobs[:, self.pad] = -math.inf # never select pad lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty # handle min and max length constraints if step >= max_len: lprobs[:, :self.eos] = -math.inf lprobs[:, self.eos + 1:] = -math.inf elif step < self.min_len: lprobs[:, self.eos] = -math.inf # handle prefix tokens (possibly with different lengths) if prefix_tokens is not None and step < prefix_tokens.size(1): prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) prefix_mask = prefix_toks.ne(self.pad) lprobs[prefix_mask] = -math.inf lprobs[prefix_mask] = lprobs[prefix_mask].scatter_( -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs ) # if prefix includes eos, then we should make sure tokens and # scores are the same across all beams eos_mask = prefix_toks.eq(self.eos) if eos_mask.any(): # validate that the first beam matches the prefix first_beam 
= tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[:, 0, 1:step + 1] eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] assert (first_beam == target_prefix).all() def replicate_first_beam(tensor, mask): tensor = tensor.view(-1, beam_size, tensor.size(-1)) tensor[mask] = tensor[mask][:, :1, :] return tensor.view(-1, tensor.size(-1)) # copy tokens, scores and lprobs from the first beam to all beams tokens = replicate_first_beam(tokens, eos_mask_batch_dim) scores = replicate_first_beam(scores, eos_mask_batch_dim) lprobs = replicate_first_beam(lprobs, eos_mask_batch_dim) if self.no_repeat_ngram_size > 0: # for each beam and batch sentence, generate a list of previous ngrams gen_ngrams = [{} for bbsz_idx in range(bsz * beam_size)] for bbsz_idx in range(bsz * beam_size): gen_tokens = tokens[bbsz_idx].tolist() for ngram in zip(*[gen_tokens[i:] for i in range(self.no_repeat_ngram_size)]): gen_ngrams[bbsz_idx][tuple(ngram[:-1])] = \ gen_ngrams[bbsz_idx].get(tuple(ngram[:-1]), []) + [ngram[-1]] # Record attention scores if avg_attn_scores is not None: if attn is None: attn = scores.new(bsz * beam_size, src_tokens.size(1), max_len + 2) attn_buf = attn.clone() attn[:, :, step + 1].copy_(avg_attn_scores) scores = scores.type_as(lprobs) scores_buf = scores_buf.type_as(lprobs) eos_bbsz_idx = buffer('eos_bbsz_idx') eos_scores = buffer('eos_scores', type_of=scores) self.search.set_src_lengths(src_lengths) if self.no_repeat_ngram_size > 0: def calculate_banned_tokens(bbsz_idx): # before decoding the next token, prevent decoding of ngrams that have already appeared ngram_index = tuple(tokens[bbsz_idx, step + 2 - self.no_repeat_ngram_size:step + 1].tolist()) return gen_ngrams[bbsz_idx].get(ngram_index, []) if step + 2 - self.no_repeat_ngram_size >= 0: # no banned tokens if we haven't generated no_repeat_ngram_size tokens yet banned_tokens = [calculate_banned_tokens(bbsz_idx) for bbsz_idx in range(bsz * beam_size)] else: banned_tokens = [[] for bbsz_idx in range(bsz * beam_size)] for bbsz_idx in range(bsz * beam_size): lprobs[bbsz_idx, banned_tokens[bbsz_idx]] = -math.inf cand_scores, cand_indices, cand_beams = self.search.step( step, lprobs.view(bsz, -1, self.vocab_size), scores.view(bsz, beam_size, -1)[:, :, :step], ) # cand_bbsz_idx contains beam indices for the top candidate # hypotheses, with a range of values: [0, bsz*beam_size), # and dimensions: [bsz, cand_size] cand_bbsz_idx = cand_beams.add(bbsz_offsets) # finalize hypotheses that end in eos (except for blacklisted ones) eos_mask = cand_indices.eq(self.eos) eos_mask[:, :beam_size][blacklist] = 0 # only consider eos when it's among the top beam_size indices torch.masked_select( cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size], out=eos_bbsz_idx, ) finalized_sents = set() if eos_bbsz_idx.numel() > 0: torch.masked_select( cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size], out=eos_scores, ) finalized_sents = finalize_hypos(step, eos_bbsz_idx, eos_scores) num_remaining_sent -= len(finalized_sents) assert num_remaining_sent >= 0 if num_remaining_sent == 0: break assert step < max_len if len(finalized_sents) > 0: new_bsz = bsz - len(finalized_sents) # construct batch_idxs which holds indices of batches to keep for the next pass batch_mask = cand_indices.new_ones(bsz) batch_mask[cand_indices.new(finalized_sents)] = 0 batch_idxs = batch_mask.nonzero().squeeze(-1) eos_mask = eos_mask[batch_idxs] cand_beams = cand_beams[batch_idxs] bbsz_offsets.resize_(new_bsz, 1) 
cand_bbsz_idx = cand_beams.add(bbsz_offsets) cand_scores = cand_scores[batch_idxs] cand_indices = cand_indices[batch_idxs] if prefix_tokens is not None: prefix_tokens = prefix_tokens[batch_idxs] src_lengths = src_lengths[batch_idxs] blacklist = blacklist[batch_idxs] scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) scores_buf.resize_as_(scores) tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) tokens_buf.resize_as_(tokens) if attn is not None: attn = attn.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, attn.size(1), -1) attn_buf.resize_as_(attn) bsz = new_bsz else: batch_idxs = None # Set active_mask so that values > cand_size indicate eos or # blacklisted hypos and values < cand_size indicate candidate # active hypos. After this, the min values per row are the top # candidate active hypos. active_mask = buffer('active_mask') eos_mask[:, :beam_size] |= blacklist torch.add( eos_mask.type_as(cand_offsets) * cand_size, cand_offsets[:eos_mask.size(1)], out=active_mask, ) # get the top beam_size active hypotheses, which are just the hypos # with the smallest values in active_mask active_hypos, new_blacklist = buffer('active_hypos'), buffer('new_blacklist') torch.topk( active_mask, k=beam_size, dim=1, largest=False, out=(new_blacklist, active_hypos) ) # update blacklist to ignore any finalized hypos blacklist = new_blacklist.ge(cand_size)[:, :beam_size] assert (~blacklist).any(dim=1).all() active_bbsz_idx = buffer('active_bbsz_idx') torch.gather( cand_bbsz_idx, dim=1, index=active_hypos, out=active_bbsz_idx, ) active_scores = torch.gather( cand_scores, dim=1, index=active_hypos, out=scores[:, step].view(bsz, beam_size), ) active_bbsz_idx = active_bbsz_idx.view(-1) active_scores = active_scores.view(-1) # copy tokens and scores for active hypotheses torch.index_select( tokens[:, :step + 1], dim=0, index=active_bbsz_idx, out=tokens_buf[:, :step + 1], ) torch.gather( cand_indices, dim=1, index=active_hypos, out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1], ) if step > 0: torch.index_select( scores[:, :step], dim=0, index=active_bbsz_idx, out=scores_buf[:, :step], ) torch.gather( cand_scores, dim=1, index=active_hypos, out=scores_buf.view(bsz, beam_size, -1)[:, :, step], ) # copy attention for active hypotheses if attn is not None: torch.index_select( attn[:, :, :step + 2], dim=0, index=active_bbsz_idx, out=attn_buf[:, :, :step + 2], ) # swap buffers tokens, tokens_buf = tokens_buf, tokens scores, scores_buf = scores_buf, scores if attn is not None: attn, attn_buf = attn_buf, attn # reorder incremental state in decoder reorder_state = active_bbsz_idx # sort by score descending for sent in range(len(finalized)): finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True) return finalized
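# bbsz_offsets in _generate converts per-sentence beam indices into flat
# (bsz * beam_size) row indices: candidate beam b of sentence s lives at row
# s * beam_size + b. A toy illustration of the offset arithmetic:
import torch

bsz, beam_size = 3, 2
bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1)  # [[0], [2], [4]]
cand_beams = torch.tensor([[0, 1], [1, 0], [0, 0]])             # per-sentence beams
cand_bbsz_idx = cand_beams + bbsz_offsets                       # flat row indices
# -> tensor([[0, 1], [3, 2], [4, 4]])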