def forward(self, input, label):
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    if self.device_id is None:
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    else:
        x = input
        sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
        temp_x = x.cuda(self.device_id[0])
        weight = sub_weights[0].cuda(self.device_id[0])
        cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
        for i in range(1, len(self.device_id)):
            temp_x = x.cuda(self.device_id[i])
            weight = sub_weights[i].cuda(self.device_id[i])
            cosine = torch.cat((cosine,
                                F.linear(F.normalize(temp_x),
                                         F.normalize(weight)).cuda(self.device_id[0])), dim=1)

    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

    # --------------------------- convert label to one-hot ---------------------------
    one_hot = torch.zeros(cosine.size())
    if self.device_id is not None:
        one_hot = one_hot.cuda(self.device_id[0])
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)

    # torch.where: out_i = x_i if condition_i else y_i
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
    output *= self.s
    return output
def bisect_demo():
    """ Bisect the LB/UB on specified columns.
        The key is to use scatter_() to convert indices into one-hot encodings.
    """
    t1t2 = torch.stack((torch.randn(5, 4), torch.randn(5, 4)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)
    print('LB:', lb)
    print('UB:', ub)

    # random idxs for testing
    idxs = torch.randn_like(lb)
    _, idxs = idxs.max(dim=-1)  # <Batch>
    print('Split idxs:', idxs)

    idxs = idxs.unsqueeze(dim=-1)  # Batch x 1
    # convert into a one-hot encoding (bool replaces the deprecated byte mask)
    idxs = torch.zeros_like(lb).bool().scatter_(-1, idxs, True)
    print('Reorg idxs:', idxs)

    mid = (lb + ub) / 2.0
    lefts_lb = lb
    lefts_ub = torch.where(idxs, mid, ub)   # use the one-hot encoding to call torch.where()
    rights_lb = torch.where(idxs, mid, lb)  # definitely faster than element-wise reassignment
    rights_ub = ub

    print('LEFT LB:', lefts_lb)
    print('LEFT UB:', lefts_ub)
    print('RIGHT LB:', rights_lb)
    print('RIGHT UB:', rights_ub)

    newlb = torch.cat((lefts_lb, rights_lb), dim=0)
    newub = torch.cat((lefts_ub, rights_ub), dim=0)
    return newlb, newub
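# --- Added sketch (not from the original source) ---
# The scatter_-to-one-hot trick above generalizes beyond bisection: build a
# boolean one-hot mask from per-row column indices, then overwrite exactly one
# column per row via torch.where(). Shapes and values are illustrative.
import torch

vals = torch.arange(12.0).reshape(3, 4)
cols = torch.tensor([2, 0, 3])                  # one target column per row
mask = torch.zeros_like(vals, dtype=torch.bool)
mask.scatter_(1, cols.unsqueeze(1), True)       # rows x cols one-hot
out = torch.where(mask, torch.full_like(vals, -1.0), vals)
print(out)  # column 2 of row 0, column 0 of row 1, column 3 of row 2 become -1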
def forward(self, x, label):
    cosine = F.linear(F.normalize(x), F.normalize(self.weight))
    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)

    one_hot = torch.zeros(cosine.size(), device='cuda')
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
    output *= self.s
    return output
def forward(self, scores, align, target):
    """
    Args:
        scores (FloatTensor): ``(batch_size*tgt_len)`` x dynamic vocab size
            whose sum along dim 1 is less than or equal to 1, i.e. cols softmaxed.
        align (LongTensor): ``(batch_size x tgt_len)``
        target (LongTensor): ``(batch_size x tgt_len)``
    """
    # probabilities assigned by the model to the gold targets
    vocab_probs = scores.gather(1, target.unsqueeze(1)).squeeze(1)

    # probability of tokens copied from source
    copy_ix = align.unsqueeze(1) + self.vocab_size
    copy_tok_probs = scores.gather(1, copy_ix).squeeze(1)
    # Set scores for unk to 0 and add eps
    copy_tok_probs[align == self.unk_index] = 0
    copy_tok_probs += self.eps  # to avoid -inf logs

    # find the indices in which you do not use the copy mechanism
    non_copy = align == self.unk_index
    if not self.force_copy:
        non_copy = non_copy | (target != self.unk_index)

    probs = torch.where(
        non_copy, copy_tok_probs + vocab_probs, copy_tok_probs
    )

    loss = -probs.log()  # just NLLLoss; can the module be incorporated?
    # Drop padding.
    loss[target == self.ignore_index] = 0
    return loss
def hingeembeddingloss_reference(input, target, margin=1.0, size_average=True, reduce=True):
    margin_clamp = (margin - input).clamp(min=0).type_as(input)
    output = torch.where(target == 1, input, margin_clamp)

    if reduce and size_average:
        return output.mean()
    elif reduce:
        return output.sum()
    return output
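# --- Added sanity check (not from the original source) ---
# The reference above should match torch.nn.HingeEmbeddingLoss with the same
# margin and mean reduction; a quick check on random data (targets in {1, -1}):
import torch

inp = torch.randn(10)
tgt = torch.where(torch.rand(10) > 0.5, torch.ones(10), -torch.ones(10))
ours = hingeembeddingloss_reference(inp, tgt, margin=1.0)
ref = torch.nn.HingeEmbeddingLoss(margin=1.0)(inp, tgt)
assert torch.allclose(ours, ref)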
def forward(self, input, label):
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

    # --------------------------- convert label to one-hot ---------------------------
    # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
    one_hot = torch.zeros(cosine.size(), device='cuda')
    one_hot.scatter_(1, label.view(-1, 1).long(), 1)

    # torch.where: out_i = x_i if condition_i else y_i
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
    output *= self.s
    return output
def backward(ctx, grad_output):
    supp_size, output = ctx.saved_tensors
    dim = ctx.dim
    grad_input = grad_output.clone()
    grad_input[output == 0] = 0

    v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze()
    v_hat = v_hat.unsqueeze(dim)
    grad_input = torch.where(output != 0, grad_input - v_hat, grad_input)
    return grad_input, None
def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
    """
    very similar to the smooth_l1_loss from pytorch, but with
    the extra beta parameter
    """
    n = torch.abs(input - target)
    cond = n < beta
    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
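# --- Added sanity check (not from the original source) ---
# Recent PyTorch (>= 1.7) exposes the same beta parameter directly in
# F.smooth_l1_loss, so the hand-rolled version above should agree with it:
import torch
import torch.nn.functional as F

pred = torch.randn(8, 4)
gt = torch.randn(8, 4)
ours = smooth_l1_loss(pred, gt, beta=1. / 9, size_average=True)
ref = F.smooth_l1_loss(pred, gt, beta=1. / 9)  # beta kwarg requires PyTorch >= 1.7
assert torch.allclose(ours, ref, atol=1e-6)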
def lid(Mxy, k):
    eps_mat = torch.where(Mxy > 1e-20,
                          torch.zeros((1, 1)),
                          torch.ones((1, 1)) * 1e-20).detach()
    Mxy = Mxy + eps_mat
    value, idx = Mxy.topk(k=k, largest=False)
    mask = torch.zeros(Mxy.size()).type(Mxy.type())
    mask.scatter_(1, idx, 1.0)
    r_max = value[:, -1].detach()
    # est = -1 / (1. / k * torch.sum(torch.log(Mxy + eps_mat) * mask, dim=-1) - torch.log(r_max))
    est = -1 / (torch.mean(torch.log(value), dim=-1) - torch.log(r_max))
    return est
def cosineembeddingloss_reference(input1, input2, target, margin=0, size_average=True, reduce=True):
    def _cos(a, b):
        cos = a.new(a.size(0))
        for i in range(0, a.size(0)):
            cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) *
                                             ((b[i] * b[i]).sum() + 1e-12)) ** 0.5)
        return cos

    output = torch.where(target == 1,
                         1 - _cos(input1, input2),
                         (_cos(input1, input2) - margin).clamp(min=0))

    if reduce and size_average:
        return output.mean()
    elif reduce:
        return output.sum()
    return output
def hingeembeddingloss_reference(input, target, margin=1.0, size_average=True, reduce=True):
    # needed for legacy tests
    if not isinstance(input, Variable):
        input = Variable(input)
        target = Variable(target)

    margin_clamp = (margin - input).clamp(min=0).type_as(input)
    output = torch.where(target == 1, input, margin_clamp)

    if reduce and size_average:
        return output.mean()
    elif reduce:
        return output.sum()
    return output
def forward(self, cosine, target):
    sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
    phi = cosine * self.cos_m - sine * self.sin_m
    phi = torch.where(cosine > self.th, phi, cosine - self.mm)

    # --------------------------- convert label to one-hot ---------------------------
    # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
    one_hot = torch.zeros(cosine.size(), device='cuda')
    one_hot.scatter_(1, target.view(-1, 1).long(), 1)

    # torch.where: out_i = x_i if condition_i else y_i
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
    # output = output - one_hot * self.m_cos  # cosine-margin
    output *= self.s
    loss = self.ce(output, target)
    return loss
def forward(self, input, adj):
    h = torch.mm(input, self.W)
    N = h.size()[0]

    a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)],
                        dim=1).view(N, -1, 2 * self.out_features)
    e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

    zero_vec = -9e15 * torch.ones_like(e)
    attention = torch.where(adj > 0, e, zero_vec)
    attention = F.softmax(attention, dim=1)
    attention = F.dropout(attention, self.dropout, training=self.training)
    h_prime = torch.matmul(attention, h)

    if self.concat:
        return F.elu(h_prime)
    else:
        return h_prime
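# --- Added sketch (not from the original source) ---
# The masking idiom from the GAT layer above, in isolation: logits at positions
# where the adjacency is 0 are pushed to a very negative value so that softmax
# assigns them (numerically) zero attention. Shapes are illustrative.
import torch
import torch.nn.functional as F

e = torch.randn(4, 4)                    # raw attention logits
adj = (torch.rand(4, 4) > 0.5).float()   # toy adjacency matrix
masked = torch.where(adj > 0, e, torch.full_like(e, -9e15))
attn = F.softmax(masked, dim=1)
print(attn * (adj == 0))  # masked positions receive ~0 weight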
def forward(self, X, Z=None, diag=False):
    variance = self.get_param("variance")
    if Z is None:
        Z = X
    X = self._slice_input(X)
    if diag:
        return variance * X.abs().squeeze(1)

    Z = self._slice_input(Z)
    if X.shape[1] != Z.shape[1]:
        raise ValueError("Inputs must have the same number of features.")

    Zt = Z.t()
    return torch.where(X.sign() == Zt.sign(),
                       variance * torch.min(X.abs(), Zt.abs()),
                       X.data.new_zeros(X.shape[0], Z.shape[0]))
def forward(ctx, input, target):
    """
    input (FloatTensor): ``(n, num_classes)``.
    target (LongTensor): ``(n,)``, the indices of the target classes
    """
    input_batch, classes = input.size()
    target_batch = target.size(0)
    aeq(input_batch, target_batch)

    z_k = input.gather(1, target.unsqueeze(1)).squeeze()
    tau_z, support_size = _threshold_and_support(input, dim=1)
    support = input > tau_z
    x = torch.where(
        support, input**2 - tau_z**2,
        torch.tensor(0.0, device=input.device)
    ).sum(dim=1)
    ctx.save_for_backward(input, target, tau_z)
    # clamping necessary because of numerical errors: loss should be lower
    # bounded by zero, but negative values near zero are possible without
    # the clamp
    return torch.clamp(x / 2 - z_k + 0.5, min=0.0)
def shapeOptimizer(input_antenna, numsteps=10000, load=False, model=None):
    # Target gain pattern: 153 entries (1 x 60, 19 x 15, 19 x 5, 19 x 0.1, 95 x 0.01).
    target = torch.Tensor([60.] + [15.] * 19 + [5.] * 19 + [0.1] * 19 + [0.01] * 95).to(device)
    target = target.expand(input_antenna.size(0), 153)

    # input_antenna = random
    test.draw_antenna(input_antenna[0].numpy(), 'input_antenna1.jpg')
    input_antenna = input_antenna.float().to(device)

    if model is None:
        model = DeepResField(Block, [2, 3, 1]).to(device)
    if load:
        checkpoint = torch.load('checkpoint.pt.tar')
        model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    for param in model.parameters():
        param.requires_grad = False

    criterion = nn.MSELoss()
    optimizer = optim.Adam([input_antenna.requires_grad_()], lr=0.0001, weight_decay=0.001)

    for step in range(numsteps):
        output = model(input_antenna)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        input_antenna.data.clamp_(0., 1.)
        # input_antenna = torch.clamp(input_antenna, min=0., max=1.)
        if step % 500 == 0:
            print("Step: {}, loss: {}, input: {}".format(step, loss.item(), input_antenna))

    final_antenna = torch.where(input_antenna >= torch.Tensor([0.5]).to(device),
                                torch.Tensor([1]).to(device),
                                torch.Tensor([0]).to(device))
    print(final_antenna)
    test.draw_antenna(final_antenna[0].numpy(), 'final_antenna1.jpg')
    torch.save(final_antenna, 'final_antenna.pt')
def update(self, max_iter, iteration):
    # Velocities update
    # self.chi = (self.chi_max - self.chi_min) * ((max_iter - iteration) / max_iter)
    R_p = torch.rand(size=(self.size, self.dim)).double().cuda()
    R_g = torch.rand(size=(self.size, self.dim)).double().cuda()
    lr1, lr2 = self.layer_dim[0], self.layer_dim[1]

    self.V = torch.where(self.V > 0.5, (-self.V_max) / 5, self.V)
    self.V = torch.where(self.V < -0.5, (-self.V_min) / 5, self.V)
    self.X = torch.where(self.X > 40.0, self.X_max, self.X)
    self.X = torch.where(self.X < -4.0, self.X_min, self.X)
    # self.layer_select()

    self.V[:, lr1:lr2] = self.chi * (self.V[:, lr1:lr2]) \
        + self.phi_p * R_p[:, lr1:lr2] * (self.P[:, lr1:lr2] - self.X[:, lr1:lr2]) \
        + self.phi_g * R_g[:, lr1:lr2] * (self.G[:, lr1:lr2] - self.X[:, lr1:lr2])

    # Positions update
    self.X[:, lr1:lr2] = self.X[:, lr1:lr2] + self.V[:, lr1:lr2]
    print(self.X.max().cpu().numpy(), self.V.max().cpu().numpy())

    # Best scores
    scores = self.cost_func(self.X)
    better_scores_idx = scores < self.S
    self.P = torch.where(better_scores_idx, self.X, self.P)
    self.S[better_scores_idx] = scores[better_scores_idx]
    self.g = self.P[self.S.argmin()]
    self.best_score = self.S.min()

    # Ring-topology neighbourhood update of the local bests G.
    idx = 0
    for i in range(self.size // self.neghburs):
        if idx == 0 and idx < self.size:
            max_matrix = self.S[self.size - 1]  # pick the last particle to complete the ring
            max_matrix = np.append(max_matrix, self.S[0:(self.neghburs + 1)])  # append the next neighbours
            # NOTE: this assert is a no-op (a non-empty tuple is always truthy);
            # it was presumably meant to check max_matrix.shape[0] == self.neghburs + 2.
            assert (max_matrix.shape, (self.neghburs + 2))
            # handle the case where the local best comes from the wrapped-around last particle
            if max_matrix.argmin() == 0:  # gbest is the last element
                self.G[0:(idx + self.neghburs), :] = self.P[self.size - 1]
            else:
                self.G[0:(idx + self.neghburs), :] = self.P[max_matrix.argmin() - 1]
            idx = idx + self.neghburs
        elif (self.size - idx) <= self.neghburs and idx < self.size:
            max_matrix = self.S[idx - 1:]
            max_matrix = np.append(max_matrix, self.S[0])
            if max_matrix.argmin() == self.neghburs + 1:  # first element is gbest
                self.G[idx:, :] = self.P[0]
            else:
                self.G[idx:, :] = self.P[idx + (max_matrix.argmin() - 1)]
            idx = idx + self.neghburs
        elif idx < self.size and (self.size - idx) > self.neghburs:
            max_matrix = scores[idx - 1:idx + (self.neghburs + 1)]
            assert (max_matrix.shape, self.neghburs + 2)  # no-op assert, see above
            self.G[idx:idx + self.neghburs, :] = self.P[idx + (max_matrix.argmin() - 1)]
            idx = idx + self.neghburs

    assert (self.X.shape, self.G.shape)  # no-op assert, see above
def search(self,
           start_predictions: torch.Tensor,
           start_state: StateType,
           step: StepFunctionType) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Given a starting state and a step function, apply beam search to find the
    most likely target sequences.

    Parameters
    ----------
    start_predictions : ``torch.Tensor``
        A tensor containing the initial predictions with shape ``(batch_size,)``.
        Usually the initial predictions are just the index of the "start" token
        in the target vocabulary.
    start_state : ``StateType``
        The initial state passed to the ``step`` function. Each value of the state dict
        should be a tensor of shape ``(batch_size, *)``, where ``*`` means any other
        number of dimensions.
    step : ``StepFunctionType``
        A function that is responsible for computing the next most likely tokens,
        given the current state and the predictions from the last time step.
        The function should accept two arguments. The first being a tensor
        of shape ``(group_size,)``, representing the index of the predicted
        tokens from the last time step, and the second being the current state.
        The ``group_size`` will be ``batch_size * beam_size``, except in the initial
        step, for which it will just be ``batch_size``.
        The function is expected to return a tuple, where the first element
        is a tensor of shape ``(group_size, target_vocab_size)`` containing
        the log probabilities of the tokens for the next step, and the second
        element is the updated state. The tensor in the state should have shape
        ``(group_size, *)``, where ``*`` means any other number of dimensions.

    Returns
    -------
    Tuple[torch.Tensor, torch.Tensor]
        Tuple of ``(predictions, log_probabilities)``, where ``predictions``
        has shape ``(batch_size, beam_size, max_steps)`` and ``log_probabilities``
        has shape ``(batch_size, beam_size)``.
    """
    batch_size = start_predictions.size()[0]

    # List of (batch_size, beam_size) tensors. One for each time step. Does not
    # include the start symbols, which are implicit.
    predictions: List[torch.Tensor] = []

    # List of (batch_size, beam_size) tensors. One for each time step. None for
    # the first. Stores the index n for the parent prediction, i.e.
    # predictions[t-1][i][n], that it came from.
    backpointers: List[torch.Tensor] = []

    # Calculate the first timestep. This is done outside the main loop
    # because we are going from a single decoder input (the output from the
    # encoder) to the top `beam_size` decoder outputs. On the other hand,
    # within the main loop we are going from the `beam_size` elements of the
    # beam to `beam_size`^2 candidates from which we will select the top
    # `beam_size` elements for the next iteration.
    # shape: (batch_size, num_classes)
    start_class_log_probabilities, state = step(start_predictions, start_state)

    num_classes = start_class_log_probabilities.size()[1]

    # Make sure `per_node_beam_size` is not larger than `num_classes`.
    if self.per_node_beam_size > num_classes:
        raise ConfigurationError(f"Target vocab size ({num_classes:d}) too small "
                                 f"relative to per_node_beam_size ({self.per_node_beam_size:d}).\n"
                                 f"Please decrease beam_size or per_node_beam_size.")

    # shape: (batch_size, beam_size), (batch_size, beam_size)
    start_top_log_probabilities, start_predicted_classes = \
        start_class_log_probabilities.topk(self.beam_size)

    # The log probabilities for the last time step.
    # shape: (batch_size, beam_size)
    last_log_probabilities = start_top_log_probabilities

    # shape: [(batch_size, beam_size)]
    predictions.append(start_predicted_classes)

    # Log probability tensor that mandates that the end token is selected.
    # shape: (batch_size * beam_size, num_classes)
    log_probs_after_end = start_class_log_probabilities.new_full(
        (batch_size * self.beam_size, num_classes),
        float("-inf")
    )
    log_probs_after_end[:, self._end_index] = 0.

    # Set the same state for each element in the beam.
    for key, state_tensor in state.items():
        _, *last_dims = state_tensor.size()
        # shape: (batch_size * beam_size, *)
        state[key] = state_tensor.\
            unsqueeze(1).\
            expand(batch_size, self.beam_size, *last_dims).\
            reshape(batch_size * self.beam_size, *last_dims)

    for timestep in range(self.max_steps - 1):
        # shape: (batch_size * beam_size,)
        last_predictions = predictions[-1].reshape(batch_size * self.beam_size)

        # If every predicted token from the last step is `self._end_index`,
        # then we can stop early.
        if (last_predictions == self._end_index).all():
            break

        # Take a step. This gets the predicted log probs of the next classes
        # and updates the state.
        # shape: (batch_size * beam_size, num_classes)
        class_log_probabilities, state = step(last_predictions, state)

        # shape: (batch_size * beam_size, num_classes)
        last_predictions_expanded = last_predictions.unsqueeze(-1).expand(
            batch_size * self.beam_size,
            num_classes
        )

        # Here we are finding any beams where we predicted the end token in
        # the previous timestep and replacing the distribution with a
        # one-hot distribution, forcing the beam to predict the end token
        # this timestep as well.
        # shape: (batch_size * beam_size, num_classes)
        cleaned_log_probabilities = torch.where(
            last_predictions_expanded == self._end_index,
            log_probs_after_end,
            class_log_probabilities
        )

        # shape (both): (batch_size * beam_size, per_node_beam_size)
        top_log_probabilities, predicted_classes = \
            cleaned_log_probabilities.topk(self.per_node_beam_size)

        # Here we expand the last log probabilities to
        # (batch_size * beam_size, per_node_beam_size) so that we can add them
        # to the current log probs for this timestep. This lets us maintain the
        # log probability of each element on the beam.
        # shape: (batch_size * beam_size, per_node_beam_size)
        expanded_last_log_probabilities = last_log_probabilities.\
            unsqueeze(2).\
            expand(batch_size, self.beam_size, self.per_node_beam_size).\
            reshape(batch_size * self.beam_size, self.per_node_beam_size)

        # shape: (batch_size * beam_size, per_node_beam_size)
        summed_top_log_probabilities = top_log_probabilities + expanded_last_log_probabilities

        # shape: (batch_size, beam_size * per_node_beam_size)
        reshaped_summed = summed_top_log_probabilities.\
            reshape(batch_size, self.beam_size * self.per_node_beam_size)

        # shape: (batch_size, beam_size * per_node_beam_size)
        reshaped_predicted_classes = predicted_classes.\
            reshape(batch_size, self.beam_size * self.per_node_beam_size)

        # Keep only the top `beam_size` beam indices.
        # shape: (batch_size, beam_size), (batch_size, beam_size)
        restricted_beam_log_probs, restricted_beam_indices = reshaped_summed.topk(self.beam_size)

        # Use the beam indices to extract the corresponding classes.
        # shape: (batch_size, beam_size)
        restricted_predicted_classes = reshaped_predicted_classes.gather(1, restricted_beam_indices)

        predictions.append(restricted_predicted_classes)

        # shape: (batch_size, beam_size)
        last_log_probabilities = restricted_beam_log_probs

        # The beam indices come from a `beam_size * per_node_beam_size` dimension
        # where the indices with a common ancestor are grouped together. Hence
        # floor-dividing by per_node_beam_size gives the ancestor.
        # shape: (batch_size, beam_size)
        backpointer = restricted_beam_indices // self.per_node_beam_size

        backpointers.append(backpointer)

        # Keep only the pieces of the state tensors corresponding to the
        # ancestors created this iteration.
        for key, state_tensor in state.items():
            _, *last_dims = state_tensor.size()
            # shape: (batch_size, beam_size, *)
            expanded_backpointer = backpointer.\
                view(batch_size, self.beam_size, *([1] * len(last_dims))).\
                expand(batch_size, self.beam_size, *last_dims)

            # shape: (batch_size * beam_size, *)
            state[key] = state_tensor.\
                reshape(batch_size, self.beam_size, *last_dims).\
                gather(1, expanded_backpointer).\
                reshape(batch_size * self.beam_size, *last_dims)

    # Reconstruct the sequences.
    # shape: [(batch_size, beam_size, 1)]
    reconstructed_predictions = [predictions[-1].unsqueeze(2)]

    # shape: (batch_size, beam_size)
    cur_backpointers = backpointers[-1]

    for timestep in range(len(predictions) - 2, 0, -1):
        # shape: (batch_size, beam_size, 1)
        cur_preds = predictions[timestep].gather(1, cur_backpointers).unsqueeze(2)

        reconstructed_predictions.append(cur_preds)

        # shape: (batch_size, beam_size)
        cur_backpointers = backpointers[timestep - 1].gather(1, cur_backpointers)

    # shape: (batch_size, beam_size, 1)
    final_preds = predictions[0].gather(1, cur_backpointers).unsqueeze(2)

    reconstructed_predictions.append(final_preds)

    # shape: (batch_size, beam_size, max_steps)
    all_predictions = torch.cat(list(reversed(reconstructed_predictions)), 2)

    return all_predictions, last_log_probabilities
def bs_test(model_name='bert-large-uncased-whole-word-masking'):  # distilbert-base-uncased
    sys.path.append('/Users/i350230/GITHUB/CTCDecoder/src/')
    import editdistance as ed
    from BKTree import BKTree
    from collections import defaultdict
    from transformers import AutoModelWithLMHead, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelWithLMHead.from_pretrained(model_name)

    with open('_Data_metadata_letters_wav2vec.pk', 'rb') as f:
        data = pk.load(f)

    with open('_ctc_exp3_predictions.pk', 'rb') as f:
        res = pk.load(f)

    idx_to_tokens = ['<blank>'] + data['idx_to_tokens'][3:]
    tokens_to_idx = {t: i for i, t in enumerate(idx_to_tokens)}

    greedy_preds = [np.array(p).argmax(-1).tolist() for p in res['predictions']]
    target_sentences = [
        ''.join([idx_to_tokens[i] for i in t[:t.index(0) if 0 in t else None]])
        for t in res['targets']
    ]
    greedy_preds_sentences = [[i for i, _ in groupby(p)] for p in greedy_preds]
    greedy_preds_sentences = [
        ''.join([idx_to_tokens[i] for i in p if i != 0])
        for p in greedy_preds_sentences
    ]
    print(Data.compute_scores(targets=target_sentences,
                              predictions=greedy_preds_sentences, rec=False))

    vocabs = list(set([w for s in data['ids_to_transcript_train'].values()
                       for w in s.lower().split(' ')]))
    vocabs += list(set([w for s in data['ids_to_transcript_test'].values()
                        for w in s.lower().split(' ')]))
    # bk_tree = BKTree(vocabs)
    vocabs_set = set(vocabs)
    print(f'Vocab size = {len(vocabs_set)}')

    # for t, p in zip(target_sentences, greedy_preds_sentences):
    #     if t != p:
    #         for tw, pw in zip(t.split(' '), p.split(' ')):
    #             if tw != pw and pw not in vocabs_set:
    #                 candidats = defaultdict(list)
    #                 best_d = 100
    #                 for w in vocabs_set:
    #                     d = ed.eval(w, pw)
    #                     if d == best_d:
    #                         candidats[d].append(w)
    #                     elif d < best_d:
    #                         candidats = defaultdict(list)
    #                         candidats[d].append(w)
    #                         best_d = d
    #                     else:
    #                         continue
    #                 print(f'target = {tw} | pred = {pw}')
    #                 input(candidats)
    #                 resp = bk_tree.query(pw, 2)
    #                 input(f'{tw} | {pw}\n{resp}')

    lm_preds = []
    for t, p in tqdm(zip(target_sentences, greedy_preds_sentences),
                     total=len(target_sentences)):
        new_source = p
        pw = p.split(' ')
        if any([w not in vocabs_set for w in pw]):
            source = ' '.join([tokenizer.mask_token if w not in vocabs_set else w
                               for w in pw])
            enc_source = tokenizer.encode(source, return_tensors='pt')
            mask_token_index = torch.where(enc_source == tokenizer.mask_token_id)[1]

            token_logits = model(enc_source)[0]
            mask_token_logits = token_logits[0, mask_token_index, :]
            top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()

            for token in top_5_tokens:
                if tokenizer.decode([token]) in vocabs_set:
                    new_source = source.replace(tokenizer.mask_token,
                                                tokenizer.decode([token]))
                    break
            # new_source = source.replace(tokenizer.mask_token, tokenizer.decode([top_5_tokens[0]]))

        lm_preds.append(new_source)

    print(Data.compute_scores(targets=target_sentences, predictions=lm_preds, rec=False))
def trainWithMinibootstrap(self, negatives, positives, output_dir=None):
    caches = []
    model = []
    t = time.time()
    for i in range(self.num_classes - 1):
        if (len(positives[i]) != 0) & (len(negatives[i]) != 0):
            print('---------------------- Training Class number {} ----------------------'.format(i))
            first_time = True
            for j in range(len(negatives[i])):
                t_iter = time.time()
                if first_time:
                    dataset = {}
                    dataset['pos'] = positives[i]
                    dataset['neg'] = negatives[i][j]
                    caches.append(dataset)
                    model.append(None)
                    first_time = False
                else:
                    t_hard = time.time()
                    neg_pred = self.classifier.predict(model[i], negatives[i][j])
                    hard_idx = torch.where(neg_pred > self.hard_tresh)[0]
                    caches[i]['neg'] = torch.cat((caches[i]['neg'], negatives[i][j][hard_idx]), 0)
                    print('Hard negatives selected in {} seconds'.format(time.time() - t_hard))
                    print('Chosen {} hard negatives from the {}th batch'.format(len(hard_idx), j))

                print('Training with {} positives and {} negatives'.format(
                    len(caches[i]['pos']), len(caches[i]['neg'])))
                t_update = time.time()
                model[i] = self.updateModel(caches[i])
                print('Model updated in {} seconds'.format(time.time() - t_update))

                t_easy = time.time()
                if len(caches[i]['neg']) != 0 and not j == len(negatives[i]) - 1:
                    neg_pred = self.classifier.predict(model[i], caches[i]['neg'])
                    keep_idx = torch.where(neg_pred >= self.easy_tresh)[0]
                    easy_idx = len(caches[i]['neg']) - len(keep_idx)
                    caches[i]['neg'] = caches[i]['neg'][keep_idx]
                    print('Easy negatives selected in {} seconds'.format(time.time() - t_easy))
                    print('Removed {} easy negatives. {} Remaining'.format(easy_idx, len(caches[i]['neg'])))

                print('Iteration {}th done in {} seconds'.format(j, time.time() - t_iter))
                # Delete cache of the i-th classifier if it is the last iteration to free memory
                if j == len(negatives[i]) - 1 and not self.return_caches:
                    caches[i] = None
                    torch.cuda.empty_cache()
        else:
            model.append(None)
            dataset = {}
            caches.append(dataset)

    training_time = time.time() - t
    print('Online Classifier trained in {} seconds'.format(training_time))
    if output_dir and self.is_rpn:
        with open(os.path.join(output_dir, "result.txt"), "a") as fid:
            fid.write("RPN's Online Classifier training time: {}min:{}s \n".format(
                int(training_time / 60), round(training_time % 60)))
    elif output_dir and self.is_segmentation:
        with open(os.path.join(output_dir, "result.txt"), "a") as fid:
            fid.write("Online Segmentation training time: {}min:{}s \n".format(
                int(training_time / 60), round(training_time % 60)))
    elif output_dir and not self.is_rpn and not self.is_segmentation:
        with open(os.path.join(output_dir, "result.txt"), "a") as fid:
            fid.write("Detector's Online Classifier training time: {}min:{}s \n".format(
                int(training_time / 60), round(training_time % 60)))

    if self.return_caches:
        self.caches = caches
    return model
def _process_feature_extraction(self, output, im_scales, im_infos, feature_name="fc6", conf_thresh=0):
    batch_size = len(output[0]["proposals"])
    n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
    score_list = output[0]["scores"].split(n_boxes_per_image)
    score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
    feats = output[0][feature_name].split(n_boxes_per_image)
    cur_device = score_list[0].device

    feat_list = []
    info_list = []

    for i in range(batch_size):
        dets = output[0]["proposals"][i].bbox / im_scales[i]
        scores = score_list[i]
        max_conf = torch.zeros(scores.shape[0]).to(cur_device)
        conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
        start_index = 1
        # Column 0 of the scores matrix is for the background class
        if self.args.background:
            start_index = 0
        for cls_ind in range(start_index, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.5)
            max_conf[keep] = torch.where(
                # Better than max one till now and minimally greater
                # than conf_thresh
                (cls_scores[keep] > max_conf[keep]) &
                (cls_scores[keep] > conf_thresh_tensor[keep]),
                cls_scores[keep],
                max_conf[keep],
            )

        sorted_scores, sorted_indices = torch.sort(max_conf, descending=True)
        num_boxes = (sorted_scores[:self.args.num_features] != 0).sum()
        keep_boxes = sorted_indices[:self.args.num_features]
        feat_list.append(feats[i][keep_boxes])
        bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
        # Predict the class label using the scores
        objects = torch.argmax(scores[keep_boxes][:, start_index:], dim=1)

        info_list.append({
            "bbox": bbox.cpu().numpy(),
            "num_boxes": num_boxes.item(),
            "objects": objects.cpu().numpy(),
            "cls_prob": scores[keep_boxes][:, start_index:].cpu().numpy(),
            "image_width": im_infos[i]["width"],
            "image_height": im_infos[i]["height"],
        })

    return feat_list, info_list
def main():
    coeff = []
    ang_sb = []
    ang_np = []
    p_angles = []

    inputs_train, targets_train, inputs_test, targets_test = data.generate_data(args)
    results = {
        'inputs_train': inputs_train,
        'targets_train': targets_train,
        'inputs_test': inputs_test,
        'targets_test': targets_test
    }

    # Actual model that is being observed
    mdl = model.create_model(args, inputs_train, targets_train)
    print("\n===============================================================================================\n")
    start_params = mdl.params_flat

    # NOTE Pickling initial weights
    with open('outfile', 'wb') as sp:
        pickle.dump(mdl.params_flat, sp)

    new_params = train_model(args, mdl, results)

    with open('outfile', 'rb') as sp:
        start_params = pickle.load(sp)

    # NOTE Lottery-ticket pruning setup
    per = args.per
    nonzer = np.count_nonzero(mdl.params_flat)
    zer = len(mdl.params_flat) - nonzer
    x1 = nonzer - zer
    z1 = int(((x1 / 100.) * per))
    zer = z1 + zer
    print(" {} + {} = {}".format(0, nonzer, len(mdl.params_flat)))

    new_params, inputs, outputs = train_model(args, mdl, results)

    hess = mdl.hessian(mdl.params_flat)  # calculating the Hessian
    hess = torch.tensor(hess).float()    # converting the Hessian to a tensor
    eigenvalues, eigenvec = torch.symeig(hess, eigenvectors=True)
    hess, eigenvalues, eigenvec, coeff, ang_np, ang_sb, p_angles, top_vec = invar(
        mdl, args, inputs_train, targets_train, hess, eigenvalues, eigenvec,
        coeff, ang_np, ang_sb, p_angles)

    # NOTE Pruning loop
    print("===============================================================================================\n")
    for i in tqdm(range(0, args.prune_iter), desc="Pruning Progress", dynamic_ncols=True):
        print("\n{} +".format(zer)),
        pruned_params_flat, zer, nonzer = prune_function(mdl, zer)
        print("{} = {}".format(nonzer, len(mdl.params_flat)))
        x1 = nonzer - zer
        z1 = int((x1 / 100.) * per)
        zer = z1 + zer

        # reset surviving weights to their initial (pickled) values
        for p in range(0, len(start_params)):
            if pruned_params_flat[p] != 0.:
                pruned_params_flat[p] = start_params[p]

        mdl.params_flat = pruned_params_flat
        new_params, coeff = train_pruned_model(args, mdl, results, top_vec, coeff)

    coeff = torch.tensor(coeff)
    for i in range(coeff.shape[0]):
        a = torch.zeros(coeff[i].shape[0]).long()
        b = torch.arange(0, coeff[i].shape[0])
        c = torch.where(((coeff[i] > -0.1) & (coeff[i] < 0.1)), b, a)
        z = torch.zeros(coeff[i].shape[0]).fill_(0)
        z[torch.nonzero(c)] = coeff[i][torch.nonzero(c)]
        z = np.array(z)
        plt.plot(z)

    plt.xlabel('Dimension', fontsize=14)
    plt.ylabel('Coefficient', fontsize=14)
    pnpy = args.results_folder + '/plot1.png'
    plt.savefig(pnpy, format='png', pad_inches=5)

    args.suffix = args.results_folder + '/coeff.npy'
    np.save(args.suffix, coeff)
    args.suffix = args.results_folder + '/ang_sb.npy'
    np.save(args.suffix, ang_sb)
    args.suffix = args.results_folder + '/ang_np.npy'
    np.save(args.suffix, ang_np)
    args.suffix = args.results_folder + '/p_angles.npy'
    np.save(args.suffix, p_angles)

    return args.results_folder
def forward(self, x):
    # element-wise sign: +1 where x >= 0, -1 elsewhere
    output = torch.where(x >= 0, torch.ones_like(x), -torch.ones_like(x))
    return output
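# --- Added note (not from the original source) ---
# This differs from torch.sign only at exactly zero (torch.sign maps 0 to 0,
# the forward above maps it to +1); a quick illustrative check:
import torch

x = torch.tensor([-2.0, 0.0, 3.0])
binarized = torch.where(x >= 0, torch.ones_like(x), -torch.ones_like(x))
print(binarized)      # tensor([-1., 1., 1.])
print(torch.sign(x))  # tensor([-1., 0., 1.]) -- differs at zero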
rc("text", usetex=True) X_0 = X_0.cpu() Y_0 = Y_0.cpu() # +++ ground truth +++ fig, ax = plt.subplots(clear=True, figsize=(2.5, 2.5), dpi=200) im = _implot(ax, X_0) # method-wise plots for (idx, method) in methods.iterrows(): # +++ reconstructions per noise level +++ for idx_noise in range(len(noise_rel_show)): idx_noise_cur = torch.where( noise_rel == noise_rel_show[idx_noise])[0] X_cur = results.loc[idx].X[idx_noise_cur, ...].squeeze(0) fig, ax = plt.subplots( 1, 2, clear=True, figsize=(5, 2.5), dpi=200, gridspec_kw={"wspace": 0.02}, ) im = _implot(ax[0], X_cur) im = _implot(ax[1], (X_cur - X_0[0:1, ...]).abs(), vmax=0.6)
def forward(self, classifications, regressions, anchors, annotations, **kwargs):
    alpha = 0.25
    gamma = 2.0
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if bbox_annotation.shape[0] == 0:
            if torch.cuda.is_available():
                alpha_factor = torch.ones_like(classification) * alpha
                alpha_factor = alpha_factor.cuda()
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))
                cls_loss = focal_weight * bce

                regression_losses.append(torch.tensor(0).to(dtype).cuda())
                classification_losses.append(cls_loss.sum())
            else:
                alpha_factor = torch.ones_like(classification) * alpha
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))
                cls_loss = focal_weight * bce

                regression_losses.append(torch.tensor(0).to(dtype))
                classification_losses.append(cls_loss.sum())
            continue

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # compute the loss for classification
        targets = torch.ones_like(classification) * -1
        if torch.cuda.is_available():
            targets = targets.cuda()

        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)

        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

        alpha_factor = torch.ones_like(targets) * alpha
        if torch.cuda.is_available():
            alpha_factor = alpha_factor.cuda()

        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) +
                (1.0 - targets) * torch.log(1.0 - classification))
        cls_loss = focal_weight * bce

        zeros = torch.zeros_like(cls_loss)
        if torch.cuda.is_available():
            zeros = zeros.cuda()
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

        classification_losses.append(
            cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(targets - regression[positive_indices, :])

            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0
            )
            regression_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))

    # debug
    imgs = kwargs.get('imgs', None)
    if imgs is not None:
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        obj_list = kwargs.get('obj_list', None)
        out = postprocess(imgs.detach(),
                          torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                          regressions.detach(), classifications.detach(),
                          regressBoxes, clipBoxes, 0.5, 0.3)
        imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
        imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8)
        imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
        display(out, imgs, obj_list, imshow=False, imwrite=True)

    return [torch.stack(classification_losses).mean(dim=0),
            torch.stack(regression_losses).mean(dim=0) * 50]
    # https://github.com/google/automl/blob/6fdd1de778408625c1faf368a327fe36ecd41bf7/efficientdet/hparams_config.py#L233
def train_class_one_t(dhandler_class, dhandlers_rp, dec, d_hnet, net, device,
                      config, writer, t):
    """ Train continual learning experiments on MNIST dataset for one task.
    In this function the main training logic is implemented.
    After setting the optimizers for the network and hypernetwork if
    applicable, the training is structured as follows:
    First, we get a training batch of the current task. Depending on the
    learning scenario, we choose output heads and build targets accordingly.
    Second, if ``t`` is greater than 1, we add a loss term concerning
    predictions of replayed data. See :func:`get_fake_data_loss` for details.
    Third, to protect the hypernetwork from forgetting, we add an additional
    L2 loss term, namely the difference between its current output given an
    embedding and checkpointed targets.
    Finally, we track some training statistics.

    Args:
        (....): See docstring of function :func:`train_tasks`.
        t: Task id.
    """

    # if cl with task inference we have the classifier empowered with a hnet
    if config.training_with_hnet:
        net_hnet = net[1]
        net = net[0]
        net.train()
        net_hnet.train()
        params_to_regularize = list(net_hnet.theta)
        optimizer = optim.Adam(params_to_regularize,
                               lr=config.class_lr, betas=(0.9, 0.999))
        c_emb_optimizer = optim.Adam([net_hnet.get_task_emb(t)],
                                     lr=config.class_lr_emb, betas=(0.9, 0.999))
    else:
        net.train()
        net_hnet = None
        optimizer = optim.Adam(net.parameters(),
                               lr=config.class_lr, betas=(0.9, 0.999))

    # dont train the replay model if available
    if dec is not None:
        dec.eval()
    if d_hnet is not None:
        d_hnet.eval()

    # compute targets if classifier is trained with hnet
    if t > 0 and config.training_with_hnet:
        if config.online_target_computation:
            # Compute targets for the regularizer whenever they are needed.
            # -> Computationally expensive.
            targets_C = None
            prev_theta = [p.detach().clone() for p in net_hnet.theta]
            prev_task_embs = [p.detach().clone() for p in net_hnet.get_task_embs()]
        else:
            # Compute targets for the regularizer once and keep them all in
            # memory -> Memory expensive.
            targets_C = hreg.get_current_targets(t, net_hnet)
            prev_theta = None
            prev_task_embs = None

    dhandler_class.reset_batch_generator()

    # make copy of network
    if t >= 1:
        net_copy = copy.deepcopy(net)

    # set training_iterations if epochs are set
    if config.epochs == -1:
        training_iterations = config.n_iter
    else:
        assert(config.epochs > 0)
        training_iterations = config.epochs * \
            int(np.ceil(dhandler_class.num_train_samples / config.batch_size))

    if config.class_incremental:
        training_iterations = int(training_iterations / config.out_dim)

    # Whether we will calculate the regularizer.
    calc_reg = t > 0 and config.class_beta > 0 and config.training_with_hnet

    # set if we want the reg only computed for a subset of the previous tasks
    if config.hnet_reg_batch_size != -1:
        hnet_reg_batch_size = config.hnet_reg_batch_size
    else:
        hnet_reg_batch_size = None

    for i in range(training_iterations):
        # set optimizer to zero
        optimizer.zero_grad()
        if net_hnet is not None:
            c_emb_optimizer.zero_grad()

        # Get real data
        real_batch = dhandler_class.next_train_batch(config.batch_size)
        X_real = dhandler_class.input_to_torch_tensor(real_batch[0], device, mode='train')
        T_real = dhandler_class.output_to_torch_tensor(real_batch[1], device, mode='train')

        if i % 100 == 0 and config.show_plots:
            fig_real = _plotImages(X_real, config)
            writer.add_figure('train_class_' + str(t) + '_real', fig_real,
                              global_step=i)

        #################################################
        # Choosing output heads and constructing targets
        #################################################

        # If we train a task inference net or class incremental learning, we
        # construct a target for every single class/task.
        if config.class_incremental or config.training_task_infer:
            # in the beginning of training, we look at two output neurons
            task_out = [0, t + 1]
            T_real = torch.zeros((config.batch_size, task_out[1])).to(device)
            T_real[:, task_out[1] - 1] = 1
        elif config.cl_scenario == 1 or config.cl_scenario == 2:
            if config.cl_scenario == 1:
                # take the task specific output neuron
                task_out = [t * config.out_dim, t * config.out_dim + config.out_dim]
            else:
                # always all output neurons, only one head is used
                task_out = [0, config.out_dim]
        else:
            # The number of output neurons is generic and can grow i.e. we
            # do not have to know the number of tasks before we start
            # learning.
            if not config.infer_output_head:
                task_out = [0, (t + 1) * config.out_dim]
                T_real = torch.cat((torch.zeros((config.batch_size,
                                                 t * config.out_dim)).to(device),
                                    T_real), dim=1)
            # this is a special case where we will infer the task id by another
            # neural network so we can train on the correct output head directly
            # and use the inferred output head to compute the prediction
            else:
                task_out = [t * config.out_dim, t * config.out_dim + config.out_dim]

        # compute loss of current data
        if config.training_with_hnet:
            weights_c = net_hnet.forward(t)
        else:
            weights_c = None

        Y_hat_logits = net.forward(X_real, weights_c)
        Y_hat_logits = Y_hat_logits[:, task_out[0]:task_out[1]]

        if config.soft_targets:
            soft_label = 0.95
            num_classes = T_real.shape[1]
            soft_targets = torch.where(
                T_real == 1,
                torch.Tensor([soft_label]).to(device),
                torch.Tensor([(1 - soft_label) / (num_classes - 1)]).to(device))
            soft_targets = soft_targets.to(device)
            loss_task = Classifier.softmax_and_cross_entropy(Y_hat_logits, soft_targets)
        else:
            loss_task = Classifier.softmax_and_cross_entropy(Y_hat_logits, T_real)

        ############################
        # compute loss for fake data
        ############################
        # Get fake data (of all tasks up until now and merge into list)
        if t >= 1 and not config.training_with_hnet:
            fake_loss = get_fake_data_loss(dhandlers_rp, net, dec, d_hnet, device,
                                           config, writer, t, i, net_copy)
            loss_task = (1 - config.l_rew) * loss_task + config.l_rew * fake_loss

        loss_task.backward(retain_graph=calc_reg,
                           create_graph=calc_reg and config.backprop_dt)

        # compute hypernet loss and fix embedding -> change current embs
        if calc_reg:
            if config.no_lookahead:
                dTheta = None
            else:
                dTheta = opstep.calc_delta_theta(optimizer,
                                                 config.use_sgd_change,
                                                 lr=config.class_lr,
                                                 detach_dt=not config.backprop_dt)
            loss_reg = config.class_beta * hreg.calc_fix_target_reg(
                net_hnet, t, targets=targets_C, mnet=net, dTheta=dTheta,
                dTembs=None, prev_theta=prev_theta,
                prev_task_embs=prev_task_embs, batch_size=hnet_reg_batch_size)
            loss_reg.backward()  # compute backward pass

        if not config.dont_train_main_model:
            optimizer.step()

        if net_hnet is not None and config.train_class_embeddings:
            c_emb_optimizer.step()

        # some stats saving
        if i % 50 == 0:
            # compute accuracies for tracking
            Y_hat_logits = net.forward(X_real, weights_c)
            Y_hat_logits = Y_hat_logits[:, task_out[0]:task_out[1]]
            Y_hat = F.softmax(Y_hat_logits, dim=1)
            classifier_accuracy = Classifier.accuracy(Y_hat, T_real) * 100.0
            writer.add_scalar('train/task_%d/class_accuracy' % t,
                              classifier_accuracy, i)
            writer.add_scalar('train/task_%d/loss_task' % t, loss_task, i)
            if t >= 1 and not config.training_with_hnet:
                writer.add_scalar('train/task_%d/fake_loss' % t, fake_loss, i)

        # plot some gradient statistics
        if i % 200 == 0:
            if not config.dont_train_main_model:
                total_norm = 0
                if config.training_with_hnet:
                    params = net_hnet.theta
                else:
                    params = net.parameters()
                for p in params:
                    param_norm = p.grad.data.norm(2)
                    total_norm += param_norm.item() ** 2
                total_norm = total_norm ** (1. / 2)
                # TODO write gradient histograms?
                writer.add_scalar('train/task_%d/main_params_grad_norms' % t,
                                  total_norm, i)

            if net_hnet is not None and config.train_class_embeddings:
                total_norm = 0
                for p in [net_hnet.get_task_emb(t)]:
                    param_norm = p.grad.data.norm(2)
                    total_norm += param_norm.item() ** 2
                total_norm = total_norm ** (1. / 2)
                writer.add_scalar('train/task_%d/hnet_emb_grad_norms' % t,
                                  total_norm, i)

        if i % 200 == 0:
            msg = 'Training step {}: Classifier Accuracy: {:.3f} ' + \
                  '(on current training batch).'
            print(msg.format(i, classifier_accuracy))
def sample_fn(model):
    sample = torch.sigmoid(model.sample(n_samples=16))
    return torch.where(sample < 0.5, torch.zeros_like(sample), torch.ones_like(sample))
def reset_state(self, h, d):
    # zero the hidden state wherever the episode is done (d is a boolean mask)
    return torch.where(d.unsqueeze(-1), torch.zeros_like(h), h)
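# --- Added usage sketch (not from the original source) ---
# Toy usage of the pattern above: zero the hidden states of finished episodes
# in a batched RL rollout. Shapes and values are illustrative.
import torch

h = torch.randn(4, 8)                            # hidden states for 4 envs
done = torch.tensor([False, True, False, True])  # which envs just finished
h = torch.where(done.unsqueeze(-1), torch.zeros_like(h), h)
print(h[1].abs().sum(), h[3].abs().sum())        # both zero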
def entropy(self):
    if len(self.masks) == 0:
        return super(CategoricalMasked, self).entropy()
    p_log_p = self.logits * self.probs
    # zero out the contribution of masked (invalid) actions
    p_log_p = torch.where(self.masks, p_log_p, torch.tensor(0.).to(device))
    return -p_log_p.sum(-1)
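# --- Added sketch (not from the original source) ---
# A minimal, self-contained version of the masked-entropy computation above.
# `masks` marks which actions are valid; masked logits are pushed to a very
# negative value before normalization, and the masked p*log(p) terms are
# zeroed so they cannot contaminate the sum. Shapes are illustrative.
import torch
from torch.distributions import Categorical

logits = torch.randn(2, 5)
masks = torch.tensor([[1, 1, 0, 0, 1],
                      [1, 0, 1, 1, 0]], dtype=torch.bool)
masked_logits = torch.where(masks, logits, torch.tensor(-1e8))
dist = Categorical(logits=masked_logits)
p_log_p = dist.logits * dist.probs           # dist.logits are normalized log-probs
p_log_p = torch.where(masks, p_log_p, torch.zeros_like(p_log_p))
print(-p_log_p.sum(-1))                      # entropy over valid actions only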
def eval_single_comparison_with_SVM_hinge_loss(o_p_ref, comparisons, hrjf, o_ori_img):
    # print("o_p_ref", o_p_ref.shape)  # o_p_ref torch.Size([3, 3, 384, 512])

    # c refl_img: mean of one predicted reflectance
    # refl_img = torch.mean(o_p_ref, dim=0, keepdim=True).squeeze()
    refl_img = o_p_ref.squeeze()

    rows = o_ori_img.shape[0]
    cols = o_ori_img.shape[1]

    # c error_sum: sum of all errors from all comparisons in one image
    error_sum = 0.0
    # c weight_sum: sum of all weights from all comparisons in one image
    weight_sum = 0.0
    num_comp = float(len(comparisons))

    # JSON GT for 1 image, containing all relative reflectance comparison information.
    # c c: one comparison from the image's comparisons
    for c in comparisons:
        # c n_po1: id of point1
        n_po1 = c['point1']
        # c point1: Point1 from one comparison
        point1 = hrjf.id_to_points[n_po1]
        n_po2 = c['point2']
        # c point2: Point2 from one comparison
        point2 = hrjf.id_to_points[n_po2]
        # c darker: Darker information from one comparison
        darker = c['darker']
        # Weight information from one comparison
        weight = c['darker_score']  # e.g. 1.14812035203497

        # Check exceptions
        if not point1['opaque'] or not point2['opaque']:
            # Pass this judgement
            continue
        # weight is None or weight < 0 -> invalid darker_score
        # (check None first so the comparison cannot raise)
        if weight is None or weight < 0:
            raise ValueError("Invalid darker_score: %s" % weight)
        if darker not in ('1', '2', 'E'):
            raise ValueError("Invalid darker: %s" % darker)

        x1, y1, x2, y2, darker = int(point1['x'] * cols), \
                                 int(point1['y'] * rows), \
                                 int(point2['x'] * cols), \
                                 int(point2['y'] * rows), \
                                 darker

        # c R1: scalar intensity value of point1 from the predicted intensity image
        R1 = refl_img[y1, x1]
        R2 = refl_img[y2, x2]
        R2 = torch.where(torch.abs(R2) < 1e-4,
                         torch.Tensor([1e-4]).squeeze().cuda(), R2)

        div_R1_R2 = torch.div(R1, R2)

        # c dx_inv: 1/(1+delta+xi)
        dx_inv = (1.0 / (1.0 + delta + xi))
        # c dx: 1+delta+xi
        dx = (1.0 + delta + xi)
        # c dx_m_inv: 1/(1+delta-xi)
        dx_m_inv = (1.0 / (1.0 + delta - xi))
        # c dx_m: 1+delta-xi
        dx_m = (1.0 + delta - xi)

        if darker == '1':
            # c ersp: error of a single pair
            ersp = torch.max(torch.Tensor([0.0]).cuda(), div_R1_R2 - dx_inv)
            error_sum += ersp
            weight_sum += weight
        elif darker == '2':
            ersp = torch.max(torch.Tensor([0.0]).cuda(), dx - div_R1_R2)
            error_sum += ersp
            weight_sum += weight
        elif darker == 'E':
            if xi <= delta:
                ersp = torch.max(torch.Tensor([0.0]).cuda(), dx_m_inv - div_R1_R2)
                error_sum += ersp
                weight_sum += weight
            else:
                ersp = torch.max(torch.Tensor([0.0]).cuda(), div_R1_R2 - dx_m)
                error_sum += ersp
                weight_sum += weight

    # All comparisons in this image have been processed.
    if weight_sum:
        # c whdr: calculated WHDR of one image
        whdr = error_sum / weight_sum
    else:
        # weight_sum == 0 means there were no valid comparisons
        whdr = 0.0

    # Return WHDR score of one image
    return whdr / num_comp
def beta_smooth_l1_loss(input: Tensor, target: Tensor, beta: float) -> Tensor:
    diff = torch.abs(input - target)  # element-wise coordinate difference
    loss = torch.where(diff < beta, 0.5 * diff**2 / beta, diff - 0.5 * beta)
    loss = loss.sum() / (input.numel() + 1e-8)  # 1e-8 for numerical stability
    return loss
print(x[0].shape)  # same as x[0, :]
print(x[:, 0].shape)
print(x[2, 0:10].shape)  # 0:10 --> [0, 1, 2, ..., 9]
print(x[0, 0])

# Fancy indexing
x = torch.arange(10)
indices = [2, 5, 8]
print(x[indices])

x = torch.rand((3, 5))
rows = torch.tensor([1, 0])
cols = torch.tensor([4, 0])
print(x[rows, cols])

# More advanced indexing
x = torch.arange(10)
print(x[(x < 2) | (x > 8)])
print(x[x.remainder(2) == 0])

# Other stuff
print(torch.where(x > 5, x, x * 2))
print(torch.tensor([0, 0, 1, 2, 2, 3, 4]).unique())
print(x.ndimension())  # e.g. a 5x5x5 tensor ==> 3 (here x is 1-D, so it prints 1)
print(x.numel())  # number of elements
def train(self, train_examples, task_name, output_mode, eval_labels, num_labels,
          train_dataloader, eval_dataloader, eval_examples, tokenizer,
          mm_eval_labels, mm_eval_dataloader):
    """ quant-aware pretraining + KD """

    # Prepare loss functions
    loss_mse = MSELoss()

    self.teacher_model.eval()
    teacher_results = self._do_eval(self.teacher_model, task_name, eval_dataloader,
                                    output_mode, eval_labels, num_labels)
    logging.info("Teacher network evaluation")
    for key in sorted(teacher_results.keys()):
        logging.info("  %s = %s", key, str(teacher_results[key]))

    self.teacher_model.train()  # switch to train mode to supervise students

    # Train and evaluate
    # num_layers = self.student_model.config.num_hidden_layers + 1
    global_step = self.prev_global_step
    best_dev_acc = 0.0
    output_eval_file = os.path.join(self.args.output_dir, "eval_results.txt")

    logging.info("***** Running training, Task: %s, Job id: %s*****" %
                 (self.args.task_name, self.args.job_id))
    logging.info("  Distill rep attn: %d, Distill logit: %d" %
                 (self.args.distill_rep_attn, self.args.distill_logit))
    logging.info("  Num examples = %d", len(train_examples))
    logging.info("  Batch size = %d", self.args.batch_size)
    logging.info("  Num steps = %d", self.num_train_optimization_steps)

    global_tr_loss = 0  # record global average training loss to plot

    for epoch_ in range(self.args.num_train_epochs):
        tr_loss = 0.
        tr_att_loss = 0.
        tr_rep_loss = 0.
        tr_cls_loss = 0.

        nb_tr_examples, nb_tr_steps = 0, 0

        for step, batch in enumerate(train_dataloader):
            self.student_model.train()
            batch = tuple(t.to(self.device) for t in batch)

            input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch

            att_loss = 0.
            rep_loss = 0.
            cls_loss = 0.
            rep_loss_layerwise = []
            att_loss_layerwise = []

            student_logits, student_atts, student_reps = self.student_model(
                input_ids, segment_ids, input_mask)

            if self.args.distill_logit or self.args.distill_rep_attn:
                # use distillation
                with torch.no_grad():
                    teacher_logits, teacher_atts, teacher_reps = self.teacher_model(
                        input_ids, segment_ids, input_mask)

                # NOTE: config loss according to stage
                loss = 0.
                if self.args.distill_logit:
                    cls_loss = soft_cross_entropy(
                        student_logits / self.args.temperature,
                        teacher_logits / self.args.temperature)
                    loss += cls_loss
                    tr_cls_loss += cls_loss.item()

                if self.args.distill_rep_attn:
                    for student_att, teacher_att in zip(student_atts, teacher_atts):
                        student_att = torch.where(
                            student_att <= -1e2,
                            torch.zeros_like(student_att).to(self.device),
                            student_att)
                        teacher_att = torch.where(
                            teacher_att <= -1e2,
                            torch.zeros_like(teacher_att).to(self.device),
                            teacher_att)

                        tmp_loss = loss_mse(student_att, teacher_att)
                        att_loss += tmp_loss
                        att_loss_layerwise.append(tmp_loss.item())

                    for student_rep, teacher_rep in zip(student_reps, teacher_reps):
                        tmp_loss = loss_mse(student_rep, teacher_rep)
                        rep_loss += tmp_loss
                        rep_loss_layerwise.append(tmp_loss.item())

                    tr_att_loss += att_loss.item()
                    tr_rep_loss += rep_loss.item()
                    loss += rep_loss + att_loss
            else:
                if output_mode == "classification":
                    loss_fct = CrossEntropyLoss()
                    loss = loss_fct(student_logits, label_ids.view(-1))
                elif output_mode == "regression":
                    loss_mse = MSELoss()
                    loss = loss_mse(student_logits.view(-1), label_ids.view(-1))

            if self.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu

            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps

            loss.backward()

            tr_loss += loss.item()
            global_tr_loss += loss.item()
            nb_tr_examples += label_ids.size(0)
            nb_tr_steps += 1

            # evaluation and save model
            if global_step % self.args.eval_step == 0 or \
                    global_step == len(train_dataloader) - 1:
                logging.info("  Epoch = {} iter {} step".format(epoch_, global_step))
                logging.info("  Num examples = %d", len(eval_examples))
                logging.info(f"  Previous best = {best_dev_acc}")

                loss = tr_loss / (step + 1)
                global_avg_loss = global_tr_loss / (global_step + 1)
                cls_loss = tr_cls_loss / (step + 1)
                att_loss = tr_att_loss / (step + 1)
                rep_loss = tr_rep_loss / (step + 1)

                self.student_model.eval()
                result = self._do_eval(self.student_model, task_name, eval_dataloader,
                                       output_mode, eval_labels, num_labels)
                result['global_step'] = global_step
                result['cls_loss'] = cls_loss
                result['att_loss'] = att_loss
                result['rep_loss'] = rep_loss
                result['loss'] = loss
                result['global_loss'] = global_avg_loss

                preds = student_logits.detach().cpu().numpy()
                train_label = label_ids.cpu().numpy()
                if output_mode == "classification":
                    preds = np.argmax(preds, axis=1)
                elif output_mode == "regression":
                    preds = np.squeeze(preds)
                result['train_batch_acc'] = list(
                    compute_metrics(task_name, preds, train_label).values())[0]

                if self.args.distill_rep_attn:
                    logging.info("embedding layer rep_loss: %.8f" % (rep_loss_layerwise[0]))
                    rep_loss_layerwise = rep_loss_layerwise[1:]
                    for lid in range(len(rep_loss_layerwise)):
                        logging.info("layer %d rep_loss: %.8f" % (lid + 1, rep_loss_layerwise[lid]))
                        logging.info("layer %d att_loss: %.8f" % (lid + 1, att_loss_layerwise[lid]))

                result_to_file(result, output_eval_file)

                save_model = False

                if task_name in acc_tasks and result['acc'] > best_dev_acc:
                    best_dev_acc = result['acc']
                    save_model = True

                if task_name in corr_tasks and result['corr'] > best_dev_acc:
                    best_dev_acc = result['corr']
                    save_model = True

                if task_name in mcc_tasks and result['mcc'] > best_dev_acc:
                    best_dev_acc = result['mcc']
                    save_model = True

                if save_model:
                    self._save()

                    if task_name == "mnli":
                        logging.info('MNLI-mm Evaluation')
                        result = self._do_eval(self.student_model, 'mnli-mm',
                                               mm_eval_dataloader, output_mode,
                                               mm_eval_labels, num_labels)
                        result['global_step'] = global_step
                        tmp_output_eval_file = os.path.join(self.output_dir + '-MM',
                                                            "eval_results.txt")
                        result_to_file(result, tmp_output_eval_file)

            # if self.args.quantize_weight:
            #     self.quanter.restore()

            if (step + 1) % self.args.gradient_accumulation_steps == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
                global_step += 1
def get_inpainting_mask(x):
    # Pixels whose channels sum to -3 (i.e. all three channels are -1) are
    # treated as holes to be inpainted. Note the original unpacked the index
    # tensors into a variable named `x`, shadowing the input; renamed here.
    mask = torch.ones(x.shape, device=x.device)
    bs, rows, cols = torch.where(x.sum(dim=1) == -3)
    mask[bs, :, rows, cols] = 0
    return mask
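# torch.where has two distinct forms, both used in this document: the
# single-argument form returns per-dimension index tensors (like nonzero),
# while the three-argument form is an elementwise select. A quick sketch
# with made-up values:
import torch

img = -torch.ones(1, 3, 2, 2)   # a 1x3x2x2 "image" that is all holes
img[0, :, 0, 0] = 0.5           # one valid pixel
bs, rows, cols = torch.where(img.sum(dim=1) == -3)      # index form
print(bs, rows, cols)                                   # every hole location
sel = torch.where(img > 0, img, torch.zeros_like(img))  # select form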
def forward(self, images, targets=None):
    # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
    """
    Arguments:
        images (list[Tensor]): images to be processed
        targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] with additional fields
            like `scores`, `labels` and `mask` (for Mask R-CNN models).
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    if self.training:
        assert targets is not None
        for target in targets:
            boxes = target["boxes"]
            if isinstance(boxes, torch.Tensor):
                if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                    raise ValueError("Expected target boxes to be a tensor "
                                     "of shape [N, 4], got {:}.".format(boxes.shape))
            else:
                raise ValueError("Expected target boxes to be of type "
                                 "Tensor, got {:}.".format(type(boxes)))

    # get the original image sizes
    original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2
        original_image_sizes.append((val[0], val[1]))

    # transform the input
    images, targets = self.transform(images, targets)

    # Check for degenerate boxes
    # TODO: Move this to a function
    if targets is not None:
        for target_idx, target in enumerate(targets):
            boxes = target["boxes"]
            degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
            if degenerate_boxes.any():
                # print the first degenerate box
                bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                degen_bb: List[float] = boxes[bb_idx].tolist()
                raise ValueError(
                    "All bounding boxes should have positive height and width."
                    " Found invalid box {} for target at index {}.".format(
                        degen_bb, target_idx))

    # get the features from the backbone
    features = self.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([('0', features)])

    # TODO: Do we want a list or a dict?
    features = list(features.values())

    # compute the retinanet head outputs using the features
    head_outputs = self.head(features)

    # create the set of anchors
    anchors = self.anchor_generator(images, features)

    losses = {}
    detections = torch.jit.annotate(List[Dict[str, Tensor]], [])
    if self.training:
        assert targets is not None
        # compute the losses
        losses = self.compute_loss(targets, head_outputs, anchors)
    else:
        # compute the detections
        if self.ssm:
            detections = self.ssm_postprocess_detections(
                head_outputs, anchors, images.image_sizes)
        else:
            detections = self.postprocess_detections(
                head_outputs, anchors, images.image_sizes)
        detections = self.transform.postprocess(
            detections, images.image_sizes, original_image_sizes)

    if torch.jit.is_scripting():
        if not self._has_warned:
            warnings.warn(
                "RetinaNet always returns a (Losses, Detections) tuple in scripting")
            self._has_warned = True
        return (losses, detections)
    return self.eager_outputs(losses, detections)
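# The degenerate-box check above reduces to a one-liner; a small standalone
# sketch with made-up boxes in (x1, y1, x2, y2) format:
import torch

boxes = torch.tensor([[0., 0., 10., 10.], [5., 5., 5., 2.]])
degenerate = (boxes[:, 2:] <= boxes[:, :2]).any(dim=1)
print(torch.where(degenerate)[0])  # tensor([1]) -> index of the invalid box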
def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
):
    r"""
    labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
        Labels for computing the token classification loss. Indices should be in
        ``[0, ..., config.num_labels - 1]``.
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.transformer(
        input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    sequence_output = outputs[0]
    sequence_output = self.dropout(sequence_output)
    logits = self.classifier(sequence_output)

    loss = None
    if labels is not None:
        loss_fct = CrossEntropyLoss()
        # Only keep active parts of the loss
        if attention_mask is not None:
            active_loss = attention_mask.view(-1) == 1
            active_logits = logits.view(-1, self.num_labels)
            active_labels = torch.where(
                active_loss, labels.view(-1),
                torch.tensor(loss_fct.ignore_index).type_as(labels))
            loss = loss_fct(active_logits, active_labels)
        else:
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

    if not return_dict:
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

    return TokenClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
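# The "active loss" trick above swaps the labels of padded positions for
# CrossEntropyLoss.ignore_index so they contribute nothing to the loss. A
# standalone sketch with toy shapes (batch 2, sequence length 3, 5 labels):
import torch
from torch.nn import CrossEntropyLoss

loss_fct = CrossEntropyLoss()
logits = torch.randn(2, 3, 5)
labels = torch.randint(0, 5, (2, 3))
attention_mask = torch.tensor([[1, 1, 0], [1, 0, 0]])

active_loss = attention_mask.view(-1) == 1
active_labels = torch.where(
    active_loss, labels.view(-1),
    torch.tensor(loss_fct.ignore_index).type_as(labels))
loss = loss_fct(logits.view(-1, 5), active_labels)
print(loss)  # only the three unpadded positions contribute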
def compute_vertex_normal(vertices: torch.Tensor, indices: torch.Tensor):
    """
        Compute vertex normals as a weighted average of nearby face normals,
        using Nelson Max's algorithm. See `Weights for Computing Vertex Normals
        from Facet Vectors <https://escholarship.org/content/qt7657d8h3/qt7657d8h3.pdf?t=ptt283>`_.

        Args
        ====
        vertices: torch.Tensor
            3D positions of vertices:
            float32 tensor with size num_vertices x 3
        indices: torch.Tensor
            vertex indices of triangle faces:
            int32 tensor with size num_triangles x 3

        Returns
        =======
        torch.Tensor
            float32 tensor with size num_vertices x 3 representing vertex normals
    """
    def dot(v1, v2):
        return torch.sum(v1 * v2, dim=1)

    def squared_length(v):
        return torch.sum(v * v, dim=1)

    def length(v):
        return torch.sqrt(squared_length(v))

    def safe_asin(v):
        # Hack: the derivative of asin at 1 is infinite, so clamp the input
        # slightly below 1 to keep gradients finite.
        return torch.asin(v.clamp(0, 1 - 1e-6))

    normals = torch.zeros(vertices.shape, dtype=torch.float32, device=vertices.device)
    v = [vertices[indices[:, 0].long(), :],
         vertices[indices[:, 1].long(), :],
         vertices[indices[:, 2].long(), :]]
    for i in range(3):
        v0 = v[i]
        v1 = v[(i + 1) % 3]
        v2 = v[(i + 2) % 3]
        e1 = v1 - v0
        e2 = v2 - v0
        e1_len = length(e1)
        e2_len = length(e2)
        side_a = e1 / torch.reshape(e1_len, [-1, 1])
        side_b = e2 / torch.reshape(e2_len, [-1, 1])
        if i == 0:
            n = torch.cross(side_a, side_b)
            n = torch.where(length(n).reshape(-1, 1).expand(-1, 3) > 0,
                            n / torch.reshape(length(n), [-1, 1]),
                            torch.zeros(n.shape, dtype=n.dtype, device=n.device))
        angle = torch.where(dot(side_a, side_b) < 0,
                            math.pi - 2.0 * safe_asin(0.5 * length(side_a + side_b)),
                            2.0 * safe_asin(0.5 * length(side_b - side_a)))
        sin_angle = torch.sin(angle)

        # XXX: Inefficient, but scatter_add_ is the only way in PyTorch to
        # accumulate per-face contributions into shared vertices here.
        e1e2 = e1_len * e2_len
        # contrib is 0 when e1e2 is 0
        contrib = torch.where(
            e1e2.reshape(-1, 1).expand(-1, 3) > 0,
            n * (sin_angle / e1e2).reshape(-1, 1).expand(-1, 3),
            torch.zeros(n.shape, dtype=torch.float32, device=vertices.device))
        index = indices[:, i].long().reshape(-1, 1).expand(-1, 3)
        normals.scatter_add_(0, index, contrib)

    # Assign (0, 0, 1) to degenerate faces
    degenerate_normals = torch.zeros(normals.shape, dtype=torch.float32,
                                     device=vertices.device)
    degenerate_normals[:, 2] = 1.0
    normals = torch.where(length(normals).reshape(-1, 1).expand(-1, 3) > 0,
                          normals / torch.reshape(length(normals), [-1, 1]),
                          degenerate_normals)
    assert torch.isfinite(normals).all()
    return normals.contiguous()
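# The per-corner accumulation above relies on scatter_add_, which sums
# contributions that target the same vertex. A toy sketch: two faces both
# contribute to vertex 1, so its row receives the sum of both contributions.
import torch

normals = torch.zeros(3, 3)
index = torch.tensor([[1, 1, 1], [1, 1, 1]])  # both rows write to vertex 1
contrib = torch.tensor([[0., 0., 1.], [0., 1., 0.]])
normals.scatter_add_(0, index, contrib)
print(normals)  # row 1 is [0., 1., 1.]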
def nstep_target(idx, policy_net, target_net, memory, steps=20, device='cpu',
                 BATCH_SIZE=32, GAMMA=0.99, double_dqn=False):
    range_ = np.arange(0, steps + 1)
    idx_nReward = idx.reshape(-1, 1) + range_
    _batch, _ = memory.sample(idx=idx_nReward.ravel())
    n_batch = Transition(*zip(*_batch))

    non_final_mask_rewards = torch.tensor(
        tuple(map(lambda s: s is not None, n_batch.next_state)),
        device=device, dtype=torch.bool).view(idx_nReward.shape)
    non_final_mask = torch.prod(non_final_mask_rewards[:, :-1], 1).bool()
    non_final_mask_r = non_final_mask_rewards[:, :-1]

    # Build a cumulative mask so rewards after an episode boundary are zeroed.
    r23 = non_final_mask_r[:, :-1]
    r23 = r23.t().view(r23.shape[1], r23.shape[0], 1).expand(
        r23.shape[1], r23.shape[0], r23.shape[1])
    r12 = non_final_mask_r[:, 1:]
    r = torch.prod(r23, 0) * r12.long()
    r_mask = torch.cat([non_final_mask_rewards[:, 0].view(-1, 1).long(), r], 1)

    rewards = tuple(map(lambda r: torch.tensor([r], device=device), n_batch.reward))
    n_rewards = torch.cat(rewards).view(idx_nReward.shape)[:, 1:] * r_mask.float()

    gamma_n = np.geomspace(1, GAMMA ** (steps - 1), steps)
    discounted_rewards = n_rewards * torch.from_numpy(gamma_n).float().to(device)
    discounted_rewards = torch.sum(discounted_rewards, dim=1).to(device)

    batch_future, _ = memory.sample(idx + steps - 1)
    batch_ = Transition(*zip(*batch_future))
    next_states_ = [s for s in batch_.next_state]
    non_final_next_states_mask = torch.tensor(
        tuple(map(lambda s: s is not None, batch_.next_state)),
        device=device, dtype=torch.bool)
    non_final_mask = non_final_next_states_mask * non_final_mask
    non_final_next_states = torch.cat(
        itemgetter(*list(torch.where(non_final_mask == 1)[0]))(next_states_)).to(device)

    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    if double_dqn:
        max_action = policy_net(non_final_next_states).max(1, keepdim=True)[1].detach()
        next_state_values[non_final_mask] = target_net(
            non_final_next_states).gather(1, max_action).detach().squeeze(1)
    else:
        next_state_values[non_final_mask] = target_net(
            non_final_next_states, double_dqn=double_dqn).max(1)[0].detach()

    expected_state_action_values = (next_state_values * (GAMMA ** steps)) + discounted_rewards
    return expected_state_action_values
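# The n-step return above discounts rewards with a geometric sequence
# 1, gamma, ..., gamma^(n-1); np.geomspace builds it directly. A toy sketch
# with illustrative values:
import numpy as np
import torch

GAMMA, steps = 0.99, 5
gamma_n = np.geomspace(1, GAMMA ** (steps - 1), steps)
n_rewards = torch.ones(2, steps)  # toy per-step rewards for two transitions
disc = n_rewards * torch.from_numpy(gamma_n).float()
print(torch.sum(disc, dim=1))     # discounted n-step returns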
def _sym_normalize_adj(self, adj):
    # Symmetric normalization D^{-1/2} A D^{-1/2}, guarding zero-degree nodes.
    deg = torch.sum(adj, dim=0)
    deg_inv = torch.where(deg > 0, 1. / torch.sqrt(deg),
                          torch.zeros(deg.size(), device=deg.device))  # match adj's device
    deg_inv = torch.diag(deg_inv)
    return torch.mm(deg_inv, torch.mm(adj, deg_inv))
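# Safe normalization via torch.where: note that both branches are evaluated
# eagerly, so 1/sqrt(0) still produces inf before being discarded. That is
# fine in a forward pass; under autograd the discarded branch can still
# poison gradients with NaNs, a well-known torch.where pitfall. Forward-only
# sketch with a toy adjacency matrix:
import torch

adj = torch.tensor([[0., 1.], [1., 0.]])
deg = adj.sum(dim=0)
deg_inv = torch.where(deg > 0, deg.rsqrt(), torch.zeros_like(deg))
print(torch.diag(deg_inv) @ adj @ torch.diag(deg_inv))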
def _project(x, c):
    # Pull points whose norm exceeds the ball radius (e.g. of a Poincare ball
    # with curvature c) back onto it; leave interior points untouched.
    norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), 1e-5)
    maxnorm = (1 - 1e-3) / (c ** 0.5)
    cond = norm > maxnorm
    projected = x / norm * maxnorm
    return torch.where(cond, projected, x)
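# Usage sketch for the projection above (illustrative values; c = 1.0 gives
# the unit ball): points whose norm exceeds the radius are clamped onto it,
# points inside pass through unchanged.
import torch

x = torch.tensor([[0.1, 0.1], [3.0, 4.0]])
y = _project(x, c=1.0)
print(y.norm(dim=-1))  # first row unchanged, second clamped near 1 - 1e-3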
def test_pretrain(args, all_data):
    data_len = len(all_data)
    embedding_range = torch.Tensor([(args.gamma + args.epsilon) / args.hidden_dim])
    kge_model = KGEModel(args, model_name=args.model)

    results = ddict(float)
    for i, data in enumerate(all_data):
        one_results = ddict(float)
        state = torch.load('../LTLE/fed_state/fb15k237_fed10_client_{}.best'.format(i),
                           map_location=args.gpu)
        rel_embed = state['rel_emb'].detach()
        ent_embed = state['ent_emb'].detach()

        train_dataset, valid_dataset, test_dataset, nrelation, nentity = \
            get_task_dataset(data, args)

        test_dataloader_tail = DataLoader(
            test_dataset,
            batch_size=args.test_batch_size,
            collate_fn=TestDataset.collate_fn
        )

        for batch in test_dataloader_tail:
            triplets, labels, mode = batch
            triplets, labels = triplets.to(args.gpu), labels.to(args.gpu)
            head_idx, rel_idx, tail_idx = triplets[:, 0], triplets[:, 1], triplets[:, 2]
            pred = kge_model((triplets, None), rel_embed, ent_embed, mode=mode)
            b_range = torch.arange(pred.size()[0], device=args.gpu)
            target_pred = pred[b_range, tail_idx]
            # Mask every known positive with a large negative score, then
            # restore the gold tail so it is ranked against unseen entities
            # only. (.bool() instead of the deprecated .byte() condition.)
            pred = torch.where(labels.bool(), -torch.ones_like(pred) * 10000000, pred)
            pred[b_range, tail_idx] = target_pred

            ranks = 1 + torch.argsort(
                torch.argsort(pred, dim=1, descending=True),
                dim=1, descending=False)[b_range, tail_idx]
            ranks = ranks.float()
            count = torch.numel(ranks)

            results['count'] += count
            results['mr'] += torch.sum(ranks).item()
            results['mrr'] += torch.sum(1.0 / ranks).item()
            one_results['count'] += count
            one_results['mr'] += torch.sum(ranks).item()
            one_results['mrr'] += torch.sum(1.0 / ranks).item()
            for k in [1, 5, 10]:
                results['hits@{}'.format(k)] += torch.numel(ranks[ranks <= k])
                one_results['hits@{}'.format(k)] += torch.numel(ranks[ranks <= k])

        for k, v in one_results.items():
            if k != 'count':
                one_results[k] = v / one_results['count']
        logging.info('mrr: {:.4f}, hits@1: {:.4f}, hits@5: {:.4f}, hits@10: {:.4f}'.format(
            one_results['mrr'], one_results['hits@1'],
            one_results['hits@5'], one_results['hits@10']))

    for k, v in results.items():
        if k != 'count':
            results[k] = v / results['count']
    logging.info('mrr: {:.4f}, hits@1: {:.4f}, hits@5: {:.4f}, hits@10: {:.4f}'.format(
        results['mrr'], results['hits@1'], results['hits@5'], results['hits@10']))
    return results
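# The filtered-ranking idiom above is worth isolating: every known true tail
# is pushed to -1e7 via torch.where, then the gold tail's score is restored
# so it alone competes against unseen entities. A toy sketch (1 query,
# 4 entities, gold tail = 2, another known positive = 0):
import torch

pred = torch.tensor([[0.9, 0.1, 0.5, 0.3]])
labels = torch.tensor([[True, False, True, False]])
tail_idx = torch.tensor([2])
b = torch.arange(pred.size(0))

target_pred = pred[b, tail_idx]
pred = torch.where(labels, -torch.ones_like(pred) * 10000000, pred)
pred[b, tail_idx] = target_pred
rank = 1 + torch.argsort(torch.argsort(pred, dim=1, descending=True),
                         dim=1)[b, tail_idx]
print(rank)  # tensor([1]): the other positive no longer outranks the gold tail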
def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
):
    r"""
    labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
        Labels for computing the token classification loss. Indices should be in
        ``[0, ..., config.num_labels - 1]``.

    Returns:
        :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the
        configuration (:class:`~transformers.RobertaConfig`) and inputs:

        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided):
            Classification loss.
        scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`):
            Classification scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one
            for the output of each layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.
            Hidden-states of the model at the output of each layer plus the initial
            embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.
            Attention weights after the attention softmax, used to compute the weighted
            average in the self-attention heads.

    Examples::

        from transformers import RobertaTokenizer, RobertaForTokenClassification
        import torch

        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        model = RobertaForTokenClassification.from_pretrained('roberta-base')
        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute",
            add_special_tokens=True)).unsqueeze(0)  # Batch size 1
        labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids, labels=labels)
        loss, scores = outputs[:2]
    """
    outputs = self.roberta(
        input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
    )
    sequence_output = outputs[0]
    sequence_output = self.dropout(sequence_output)
    logits = self.classifier(sequence_output)

    # add hidden states and attentions if they are here
    outputs = (logits,) + outputs[2:]

    if labels is not None:
        loss_fct = CrossEntropyLoss()
        # Only keep active parts of the loss
        if attention_mask is not None:
            active_loss = attention_mask.view(-1) == 1
            active_logits = logits.view(-1, self.num_labels)
            active_labels = torch.where(
                active_loss, labels.view(-1),
                torch.tensor(loss_fct.ignore_index).type_as(labels))
            loss = loss_fct(active_logits, active_labels)
        else:
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        outputs = (loss,) + outputs

    return outputs  # (loss), scores, (hidden_states), (attentions)
def train(epoch):
    print("Training epoch {}".format(epoch))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    regr_rpn_loss = 0
    class_rpn_loss = 0
    total_rpn_loss = 0
    regr_class_loss = 0
    class_class_loss = 0
    total_class_loss = 0
    count_rpn = 0
    count_class = 0

    for i, (image, boxes, labels, temp, num_pos) in enumerate(train_loader):
        count_rpn += 1
        y_is_box_label = temp[0].to(device=device)
        y_rpn_regr = temp[1].to(device=device)
        image = Variable(image).to(device=device)

        base_x, cls_k, reg_k = model_rpn(image)
        l1 = rpn_loss_regr(y_true=y_rpn_regr, y_pred=reg_k,
                           y_is_box_label=y_is_box_label,
                           lambda_rpn_regr=args.lambda_rpn_regr, device=device)
        l2 = rpn_loss_cls_fixed_num(y_pred=cls_k, y_is_box_label=y_is_box_label,
                                    lambda_rpn_class=args.lambda_rpn_class)
        regr_rpn_loss += l1.item()
        class_rpn_loss += l2.item()
        loss = l1 + l2
        total_rpn_loss += loss.item()

        optimizer_model_rpn.zero_grad()
        loss.backward()
        optimizer_model_rpn.step()

        with torch.no_grad():
            base_x, cls_k, reg_k = model_rpn(image)

        for b in range(args.train_batch):
            img_data = {}
            with torch.no_grad():
                # Convert RPN outputs to ROI bounding boxes.
                # cls_k shape: b, h, w, 9; reg_k shape: b, h, w, 36
                rpn_rois = rpn_to_roi(
                    cls_k[b, :], reg_k[b, :], no_anchors=num_anchors,
                    all_possible_anchor_boxes=all_possible_anchor_boxes_tensor.clone())
                rpn_rois.to(device=device)
                # Can't concatenate across the batch: the number of boxes may
                # vary between images.
                img_data["boxes"] = boxes[b].to(device=device) // downscale
                img_data['labels'] = labels[b]

                # X2 are qualified anchor boxes from model_rpn (converted anchors).
                # Y1 are the labels; Y1[-1] marks the background (negative) boxes;
                # ambiguous (neutral) boxes below the min overlap threshold are dropped.
                # Y2 is the concatenation of (1, tx, ty, tw, th) and (0, tx, ty, tw, th).
                X2, Y1, Y2, _ = calc_iou(rpn_rois, img_data,
                                         class_mapping=config.label_map)

                # If X2 is None there are no matching bboxes; check before
                # calling .to(), otherwise this crashes on None.
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue
                X2 = X2.to(device=device)
                Y1 = Y1.to(device=device)
                Y2 = Y2.to(device=device)

                neg_samples = torch.where(Y1[:, -1] == 1)[0]
                pos_samples = torch.where(Y1[:, -1] == 0)[0]
                rpn_accuracy_rpn_monitor.append(pos_samples.size(0))
                rpn_accuracy_for_epoch.append(pos_samples.size(0))

                db = Dataset_roi(pos=pos_samples.cpu(), neg=neg_samples.cpu())
                roi_loader = DataLoader(db, shuffle=True,
                                        batch_size=args.n_roi // 2,
                                        num_workers=args.workers,
                                        pin_memory=pin_memory, drop_last=False)

            for j, potential_roi in enumerate(roi_loader):
                pos = potential_roi[0]
                neg = potential_roi[1]
                if type(pos) == list:
                    # only negative samples in this chunk
                    rois = X2[neg]
                    rpn_base = base_x[b].unsqueeze(0)
                    Y11 = Y1[neg]
                    Y22 = Y2[neg]
                    # out_class: args.n_roi // 2, number of classes
                elif type(neg) == list:
                    # only positive samples in this chunk
                    rois = X2[pos]
                    rpn_base = base_x[b].unsqueeze(0)
                    # out_class: args.n_roi // 2, number of classes
                    Y11 = Y1[pos]
                    Y22 = Y2[pos]
                else:
                    ind = torch.cat([pos, neg])
                    rois = X2[ind]
                    rpn_base = base_x[b].unsqueeze(0)
                    # out_class: args.n_roi, number of classes
                    Y11 = Y1[ind]
                    Y22 = Y2[ind]
                count_class += 1

                rois = Variable(rois).to(device=device)
                out_class, out_regr = model_classifier(base_x=rpn_base, rois=rois)
                l3 = class_loss_cls(y_true=Y11, y_pred=out_class,
                                    lambda_cls_class=args.lambda_cls_class)
                l4 = class_loss_regr(y_true=Y22, y_pred=out_regr,
                                     lambda_cls_regr=args.lambda_cls_regr)
                regr_class_loss += l4.item()
                class_class_loss += l3.item()
                loss = l3 + l4
                total_class_loss += loss.item()

                optimizer_classifier.zero_grad()
                loss.backward()
                optimizer_classifier.step()

                if count_class % args.display_class == 0:
                    if count_class == 0:
                        print('[Classifier] RPN Ex: {}-th, Batch: {}, Anchor Box: {}-th, '
                              'Classifier Model Classification loss: {} '
                              'Regression loss: {} Total Loss: {}'.format(i, b, j, 0, 0, 0))
                    else:
                        print('[Classifier] RPN Ex: {}-th, Batch: {}, Anchor Box: {}-th, '
                              'Classifier Model Classification loss: {} '
                              'Regression loss: {} Total Loss: {}'.format(
                                  i, b, j, class_class_loss / count_class,
                                  regr_class_loss / count_class,
                                  total_class_loss / count_class))

        if i % args.display_rpn == 0:
            if len(rpn_accuracy_rpn_monitor) == 0:
                print('[RPN] RPN is not producing bounding boxes that overlap the '
                      'ground truth boxes. Check RPN settings or keep training.')
            else:
                mean_overlapping_bboxes = float(
                    sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                print('[RPN] Mean number of bounding boxes from RPN overlapping '
                      'ground truth boxes: {}'.format(mean_overlapping_bboxes))
                print('[RPN] RPN Ex: {}-th RPN Model Classification loss: {} '
                      'Regression loss: {} Total Loss: {}'.format(
                          i, class_rpn_loss / count_rpn,
                          regr_rpn_loss / count_rpn, total_rpn_loss / count_rpn))

    print("-- END OF EPOCH -- {}".format(epoch))
    print("------------------------------")
    print('[RPN] RPN Ex: {}-th RPN Model Classification loss: {} '
          'Regression loss: {} Total Loss: {}'.format(
              i, class_rpn_loss / count_rpn, regr_rpn_loss / count_rpn,
              total_rpn_loss / count_rpn))
    if count_class == 0:
        print('[Classifier] RPN Ex: {}-th, Batch: {}, Anchor Box: {}-th, '
              'Classifier Model Classification loss: {} Regression loss: {} '
              'Total Loss: {}'.format(i, b, j, 0, 0, 0))
    else:
        print('[Classifier] RPN Ex: {}-th, Batch: {}, Anchor Box: {}-th, '
              'Classifier Model Classification loss: {} Regression loss: {} '
              'Total Loss: {}'.format(i, b, j, class_class_loss / count_class,
                                      regr_class_loss / count_class,
                                      total_class_loss / count_class))
    if len(rpn_accuracy_rpn_monitor) == 0:
        print('[RPN] RPN is not producing bounding boxes that overlap the '
              'ground truth boxes. Check RPN settings or keep training.')
    else:
        mean_overlapping_bboxes = float(
            sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
        print('[RPN] Mean number of bounding boxes from RPN overlapping '
              'ground truth boxes: {}'.format(mean_overlapping_bboxes))
    print('Total Loss {}'.format(total_class_loss / count_class +
                                 total_rpn_loss / count_rpn))
    print("------------------------------")
def L1_smooth_loss(x, y):
    abs_diff = torch.abs(x - y)
    abs_diff_lt_1 = torch.le(abs_diff, 1)
    return torch.mean(torch.where(abs_diff_lt_1,
                                  0.5 * abs_diff ** 2,
                                  abs_diff - 0.5))
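# Quick sanity check of the loss above against PyTorch's built-in: with the
# default beta of 1.0 the two should agree up to floating point.
import torch
import torch.nn.functional as F

x, y = torch.randn(10), torch.randn(10)
print(L1_smooth_loss(x, y), F.smooth_l1_loss(x, y))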
def search(
    self,
    start_predictions: torch.Tensor,
    start_state: Dict[str, torch.Tensor],
    step: StepFunctionType,
    fsm: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""
    Given a starting state, a step function, and an FSM adjacency matrix, apply
    Constrained Beam Search to find the most likely target sequences that satisfy
    the constraints specified by the FSM.

    .. note::

        If your step function returns ``-inf`` for some log probabilities (like
        if you're using a masked log-softmax) then some of the "best" sequences
        returned may also have ``-inf`` log probability. Specifically this happens
        when the beam size is smaller than the number of actions with finite log
        probability (non-zero probability) returned by the step function. Therefore
        if you're using a mask you may want to check the results from ``search``
        and potentially discard sequences with non-finite log probability.

    Parameters
    ----------
    start_predictions : torch.Tensor
        A tensor containing the initial predictions with shape ``(batch_size, )``.
        These are usually just ``@@BOUNDARY@@`` token indices.
    start_state : ``Dict[str, torch.Tensor]``
        The initial state passed to the ``step`` function. Each value of the state
        dict should be a tensor of shape ``(batch_size, *)``, where ``*`` means any
        other number of dimensions.
    step : ``StepFunctionType``
        A function that is responsible for computing the next most likely tokens,
        given the current state and the predictions from the last time step. The
        function should accept two arguments. The first being a tensor of shape
        ``(group_size,)``, representing the index of the predicted tokens from the
        last time step, and the second being the current state. The ``group_size``
        will be ``batch_size * beam_size * num_fsm_states`` except in the initial
        step, for which it will just be ``batch_size``. The function is expected
        to return a tuple, where the first element is a tensor of shape
        ``(group_size, vocab_size)`` containing the log probabilities of the tokens
        for the next step, and the second element is the updated state. The tensor
        in the state should have shape ``(group_size, *)``, where ``*`` means any
        other number of dimensions.

    Returns
    -------
    Tuple[torch.Tensor, torch.Tensor]
        Tuple of ``(predictions, log_probabilities)``, where ``predictions`` has
        shape ``(batch_size, num_fsm_states, beam_size, max_steps)`` and
        ``log_probabilities`` has shape ``(batch_size, num_fsm_states, beam_size)``.
    """
    # shape: (batch_size, num_fsm_states, num_fsm_states, vocab_size)
    batch_size, num_fsm_states, _, vocab_size = fsm.size()

    # List of (batch_size, num_fsm_states, beam_size) tensors. One for each time
    # step. Does not include the start symbols, which are implicit.
    predictions: List[torch.Tensor] = []

    # List of (batch_size, num_fsm_states, beam_size) tensors. One for each time
    # step. None for the first. Stores the index n for the parent prediction.
    backpointers: List[torch.Tensor] = []

    # Calculate the first timestep. This is done outside the main loop because we
    # are going from a single decoder input (the output from the encoder) to the
    # top `beam_size` decoder outputs per FSM state. On the other hand, within the
    # main loop we are going from the `beam_size` elements of the beam (per FSM
    # state) to `beam_size`^2 candidates from which we will select the top
    # `beam_size` elements for the next iteration.
    # shape: start_class_log_probabilities (batch_size, vocab_size)
    # shape: state["h1"], state["c1"]... etc. (batch_size, hidden_size)
    start_class_log_probabilities, state = step(
        previous_predictions=start_predictions, states=start_state)
    vocab_size = start_class_log_probabilities.size(-1)

    start_state_predictions = start_class_log_probabilities.view(
        batch_size, 1, vocab_size).expand(batch_size, num_fsm_states, vocab_size)
    # NOTE: `1 - mask` assumes `fsm` is a 0/1 byte mask; on newer PyTorch use
    # `~fsm.bool()` instead.
    start_state_predictions = start_state_predictions.masked_fill(
        1 - fsm[:, 0, :, :], float("-inf"))

    # (batch_size, num_fsm_states, beam_size)
    start_top_log_probabilities, start_predicted_classes = \
        start_state_predictions.topk(self.beam_size)

    # shape: (batch_size, num_fsm_states, beam_size)
    last_log_probabilities = start_top_log_probabilities
    predictions.append(start_predicted_classes.view(batch_size, -1))

    log_probs_after_end = torch.full(
        (1, vocab_size), float("-inf")).to(start_predictions.device)
    log_probs_after_end[:, self._end_index] = 0.0

    state = {
        key: _enlarge_single_tensor(value, batch_size, num_fsm_states, self.beam_size)
        for (key, value) in state.items()
    }

    step_state_mask = fsm.view(
        batch_size, num_fsm_states, num_fsm_states, 1, vocab_size).expand(
        batch_size, num_fsm_states, num_fsm_states, self.beam_size, vocab_size)

    for timestep in range(self.max_steps - 1):
        # shape: (batch_size * beam_size * num_fsm_states, )
        last_predictions = predictions[-1].reshape(
            batch_size * self.beam_size * num_fsm_states)

        if (last_predictions == self._end_index).all():
            break

        class_log_probabilities, state = step(
            previous_predictions=last_predictions, states=state)

        last_predictions_expanded = (
            last_predictions.view(-1).unsqueeze(-1).expand(
                batch_size * num_fsm_states * self.beam_size, vocab_size))

        # Once a beam has emitted the end token, force it to keep emitting it.
        cleaned_log_probabilities = torch.where(
            last_predictions_expanded == self._end_index,
            log_probs_after_end,
            class_log_probabilities,
        )
        cleaned_log_probabilities = cleaned_log_probabilities.view(
            batch_size, num_fsm_states, self.beam_size, vocab_size)

        restricted_predicted_classes = torch.LongTensor(
            batch_size, num_fsm_states, self.beam_size).to(start_predictions.device)
        restricted_beam_log_probs = torch.FloatTensor(
            batch_size, num_fsm_states, self.beam_size).to(start_predictions.device)
        restricted_beam_indices = torch.LongTensor(
            batch_size, num_fsm_states, self.beam_size).to(start_predictions.device)

        expanded_last_log_probabilities = last_log_probabilities.view(
            batch_size, num_fsm_states, self.beam_size, 1).expand(
            batch_size, num_fsm_states, self.beam_size, self.per_node_beam_size)

        for i in range(num_fsm_states):
            # shape: (batch_size, num_fsm_states, beam_size, vocab_size)
            state_log_probabilities = cleaned_log_probabilities
            state_log_probabilities = state_log_probabilities.masked_fill(
                1 - step_state_mask[:, :, i, :, :], -1e20)

            top_log_probabilities, predicted_classes = \
                state_log_probabilities.topk(self.per_node_beam_size)
            summed_top_log_probabilities = (
                top_log_probabilities + expanded_last_log_probabilities)

            # shape: (batch_size, old_num_fsm_states * beam_size * per_node_beam_size)
            reshaped_summed = summed_top_log_probabilities.reshape(batch_size, -1)
            # shape: (batch_size, old_num_fsm_states * beam_size * per_node_beam_size)
            reshaped_predicted_classes = predicted_classes.reshape(batch_size, -1)

            # shape: (batch_size, beam_size)
            state_beam_log_probs, state_beam_indices = reshaped_summed.topk(self.beam_size)
            # shape: (batch_size, beam_size)
            state_predicted_classes = reshaped_predicted_classes.gather(
                1, state_beam_indices)

            restricted_predicted_classes[:, i, :] = state_predicted_classes
            restricted_beam_indices[:, i, :] = state_beam_indices
            restricted_beam_log_probs[:, i, :] = state_beam_log_probs

        restricted_predicted_classes = restricted_predicted_classes.view(batch_size, -1)
        predictions.append(restricted_predicted_classes)

        # Integer division (`//`, not `/`) recovers the parent beam index from
        # the flattened (beam, per_node) index.
        backpointer = restricted_beam_indices // self.per_node_beam_size
        backpointers.append(backpointer.view(batch_size, -1))

        last_log_probabilities = restricted_beam_log_probs.view(
            batch_size, num_fsm_states, -1)

        def track_back_state(state_tensor):
            _, *last_dims = state_tensor.size()
            # shape: (batch_size, beam_size, *)
            expanded_backpointer = backpointer.view(
                batch_size, num_fsm_states * self.beam_size,
                *([1] * len(last_dims))).expand(
                batch_size, num_fsm_states * self.beam_size, *last_dims)
            # shape: (batch_size * beam_size, *)
            return (state_tensor.reshape(
                batch_size, num_fsm_states * self.beam_size, *last_dims)
                .gather(1, expanded_backpointer)
                .reshape(batch_size * num_fsm_states * self.beam_size, *last_dims))

        state = {key: track_back_state(value) for (key, value) in state.items()}

    # Reconstruct the sequences.
    # shape: [(batch_size, beam_size, 1)]
    reconstructed_predictions = [predictions[-1].unsqueeze(2)]

    # shape: (batch_size, beam_size)
    cur_backpointers = backpointers[-1]
    for timestep in range(len(predictions) - 2, 0, -1):
        # shape: (batch_size, beam_size, 1)
        cur_preds = predictions[timestep].gather(1, cur_backpointers).unsqueeze(2)
        reconstructed_predictions.append(cur_preds)
        # shape: (batch_size, beam_size)
        cur_backpointers = backpointers[timestep - 1].gather(1, cur_backpointers)

    # shape: (batch_size, beam_size, 1)
    final_preds = predictions[0].gather(1, cur_backpointers).unsqueeze(2)
    reconstructed_predictions.append(final_preds)

    # shape: (batch_size, beam_size, max_steps)
    all_predictions = torch.cat(list(reversed(reconstructed_predictions)), 2)
    all_predictions = all_predictions.view(
        batch_size, num_fsm_states, self.beam_size, -1)

    return all_predictions, last_log_probabilities
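# The reconstruction loop above just walks backpointers from the last step to
# the first. A tiny standalone sketch with made-up tokens, beam size 2 and
# three steps (batch size 1):
import torch

predictions = [torch.tensor([[5, 7]]), torch.tensor([[2, 9]]), torch.tensor([[4, 1]])]
backpointers = [torch.tensor([[1, 0]]), torch.tensor([[0, 0]])]

recon = [predictions[-1].unsqueeze(2)]
cur = backpointers[-1]
for t in range(len(predictions) - 2, 0, -1):
    recon.append(predictions[t].gather(1, cur).unsqueeze(2))
    cur = backpointers[t - 1].gather(1, cur)
recon.append(predictions[0].gather(1, cur).unsqueeze(2))
print(torch.cat(list(reversed(recon)), 2))  # (1, beam, steps): [[7,2,4],[7,2,1]]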
def test(model, device, test_loader, criterion, mode="raw-task",
         dataset="cifar10", poison_type="fashion"):
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))

    if dataset in ("mnist", "emnist"):
        target_class = 7
        if mode == "raw-task":
            classes = [str(i) for i in range(10)]
        elif mode == "targetted-task":
            if poison_type == 'ardis':
                classes = [str(i) for i in range(10)]
            else:
                classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
                           "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
    elif dataset == "cifar10":
        classes = ('plane', 'car', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck')
        # target_class is 2 for greencar, 9 for southwest
        if poison_type in ("howto", "greencar-neo"):
            target_class = 2
        else:
            target_class = 9

    model.eval()
    test_loss = 0
    correct = 0
    backdoor_correct = 0
    backdoor_tot = 0
    final_acc = 0
    task_acc = None

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            c = (predicted == target).squeeze()
            test_loss += criterion(output, target).item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

            # check backdoor accuracy
            if poison_type == 'ardis':
                backdoor_index = torch.where(target == target_class)
                target_backdoor = torch.ones_like(target[backdoor_index])
                predicted_backdoor = predicted[backdoor_index]
                backdoor_correct += (predicted_backdoor == target_backdoor).sum().item()
                backdoor_tot = backdoor_index[0].shape[0]

            for image_index in range(len(target)):
                label = target[image_index]
                class_correct[label] += c[image_index].item()
                class_total[label] += 1

    test_loss /= len(test_loader.dataset)

    if mode == "raw-task":
        for i in range(10):
            logger.info('Accuracy of %5s : %.2f %%' %
                        (classes[i], 100 * class_correct[i] / class_total[i]))
            if i == target_class:
                task_acc = 100 * class_correct[i] / class_total[i]

        logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
        final_acc = 100. * correct / len(test_loader.dataset)

    elif mode == "targetted-task":
        if dataset in ("mnist", "emnist"):
            for i in range(10):
                logger.info('Accuracy of %5s : %.2f %%' %
                            (classes[i], 100 * class_correct[i] / class_total[i]))
            if poison_type == 'ardis':
                # ensure 7 is being classified as 1
                logger.info('Backdoor Accuracy of %.2f : %.2f %%' %
                            (target_class, 100 * backdoor_correct / backdoor_tot))
                final_acc = 100 * backdoor_correct / backdoor_tot
            else:
                # trouser accuracy
                final_acc = 100 * class_correct[1] / class_total[1]
        elif dataset == "cifar10":
            logger.info('#### Targetted Accuracy of %5s : %.2f %%' %
                        (classes[target_class],
                         100 * class_correct[target_class] / class_total[target_class]))
            final_acc = 100 * class_correct[target_class] / class_total[target_class]

    return final_acc, task_acc
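# The backdoor-accuracy bookkeeping above selects the examples whose label is
# the attacked class via the index form of torch.where; a standalone sketch
# with toy labels (attacked class 7, expected backdoor target 1):
import torch

target = torch.tensor([7, 1, 7, 3])
predicted = torch.tensor([1, 1, 7, 3])
backdoor_index = torch.where(target == 7)
target_backdoor = torch.ones_like(target[backdoor_index])
hits = (predicted[backdoor_index] == target_backdoor).sum().item()
print(hits, '/', backdoor_index[0].shape[0])  # 1 / 2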
# 405 in svhn
zeros = torch.zeros(1).cuda()
ones = torch.ones(1).cuda()
minimum = 999999999
for threshold in thresholds:
    total = 0
    in_distr_tot = 0
    out_distr_tot = 0
    threshold = torch.tensor(threshold).cuda()
    for in_data, out_data in zip(testloader, svhntestloader):
        total += m

        # in-distribution
        data_tmp = torch.tensor(in_data[0]).cuda()
        in_distr_predict = f.softmax(classifier(data_tmp), dim=1)
        max_probability = in_distr_predict.max(1)[0]
        in_distr_below_threshold = torch.where(
            max_probability <= threshold, max_probability, zeros)
        # map survivors to ones for counting
        in_distr_below_threshold = torch.where(
            in_distr_below_threshold == 0, in_distr_below_threshold, ones)
        in_distr_tot += in_distr_below_threshold.sum()

        # out-of-distribution
        data_tmp = torch.tensor(out_data[0]).cuda()
        out_distr_predict = f.softmax(classifier(data_tmp), dim=1)
        max_probability = out_distr_predict.max(1)[0]
        out_distr_above_threshold = torch.where(
            max_probability > threshold, max_probability, zeros)
        # map survivors to ones for counting
        out_distr_above_threshold = torch.where(
            out_distr_above_threshold == 0, out_distr_above_threshold, ones)
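# The two torch.where calls above implement counting-by-masking: values on
# the wrong side of the threshold are zeroed, survivors are mapped to one,
# and the sum is the count. A compact CPU sketch of the same idiom with
# illustrative probabilities; note that a boolean comparison plus .sum() is
# the simpler, equivalent route in current PyTorch:
import torch

probs = torch.tensor([0.2, 0.8, 0.55, 0.9])
threshold = torch.tensor(0.6)
below = torch.where(probs <= threshold, probs, torch.zeros(1))
below = torch.where(below == 0, below, torch.ones(1))
print(below.sum())                 # 2.0
print((probs <= threshold).sum())  # equivalent, and simpler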