def forward(self, x):
    preds = [m(x) for m in self.models]
    # mean of the stacked ensemble logits (computed but unused)
    preds_mean = torch.mean(torch.stack(preds), axis=0)
    # per-model argmax, then majority vote across models
    preds = torch.stack([torch.max(pred, -1)[1] for pred in preds])
    preds = torch.mode(preds, 0)[0]
    return preds

def connected_components(self, Z):
    """
    Compute a simple connected-components algorithm.

    @param Z: a [n x d] torch.FloatTensor of datapoints
    @return: a [n] torch.LongTensor of cluster labels
    """
    n, d = Z.shape
    K = 0

    # SAMPLING/GROUPING
    cluster_labels = torch.ones((n,), dtype=torch.long, device=Z.device) * -1
    for i in range(n):
        if cluster_labels[i] == -1:
            # Find all points close to point i and label them the same
            distances = self.distance(Z, Z[i:i + 1])  # Shape: [n x 1]
            component_seeds = distances[:, 0] <= self.epsilon

            # If any of these points already has a label, use the mode of the existing labels
            if torch.unique(cluster_labels[component_seeds]).shape[0] > 1:
                temp = cluster_labels[component_seeds]
                temp = temp[temp != -1]
                label = torch.mode(temp)[0].to(Z.device)
            else:
                label = torch.tensor(K).to(Z.device)
                K += 1  # Increment number of clusters
            cluster_labels[component_seeds] = label

    return cluster_labels

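# A minimal, hypothetical sketch of the grouping idea above (not the author's
# class): epsilon-ball grouping with torch.cdist standing in for self.distance,
# and labels merged via torch.mode as in connected_components. The function
# name and epsilon value are illustrative assumptions.
import torch

def epsilon_components(Z, epsilon=0.5):
    n = Z.shape[0]
    labels = torch.full((n,), -1, dtype=torch.long, device=Z.device)
    K = 0
    for i in range(n):
        if labels[i] == -1:
            seeds = torch.cdist(Z, Z[i:i + 1])[:, 0] <= epsilon
            existing = labels[seeds]
            existing = existing[existing != -1]
            if existing.numel() > 0:
                labels[seeds] = torch.mode(existing)[0]  # merge into majority label
            else:
                labels[seeds] = K  # open a new cluster
                K += 1
    return labels

# e.g. two well-separated blobs should receive two distinct labels:
# Z = torch.cat([torch.randn(10, 2) * 0.1, torch.randn(10, 2) * 0.1 + 5.0])
# print(epsilon_components(Z, epsilon=1.0))
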
def compile_items(self, window_size, step_size):
    segments = zip(
        get_windows(torch.Tensor(self.acc), window_size, step_size),
        get_windows(torch.Tensor(self.ppg), window_size, step_size),
        get_windows(torch.Tensor(self.hr), window_size, step_size),
        get_windows(torch.Tensor(self.activity), window_size, step_size),
    )
    items = []
    for n_in_experiment, (acc, ppg, hr, activity) in enumerate(segments):
        activity = activity.squeeze()
        activity_label = torch.mode(activity, 0)[0].long()
        item = {
            'acc': acc,
            'ppg': ppg,
            'hr': hr,
            'hr_label': self._get_hr_label(hr, window_size).view(1),
            'activity': activity,
            'activity_label': activity_label,
            'n_in_experiment': torch.tensor(n_in_experiment),
            'experiment_id': torch.tensor(int(self.user_info.item(0)['Name'])),
            'user_id': torch.tensor(int(self.user_info.item(0)['Name'])),
        }
        if not self.ignore_zero:
            items.append(item)
        elif item['activity_label'] != 0:
            item['activity_label'] -= 1
            items.append(item)
    return items

def classifyImage(path, model, classes):
    image = cv2.imread(path, 1)
    PILImage = Image.fromarray(image)
    testTransforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    input = testTransforms(PILImage)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input = input.to(device)
    # output = model.forward(input[None, ...])
    output = model.forward(input[None])
    probabilityOutput = torch.exp(output)
    topProbability, predictedClass = probabilityOutput.topk(1, dim=1)
    predictedClass = torch.squeeze(predictedClass)
    mode = torch.mode(predictedClass, 0)
    # fig = plt.figure(figsize=(28, 8))
    # ax = fig.add_subplot(2, 20 / 2, 1, xticks=[], yticks=[])
    # plt.imshow(np.transpose(input.cpu().numpy(), (1, 2, 0)).astype('uint8'))
    # ax.set_title(classes[mode[0].item()])
    return classes[mode[0].item()]

def variation_ratios(predictions):
    # predictions: [passes, batch, classes]; take the argmax label per pass
    label_predictions = torch.max(predictions, dim=2)[1]
    # majority label across passes, then the fraction of passes that disagree
    modes, _ = torch.mode(label_predictions, dim=0)
    num_occurences = (label_predictions == modes).sum(dim=0)
    scores = 1. - num_occurences.float() / label_predictions.shape[0]
    return scores

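# Hedged sanity check for the variation-ratio computation above, with dummy
# random predictions standing in for real stochastic forward passes. Shapes
# assumed: [passes, batch, classes]. torch.mode along the pass dimension
# recovers the majority label; the score is the fraction of dissenting passes.
import torch

predictions = torch.randn(5, 1, 3).softmax(dim=2)      # 5 passes, 1 sample, 3 classes
label_predictions = torch.max(predictions, dim=2)[1]   # [5, 1]
modes, _ = torch.mode(label_predictions, dim=0)        # [1], majority label
num_occurrences = (label_predictions == modes).sum(dim=0)
score = 1. - num_occurrences.float() / label_predictions.shape[0]
print(score)  # at most 0.6 here, since the mode occurs >= 2 of 5 times
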
def forward(self, inputs, k):
    inputs = inputs.unsqueeze(1).repeat(1, self.data.shape[0], 1)
    distance = ((inputs - self.data) ** 2).sum(dim=2)  # shape=(num_sample, num_base_sample)
    # nearest neighbors have the smallest distances, hence largest=False
    _, index = torch.topk(distance, k, dim=1, largest=False)  # index shape=(num_sample, k)
    # map base-sample indices to class labels (samples are grouped by class)
    index = index // self.num_each_class
    pred, _ = torch.mode(index, dim=1)
    return pred

def predict(self, data):
    '''
    Use classifier to predict data label. Will prioritize using label tags if defined.

    :param data: list, np.array, or torch.Tensor
    '''
    try:
        if 'Tensor' not in str(type(data)):
            data = torch.Tensor([data])
        if len(data.shape) == 1:
            data = torch.reshape(data, [1, data.shape[0]])
    except:
        assert (False), "Invalid data type (Expected torch.Tensor)"

    # main algorithm
    distances = self.data - data
    distances = torch.sum(distances**2, dim=1)
    inds = torch.argsort(distances)  # can change this, do we want O(k*n) or O(n log(n))?
    lab = []
    for i in range(min(self.k, inds.shape[0])):
        lab.append(self.labels[inds[i]])
    lab = torch.Tensor(lab).squeeze()
    out = torch.mode(lab)[0]
    if self.reverse_tags is not None:
        return self.reverse_tags[int(out)]
    else:
        return int(out)

def reduction_ops(self):
    a = torch.randn(4)
    b = torch.randn(4)
    return (
        torch.argmax(a),
        torch.argmin(a),
        torch.amax(a),
        torch.amin(a),
        torch.aminmax(a),
        torch.all(a),
        torch.any(a),
        torch.max(a),
        torch.min(a),
        torch.dist(a, b),
        torch.logsumexp(a, 0),
        torch.mean(a),
        torch.nanmean(a),
        torch.median(a),
        torch.nanmedian(a),
        torch.mode(a),
        torch.norm(a),
        torch.nansum(a),
        torch.prod(a),
        torch.quantile(a, torch.tensor([0.25, 0.5, 0.75])),
        torch.nanquantile(a, torch.tensor([0.25, 0.5, 0.75])),
        torch.std(a),
        torch.std_mean(a),
        torch.sum(a),
        torch.unique(a),
        torch.unique_consecutive(a),
        torch.var(a),
        torch.var_mean(a),
        torch.count_nonzero(a),
    )

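# Quick note on torch.mode among the reductions above: unlike torch.mean or
# torch.sum, which return a bare tensor, torch.mode returns a (values, indices)
# namedtuple. A small illustration:
import torch

a = torch.tensor([1., 2., 2., 3.])
values, indices = torch.mode(a)
print(values)   # tensor(2.) -- the most frequent element
print(indices)  # index of (an occurrence of) that element
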
def get_predict(self, unlabeled_path):
    all_input_ids, all_token_type_ids, \
        all_attention_mask, all_label_ids = self.get_X_y_ids(unlabeled_path)
    dataset = TensorDataset(all_input_ids, all_token_type_ids, all_attention_mask)
    batch_size = self.n_gpu * self.per_gpu_batch_size
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)
    model = CLS_Model(vocab_size=self.bert_tokenizer.vocab_size,
                      embed_size=self.embed_size,
                      num_labels=len(self.label_list),
                      dense_layer_type=self.dense_layer_type,
                      dropout=self.dropout,
                      embed_type=self.embed_type,
                      max_len=self.max_seq_len,
                      model_name_or_path=self.model_name_or_path,
                      vector_file=self.vector_file)
    model.to(self.device)
    y_preds = []
    for model_state_path in glob(
            os.path.join(self.output_dir, '*{}*.pt*'.format(self.model_name))):
        model.load_state_dict(torch.load(model_state_path))
        y_pred = self.single_predict(model, dataloader)
        y_preds.append(y_pred)
    y_preds = torch.tensor(y_preds)
    y_pred = torch.mode(y_preds, dim=0).values
    y_pred = y_pred.numpy()
    return y_pred

def relabel(y, y_hat):
    # map each predicted cluster to the majority ground-truth label it contains
    k = len(y_hat.unique())
    y_hat_rl = y_hat.clone()
    for i in range(k):
        l = torch.mode(y[y_hat == i])[0]
        y_hat_rl[y_hat == i] = l
    return y_hat_rl

def predict(self, x):
    # get number of samples in batch
    bs = x.shape[0]
    # add noise to input batch
    x = x.unsqueeze(1) + self.input_dist.sample()[:bs].to(self.device)
    # reshape input batch by stacking samples into batch dimension
    x = x.view((bs * self.n_samples,
                self.config['input_dimensions'][-1],
                self.config['input_dimensions'][0],
                self.config['input_dimensions'][1]))
    # compute output batch logits and predictions
    logits = self.model(x)
    pred = torch.argmax(logits, dim=1)
    # reshape predictions to unstack samples from batch dimension
    pred = pred.view((bs, self.n_samples))
    # take mode along sample dim to get final prediction
    pred = torch.mode(pred, dim=1)[0]
    return pred

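# A minimal sketch of the stack/unstack voting trick used above, with random
# logits standing in for self.model. Names and shapes here are illustrative
# assumptions, not the class's actual config.
import torch

bs, n_samples, n_classes = 4, 8, 10
logits = torch.randn(bs * n_samples, n_classes)  # noisy copies stacked in the batch dim
pred = torch.argmax(logits, dim=1)               # [bs * n_samples]
pred = pred.view(bs, n_samples)                  # unstack the noise samples
pred = torch.mode(pred, dim=1)[0]                # majority vote per input
print(pred.shape)                                # torch.Size([4])
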
def predict(self, X):
    all_input_ids, all_input_mask_ids, all_label_ids, all_label_mask_ids = self.get_X_y_ids(X)
    dataset = TensorDataset(all_input_ids, all_input_mask_ids, all_label_ids)
    batch_size = self.n_gpu * self.per_gpu_batch_size
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)
    model = NER_Model(vocab_size=self.bert_tokenizer.vocab_size,
                      embed_size=self.embed_size,
                      num_tags=len(self.label_list),
                      max_len=self.max_seq_len,
                      device=self.device,
                      dense_layer_type=self.dense_layer_type,
                      dropout=self.dropout,
                      embed_type=self.embed_type,
                      model_name_or_path=self.model_name_or_path,
                      vector_file=self.vector_file)
    model.to(self.device)
    y_preds = []
    for model_state_path in glob(os.path.join(self.output_dir, '*{}*.pt*'.format(self.model_name))):
        model.load_state_dict(torch.load(model_state_path))
        y_pred = self.single_predict(model, dataloader)
        y_preds.append(y_pred)
    y_preds = torch.tensor(y_preds)
    y_pred = torch.mode(y_preds, dim=0).values
    y_pred = y_pred.numpy()
    preds_list = [[] for _ in range(all_label_mask_ids.shape[0])]
    for i in range(all_label_mask_ids.shape[0]):
        for j in range(all_label_mask_ids.shape[1]):
            if all_label_mask_ids[i, j] != -100:
                preds_list[i].append(self.label_list[y_pred[i][j]])
    return preds_list

def knn_predict(feature, feature_bank, feature_labels, classes, knn_k, knn_t):
    # compute cos similarity between each feature vector and feature bank ---> [B, N]
    preds = []
    sim_matrix = torch.mm(feature, feature_bank)
    # [B, K]
    sim_weight, sim_indices = sim_matrix.topk(k=knn_k, dim=-1)
    # [B, K]
    sim_labels = torch.gather(feature_labels.expand(feature.size(0), -1),
                              dim=-1, index=sim_indices)
    for row in tqdm(sim_labels):
        if len(torch.unique(row)) == len(row):
            # no repeated label among the neighbors: fall back to the closest one
            preds.append(row[0].item())
        else:
            preds.append(torch.mode(row)[0].item())
    preds = torch.tensor(preds)
    # NOTE: gt_labels is not a parameter; it is assumed to be defined at module scope
    print((preds == gt_labels).sum() / 50000)
    sim_weight = (sim_weight / knn_t).exp()

    # counts for each class
    one_hot_label = torch.zeros(feature.size(0) * knn_k, classes)  # [B*K, C]
    one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
    # weighted score ---> [B, C]
    # pred_scores = torch.sum(one_hot_label.view(feature.size(0), -1, classes) * sim_weight.unsqueeze(dim=-1), dim=1)
    pred_scores = torch.sum(one_hot_label.view(feature.size(0), -1, classes), dim=1)
    pred_labels = pred_scores.argsort(dim=-1, descending=True)
    return (pred_labels[:, 0] == gt_labels).sum() / 50000

def ensemble_result(args, G_list, F_list, im_data, alphas):
    predictions = torch.tensor(np.zeros((len(G_list), im_data.shape[0]))).cuda()
    count = 0
    # based on whether alphas is None or an array, pick how we use the
    # classifier outputs
    # print("im_data shape: ", im_data.shape)
    # print("predictions shape: ", predictions.shape)
    pred = []
    if alphas is None:
        for G, F in zip(G_list, F_list):
            feat = G(im_data)
            output = F(feat)
            predictions[count, :] = output.data.max(1)[1]
            count += 1
        # majority voting
        pred = torch.mode(predictions, 0)
        pred = pred.values
    else:
        # print("Alphas: ", alphas, " adaboost eval")
        output_overall = []
        for G, F in zip(G_list, F_list):
            feat = G(im_data)
            output = F(feat)
            if count == 0:
                output_overall = alphas[count] * output.data
                count += 1
            else:
                output_overall += alphas[count] * output.data
                count += 1
        pred = output_overall.max(1)[1]
    return pred

def test_cifar(args, Z, names, arch):
    _, test_loader = datagen.load_cifar(args)
    criterion = nn.CrossEntropyLoss()
    pop_size = args.batch_size
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i], Z[3][i], Z[4][i]]
                model = weights_to_clf(params, names, arch)
                output = model(data)
                outputs.append(output)
            pop_outputs = torch.stack(outputs)
            pop_labels = pop_outputs.max(2, keepdim=True)[1].view(pop_size, 100, 1)
            modes, idxs = torch.mode(pop_labels, dim=0, keepdim=True)
            modes = modes.view(100, 1)
            correct += modes.eq(target.data.view_as(modes)).long().cpu().sum()
            test_loss += criterion(output, target).item()  # sum up batch loss
    test_loss /= len(test_loader.dataset)
    acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, test_loss

def predict(self, x_test: Tensor):
    """
    Predict the most likely class for each sample in a given tensor.
    :param x_test: Tensor of shape (N,D) where N is the number of samples.
    :return: A tensor of shape (N,) containing the predicted classes.
    """
    # Calculate distances between training and test samples
    dist_matrix = self.calc_distances(x_test)

    # For each test sample we'll look for its k-nearest neighbors.
    # Then we'll predict the label of that sample to be the majority
    # label of its nearest neighbors.
    n_test = x_test.shape[0]
    y_pred = torch.zeros(n_test, dtype=torch.int64)
    for i in range(n_test):
        # - Find indices of k-nearest neighbors of test sample i
        # - Set y_pred[i] to the most common class among them
        # ====== YOUR CODE: ======
        cur_dists = dist_matrix[:, i]
        _, top_idx = torch.topk(cur_dists, self.k, largest=False)
        top_classes = self.y_train[top_idx]
        y_pred[i] = torch.mode(top_classes)[0].item()
        # ========================
    return y_pred

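# Hypothetical illustration of the majority-vote step above (the class's
# y_train and calc_distances are not shown, so the tensors here are made up):
import torch

y_train = torch.tensor([0, 0, 1, 1, 1])
top_idx = torch.tensor([2, 3, 0])       # pretend these are the 3 nearest neighbors
top_classes = y_train[top_idx]          # tensor([1, 1, 0])
print(torch.mode(top_classes)[0].item())  # 1 -- the majority label
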
def generate_candidate_frames(img, spacing=6, sigma=9, blur_sigma=15,
                              contrast=1.2, downsample=8, use_mode=False):
    """
    Generate the candidate frames used to compute saliencies (I'), using masks
    representing Gaussian perturbations spaced every `spacing` steps:
        output_i = mask_i * blurred_img + (1 - mask_i) * img

    :param img: input image
    :param spacing: space between each perturbation
    :param sigma: scale of the perturbations
    :param blur_sigma: scale of the gaussian noise to blur the image
    :param contrast: contrast for the final mask values (mask = torch.clamp(contrast * mask, 0, 1))
    :param downsample: downsampling size
    :param use_mode: use the color mode of the input image instead of the blurred version (mode = background color)
    :return: candidate frames to compute saliency
    """
    masks = generate_mask(img.shape[2:], spacing, sigma, downsample=downsample, contrast=contrast)
    if use_mode:
        mode, _ = torch.mode(img.transpose(1, 0).view(4, -1), -1)
        blurred_img = mode[None, :, None, None].expand_as(img)
    else:
        blurred_img = blur_image(img.float(), blur_sigma, downsample)
    outputs = masks * blurred_img + (1 - masks) * img.float()
    return outputs, masks

def make_seg_labels(self, seg):
    zeros = torch.zeros(seg.shape).cuda()
    noises = torch.rand(seg.shape).cuda()
    # give background (0) pixels distinct sub-1 noise values so they
    # don't dominate the mode over real labels
    per = torch.where(seg == 0, noises, zeros)
    seg_labels = torch.mode(per + seg, dim=-2)[0]
    # anything below 1 is a noised background pixel: map it back to 0
    seg_labels[seg_labels < 1] = 0
    return seg_labels

def validate(model, test_loader, use_cuda, criterion):
    # validation -- this is a crude estimation because there might be some paddings at the end
    correct_cnt = 0.0
    model.eval()
    for it, test_data in enumerate(test_loader):
        vote = []
        for data_dic in test_data:
            if use_cuda:
                imgs, labels = (Variable(data_dic['image'], volatile=True).cuda(),
                                Variable(data_dic['label'], volatile=True).cuda())
            else:
                imgs, labels = (Variable(data_dic['image'], volatile=True),
                                Variable(data_dic['label'], volatile=True))
            test_output = model(imgs)
            _, predict = test_output.topk(1)
            vote.append(predict)
        vote = torch.cat(vote, 1)
        final_vote, _ = torch.mode(vote, 1)
        ground_truth = test_data[0]['label']
        correct_this_batch = (final_vote.cpu().data == ground_truth).sum()
        correct_cnt += correct_this_batch
        accuracy = float(correct_this_batch) / len(ground_truth)
        logging.info("batch {0} dev accuracy is : {1:.5f}".format(it, accuracy))
    return correct_cnt

def generate_start_pos(self):
    if isinstance(self.START_POS, torch.Tensor):
        return self.START_POS
    startpos = self.START_POS.sample()
    sample_one_agent = False
    if len(startpos.shape) == 1:
        startpos = torch.stack(
            [self.START_POS.sample() for _ in range(self.N_AGENTS)], dim=0)
        sample_one_agent = True
    codist = self.get_relative_position(startpos).norm(dim=2)
    codist.diagonal().fill_(float('inf'))
    while torch.any(codist < 2 * self.AGENT_RADIUS):
        collisions = codist < 2 * self.AGENT_RADIUS
        idxs = []
        while torch.sum(collisions) != 0:
            # greedily pick the agent involved in the most collisions
            idx = torch.mode(torch.where(collisions)[0])[0]
            idxs.append(idx)
            collisions[idx, :] = 0
            collisions[:, idx] = 0
        idxs = torch.tensor(idxs)
        if sample_one_agent:
            for idx in idxs:
                startpos[idx, :] = self.START_POS.sample()
        else:
            startposnew = self.START_POS.sample()
            startpos[idxs, :] = startposnew[idxs, :]
        codist = self.get_relative_position(startpos).norm(dim=2)
        codist.diagonal().fill_(float('inf'))
    return startpos

def predict(self, x_test: Tensor):
    """
    Predict the most likely class for each sample in a given tensor.
    :param x_test: Tensor of shape (N,D) where N is the number of samples.
    :return: A tensor of shape (N,) containing the predicted classes.
    """
    # Calculate distances between training and test samples
    dist_matrix = l2_dist(self.x_train, x_test)

    # TODO:
    # Implement k-NN class prediction based on distance matrix.
    # For each test sample we'll look for its k-nearest neighbors.
    # Then we'll predict the label of that sample to be the majority
    # label of its nearest neighbors.
    n_test = x_test.shape[0]
    y_pred = torch.zeros(n_test, dtype=torch.int64)
    for i in range(n_test):
        # TODO:
        # - Find indices of k-nearest neighbors of test sample i
        # - Set y_pred[i] to the most common class among them
        # - Don't use an explicit loop.
        # ====== YOUR CODE: ======
        _, indices = torch.topk(dist_matrix[:, i], largest=False, k=self.k)
        y_pred[i], _ = torch.mode(self.y_train[indices])
        # ========================
    return y_pred

def classify(self, X_que, y_pool, X_pool, k=5):
    n_que = X_que.shape[0]
    y_pred = th.zeros(n_que, device=self.device, dtype=th.int)
    for i in range(n_que):
        ranks = self.rank(X_que[i], X_pool)
        # majority label among the k highest-ranked pool samples
        y_pred[i] = th.mode(y_pool[ranks[0:k]])[0].int()
    return y_pred

def test(args, model, device, test_loader, criterion, batch_size, num_labels):
    conf_mat = np.zeros((num_labels, num_labels))
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.float().to(device), labels.long().to(device)
            passes_pred = []
            for _ in range(args.inference_passes):
                output = model(inputs)
                test_loss += criterion(output, labels).sum().item()  # sum up batch loss
                passes_pred.append(output.argmax(dim=1, keepdim=True))
            pred = torch.mode(torch.cat(passes_pred, dim=1), dim=1, keepdim=True)[0]
            correct += pred.eq(labels.view_as(pred)).sum().item()
            conf_mat += confusion_matrix(labels.cpu().numpy(),
                                         pred.cpu().numpy(),
                                         labels=range(num_labels))
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    print("Confusion Matrix:\n", np.int_(conf_mat))

def forward(self, x):
    x = x.double()
    if self.kernel == "linear":
        k = torch.mm(x, self.sv_t)
    elif self.kernel == "rbf":
        # using quadratic expansion -- susceptible to rounding-off errors
        # http://www.robots.ox.ac.uk/~albanie/notes/Euclidean_distance_trick.pdf
        x_norm = -self.gamma * (x ** 2).sum(1).view(-1, 1)
        k = torch.exp(x_norm + self.sv_norm + 2.0 * self.gamma * torch.mm(x, self.sv_t))
    elif self.kernel == "sigmoid":
        k = torch.sigmoid(self.gamma * torch.mm(x, self.sv_t) + self.coef0)
    else:  # poly kernel
        k = torch.pow(self.gamma * torch.mm(x, self.sv_t) + self.coef0, self.degree)

    c = [
        sum(self.a[i, p] * k[:, p:p + 1]
            for p in range(self.start[j], self.end[j]))
        + sum(self.a[j - 1, p] * k[:, p:p + 1]
              for p in range(self.start[i], self.end[i]))
        for i in range(self.len_nv)
        for j in range(i + 1, self.len_nv)
    ]
    c = torch.cat(c, dim=1) + self.b

    if self.n_classes == 2:
        class_ids = torch.gt(c, 0.0).int().flatten()
    else:
        votes = torch.where(c > 0, self.true_classes, self.false_classes)
        # TODO mode is still not implemented for GPU backend
        votes = votes.data.cpu()
        class_ids, _ = torch.mode(votes, dim=1)

    # no class probabilities in SVC
    if self.perform_class_select:
        temp = torch.index_select(self.classes, 0, class_ids.long())
        return temp, temp
    else:
        return class_ids, class_ids

def predict(self, unlabeled_path, start_time, train_time):
    all_input_ids, all_token_type_ids, \
        all_attention_mask, all_label_ids = self.get_X_y_ids(unlabeled_path)
    dataset = TensorDataset(all_input_ids, all_token_type_ids, all_attention_mask)
    batch_size = self.n_gpu * self.per_gpu_batch_size
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)
    model = CLS_Model(vocab_size=self.bert_tokenizer.vocab_size,
                      embed_size=self.embed_size,
                      num_labels=len(self.label_list),
                      dense_layer_type=self.dense_layer_type,
                      dropout=self.dropout,
                      embed_type=self.embed_type,
                      max_len=self.max_seq_len,
                      model_name_or_path=self.model_name_or_path,
                      vector_file=self.vector_file)
    model.to(self.device)
    y_preds = []
    for model_state_path in glob(
            os.path.join(self.output_dir, '*{}*.pt*'.format(self.model_name))):
        model.load_state_dict(torch.load(model_state_path))
        y_pred = self.single_predict(model, dataloader)
        y_preds.append(y_pred)
    y_preds = torch.tensor(y_preds)
    y_pred = torch.mode(y_preds, dim=0).values
    y_pred = y_pred.numpy()
    report = classification_report(y_true=all_label_ids.numpy(), y_pred=y_pred, digits=4)
    predix = os.path.split(unlabeled_path)[-1].replace(".csv", "")
    score_file = os.path.join(self.output_dir,
                              'score_{}_{}.txt'.format(predix, self.model_name))
    data_df = pd.read_csv(unlabeled_path, names=["q1", "q2", "label", "topic"])
    data_df['pred'] = y_pred
    data_df.to_csv(os.path.join(self.output_dir,
                                'pred_{}_{}.csv'.format(predix, self.model_name)),
                   index=False)
    with open(score_file, 'w', encoding="utf-8") as w:
        w.write(report)
        w.write("\n")
        w.write("train time cost:\t {:.2f} s".format(train_time))
        w.write("\n")
        w.write("time cost:\t {:.2f} s".format(time.time() - start_time - train_time))
        w.write("\n")
        w.write("args:\n{}".format('\n'.join(
            ['%s:%s' % item for item in self.__dict__.items()])))

def get_ins_list(sem_pred, center_pred, offset_pred, thing_list,
                 threshold=0.1, nms_kernel=7, top_k=None):
    """
    Get instance list from prediction results.

    Args:
        sem_pred: tensor of size [1, C, H, W]. Semantic segmentation prediction.
        center_pred: tensor of size [1, 1, H, W]. Center heatmap prediction.
        offset_pred: tensor of size [1, 2, H, W]. Offset prediction.
        thing_list: List of thing class ids (int). Things can be instances and
            belong to the foreground.
        threshold: float. Threshold for center_pred activation.
        nms_kernel: int. Max pooling kernel size for filtering center activations.
        top_k: int. Number of center points to be preserved from the predicted
            center heatmap.
    Returns:
        ins_list: list of dictionaries,
            e.g. {'class_id': 13, 'mask': nparray of size (h, w), 'score': 0.9876}
        center_points: list of center points
    Raises:
        AssertionError: check prediction maps' dimensions.
    """
    # Check argument validity
    assert sem_pred.dim() == 4 and sem_pred.size(0) == 1
    assert center_pred.dim() == 4 and center_pred.size(0) == 1
    assert offset_pred.dim() == 4 and offset_pred.size(0) == 1

    sem_soft = F.softmax(sem_pred, dim=1)  # normalize prediction scores
    # sem_soft [1, C, H, W] -> sem_hard [1, H, W]
    sem_hard = get_semantic_segmentation(sem_soft)
    ins_seg, center_points = get_instance_segmentation(sem_hard, center_pred,
                                                       offset_pred, thing_list,
                                                       threshold, nms_kernel, top_k)
    # select each instance's class label by majority vote
    instance_ids = torch.unique(ins_seg)
    ins_list = []
    for ins_id in instance_ids:
        instance = {}
        if ins_id == 0:
            continue
        # majority voting
        ins_mask = (ins_seg == ins_id)
        class_id, _ = torch.mode(sem_hard[ins_mask].view(-1, ))
        instance['class_id'] = class_id.item()
        # get polygon from binary instance mask
        instance['mask'] = ins_mask.squeeze(0).cpu().numpy()
        # Compute confidence score
        score_sum = torch.sum(sem_soft[:, class_id, :, :] * ins_mask)
        score_mean = score_sum / torch.sum(ins_mask)
        instance['score'] = score_mean.item()
        ins_list.append(instance)
    if not ins_list:
        raise RuntimeError('Image has no detected instance.')
    return ins_list, center_points

def pred_helper(output):
    pred = torch.zeros(output[0].size(0), len(output))
    for k in range(len(output)):  # nemo
        curr_pred = output[k].argmax(dim=-1)
        val, _ = torch.mode(curr_pred, dim=1)
        pred[:, k] = val
    return pred

def merge_semantic_and_instance(sem_seg, ins_seg, label_divisor, thing_list,
                                stuff_area, void_label):
    """
    Post-processing for panoptic segmentation, by merging semantic segmentation
    label and class agnostic instance segmentation label.

    Arguments:
        sem_seg: A Tensor of shape [1, H, W], predicted semantic label.
        ins_seg: A Tensor of shape [1, H, W], predicted instance label.
        label_divisor: An Integer, used to convert
            panoptic id = semantic id * label_divisor + instance_id.
        thing_list: A List of thing class ids.
        stuff_area: An Integer, remove stuff whose area is less than stuff_area.
        void_label: An Integer, indicates the region has no confident prediction.
    Returns:
        A Tensor of shape [1, H, W] (to be gathered by distributed data parallel).
    Raises:
        ValueError, if batch size is not 1.
    """
    # In case thing mask does not align with semantic prediction
    pan_seg = torch.zeros_like(sem_seg) + void_label
    thing_seg = ins_seg > 0
    semantic_thing_seg = torch.zeros_like(sem_seg)
    for thing_class in thing_list:
        semantic_thing_seg[sem_seg == thing_class] = 1

    # keep track of instance id for each class
    class_id_tracker = {}

    # paste thing by majority voting
    instance_ids = torch.unique(ins_seg)
    for ins_id in instance_ids:
        if ins_id == 0:
            continue
        # Make sure only do majority voting within semantic_thing_seg
        thing_mask = (ins_seg == ins_id) & (semantic_thing_seg == 1)
        if torch.nonzero(thing_mask).size(0) == 0:
            continue
        class_id, _ = torch.mode(sem_seg[thing_mask].view(-1, ))
        if class_id.item() in class_id_tracker:
            new_ins_id = class_id_tracker[class_id.item()]
        else:
            class_id_tracker[class_id.item()] = 1
            new_ins_id = 1
        class_id_tracker[class_id.item()] += 1
        pan_seg[thing_mask] = class_id * label_divisor + new_ins_id

    # paste stuff to unoccupied area
    class_ids = torch.unique(sem_seg)
    for class_id in class_ids:
        if class_id.item() in thing_list:
            # thing class
            continue
        # calculate stuff area
        stuff_mask = (sem_seg == class_id) & (~thing_seg)
        area = torch.nonzero(stuff_mask).size(0)
        if area >= stuff_area:
            pan_seg[stuff_mask] = class_id * label_divisor

    return pan_seg

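# Isolated sketch of the majority-voting step above: each predicted instance
# takes the torch.mode of the semantic labels inside its mask. Toy tensors
# only; real inputs are [1, H, W] prediction maps.
import torch

sem_seg = torch.tensor([[11, 11, 13], [13, 13, 13]])
ins_seg = torch.tensor([[1, 1, 1], [0, 0, 0]])
thing_mask = ins_seg == 1
class_id, _ = torch.mode(sem_seg[thing_mask].view(-1))
print(class_id.item())  # 11 -- two of the three masked pixels are class 11
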
def kNNClassifer(latent_train, latent_test, k):
    # integer division so the derived class labels stay integral
    num_each_class = latent_train.shape[0] // 10
    latent_test = latent_test.unsqueeze(1).repeat(1, latent_train.shape[0], 1)
    distance = ((latent_test - latent_train) ** 2).sum(dim=2)  # shape=(num_sample, num_base_sample)
    # nearest neighbors have the smallest distances, hence largest=False
    _, index = torch.topk(distance, k, dim=1, largest=False)  # index shape=(num_sample, k)
    index = index // num_each_class
    pred, _ = torch.mode(index, dim=1)
    return pred

def _get_point_cloud(self, depth):
    height = depth.shape[0]
    width = depth.shape[1]

    # val, counts = torch.unique(depth, return_counts=True)
    # table_val = val[counts.argmax()]
    table_val, _ = torch.mode(torch.flatten(depth), 0)

    if self.xmap is None:
        self.xmap = torch.tensor([[j for i in range(width)] for j in range(height)],
                                 dtype=torch.float)
        self.ymap = torch.tensor([[i for i in range(width)] for j in range(height)],
                                 dtype=torch.float)

    # duct-tape fix to filter blue box out of point cloud
    choose_mask = (depth < table_val)
    choose_mask[164:, :31] = 0
    choose = torch.flatten(choose_mask).nonzero().flatten()
    # choose = (torch.flatten(depth) < table_val).nonzero().flatten()

    # take random 500 points
    idx = np.random.choice(choose.shape[0], size=500)
    choose = choose[idx]
    depth_masked = depth.flatten()[choose]
    xmap_masked = self.xmap.flatten()[choose]
    ymap_masked = self.ymap.flatten()[choose]

    fovy = 45.0
    # f = height / math.tan(fovy * math.pi / 360)  # should be height from ground not ... pixels???
    f = height / math.tan(fovy * math.pi / 360)
    cam_scale = 0.1
    f /= cam_scale
    cam = np.array(((f, 0, width / 2), (0, f, height / 2), (0, 0, 1)))
    cx = height / 2
    cy = width / 2

    pt2 = depth_masked / cam_scale
    # pt2 = depth_masked / 0.1
    pt0 = (ymap_masked - cx) * pt2 / f
    pt1 = (xmap_masked - cy) * pt2 / f
    # pt2 = pt2 / 0.1
    cloud = torch.cat((pt1[:, None], -pt0[:, None], pt2[:, None]), dim=1)

    # turn depths into coords by subtracting from camera
    # -0.13 0.6 0.6
    # 0 0.55 0.48
    cloud[:, 0] = 0. + cloud[:, 0]
    cloud[:, 1] = 0.6 + cloud[:, 1]
    cloud[:, 2] = (table_val / 0.1) - cloud[:, 2] + 0.02

    choose = choose.view(1, -1)
    return choose, cloud