def test_iou(self):
    """Check ``utils.iou`` on two identical rectangles and on a smaller one.

    The identical pair is expected to score 1 and the ``[0, 0, 5, 5]``
    rectangle is expected to score 0.25 against ``[0, 0, 10, 10]``.
    """
    reference_rect = [0, 0, 10, 10]
    identical_rect = [0, 0, 10, 10]
    smaller_rect = [0, 0, 5, 5]

    overlap_identical = utils.iou(reference_rect, identical_rect)
    overlap_smaller = utils.iou(reference_rect, smaller_rect)

    self.assertEqual(overlap_identical, 1)
    self.assertEqual(0.25, overlap_smaller)
def test_iou_no_intersection():
    """``utils.iou`` of two boolean masks that share no ``True`` entry is 0."""
    first_mask = [False, False, True, True]
    second_mask = [True, True, False, False]

    result = utils.iou(first_mask, second_mask)

    assert result == 0.
def test_iou_no_positive():
    """``utils.iou`` of two all-``False`` masks is expected to be 0."""
    first_mask = [False] * 4
    second_mask = [False] * 4

    result = utils.iou(first_mask, second_mask)

    assert result == 0.
def test_iou():
    """``utils.iou`` of two masks sharing 2 of 4 set positions is 0.5."""
    first_mask = [False, True, True, True]
    second_mask = [True, True, True, False]

    result = utils.iou(first_mask, second_mask)

    assert result == .5
def validate(self, epoch, n_epochs):
    """Evaluate the model on ``self.Dataloader_val`` and log the results.

    Accumulates the criterion loss and the IoU (reported as "accuracy") over
    every validation batch, averages both by the number of batches, prints
    them and writes them to ``self.writer`` at ``self.steps``. The first
    batch is additionally rendered with ``plot_image`` and logged as a figure.

    Args:
        epoch: Current epoch number (only used in the printed message).
        n_epochs: Total number of epochs (only used in the printed message).
    """
    # The original called a bare ``model.eval()``; the forward pass below
    # uses ``self.model``, so that is the module that must be switched.
    self.model.eval()
    loss_value = 0.0
    Accuracy = 0.0
    num_batches = len(self.Dataloader_val)

    # No gradients are needed for evaluation; this avoids building a graph.
    with torch.no_grad():
        for i, (_, sample) in enumerate(self.Dataloader_val):
            inputs = sample['image'].cuda()
            labels = sample['masks'].cuda()

            outputs = self.model(inputs.float())
            loss = self.criterion(outputs.float(), labels)
            loss_value += loss.item()

            probabilities = torch.sigmoid(outputs)
            Accuracy += iou(probabilities, labels)

            if i == 0:
                # Visualise the first batch only, thresholding at 0.3.
                preds = {
                    'image': inputs.detach().cpu(),
                    'masks': (probabilities > 0.3).float().detach().cpu(),
                }
                self.writer.add_figure('Visulations ',
                                       plot_image(sample, preds), self.steps)

    loss_value /= num_batches
    Accuracy /= num_batches
    print('[%d/%d][%d/%d]\tVal Loss: %.4f\tVal Accuracy: %.4f ' %
          (epoch, n_epochs, i, num_batches, loss_value, Accuracy))
    self.writer.add_scalar('Validation loss ', loss_value, self.steps)
    self.writer.add_scalar('Validation accuracy ', Accuracy, self.steps)
def one_scale_loss(
    feats, matching_true_boxes, anchors, device, num_cls=80, iou_threshold=0.6
):
    """Compute the detection loss for one output scale.

    The decoded prediction is split into box (channels 0:4), confidence
    (4:5) and class (5:) parts, and ``matching_true_boxes`` is split the same
    way (its 4:5 channel is used as the detector mask). The result is the sum
    of no-object, object, coordinate and class terms divided by the batch
    size.

    Args:
        feats: Raw network output passed to ``decode``.
        matching_true_boxes: 5-D target tensor; its leading dims are read as
            ``(B, H, W, ...)``.
        anchors: Anchors passed to ``decode``.
        device: Device the object mask is moved to.
        num_cls: Number of classes passed to ``decode``.
        iou_threshold: IoU above which a prediction counts for the object mask.

    Returns:
        The scalar loss tensor.
    """
    # Default loss weights.
    lambda_obj = 5
    lambda_noobj = 1
    lambda_class = 1
    lambda_coord = 1

    B, H, W, _, _ = matching_true_boxes.size()

    decoded = decode(feats, anchors, device, num_cls)
    pred_boxes = decoded[..., 0:4]
    pred_conf = decoded[..., 4:5]
    pred_cls = decoded[..., 5:]

    detector_mask = matching_true_boxes[..., 4:5]
    true_boxes = matching_true_boxes[..., 0:4]

    # IoU is evaluated on the whToxy-converted boxes; the coordinate loss
    # further down uses the unconverted slices.
    ious = iou(whToxy(pred_boxes), whToxy(true_boxes))
    best_ious, _ = torch.max(ious, dim=3, keepdim=True)
    obj_mask = (best_ious > iou_threshold).to(torch.float)
    obj_mask = obj_mask.to(device).view(B, H, W, 1, 1)

    true_boxes = matching_true_boxes[..., 0:4]
    pred_boxes = decoded[..., 0:4]

    # No-object term.
    noobj_loss = torch.sum(
        lambda_noobj * (1 - obj_mask) * (1 - detector_mask) * (-pred_conf)**2
    )

    # Object term.
    obj_loss = torch.sum(
        lambda_obj * obj_mask * detector_mask * (1 - pred_conf)**2
    )

    # Coordinate term.
    coord_loss = torch.sum(
        lambda_coord * detector_mask * (pred_boxes - true_boxes)**2
    )

    # Classification term.
    true_cls = matching_true_boxes[..., 5:]
    bce_sum = torch.nn.BCELoss(reduction='sum')
    class_loss = lambda_class * bce_sum(pred_cls * detector_mask, true_cls)

    total = (noobj_loss + obj_loss + coord_loss + class_loss) / B
    return total
def train_epoch(self, epoch, n_epochs):
    """Train for one pass over ``self.Dataloader``.

    Every 150th step (except step 0) the current loss and IoU are printed
    and written to ``self.writer``, and ``self.validate`` is run.
    ``self.steps`` is incremented once per batch.

    Args:
        epoch: Current epoch number (used for logging and validation).
        n_epochs: Total number of epochs (used for logging and validation).
    """
    for i, (_, sample) in enumerate(self.Dataloader):
        self.model.zero_grad()
        # Re-entered every step, presumably because validate() leaves the
        # model in eval mode.
        self.model.train()

        inputs = sample['image'].cuda()
        labels = sample['masks'].cuda()

        outputs = self.model(inputs.float())
        loss = self.criterion(outputs.float(), labels)
        loss.backward()
        self.optimizer.step()

        if i % 150 == 0 and i != 0:
            # The IoU is only reported here, so compute it here (and without
            # autograd) instead of on every training step.
            with torch.no_grad():
                train_accuracy = iou(torch.sigmoid(outputs), labels)
            print('[%d/%d][%d/%d]\tLoss: %.4f\tAccuracy: %.4f ' %
                  (epoch, n_epochs, i, len(self.Dataloader), loss.item(),
                   train_accuracy))
            self.writer.add_scalar('training loss ', loss.item(), self.steps)
            self.writer.add_scalar('training accuracy ', train_accuracy,
                                   self.steps)
            self.validate(epoch, n_epochs)

        self.steps += 1
def infer(valid_queue, model, criterion):
    """Evaluate ``model`` on ``valid_queue`` and return accuracy and mean IoU.

    Per-batch intersections and unions from ``utils.iou`` are collected; the
    mean IoU is ``mean(intersections) / (mean(unions) + 1e-6)`` taken over
    the batches whose union is non-zero.

    Args:
        valid_queue: Iterable of ``(input, target)`` batches.
        model: Network to evaluate; it is put in eval mode.
        criterion: Unused. The original computed a loss that was never read;
            the parameter is kept so existing callers keep working.

    Returns:
        ``(acc, mIoU)`` where ``acc`` is the accuracy of the LAST batch only
        (``None`` if ``valid_queue`` is empty) and ``mIoU`` is a float
        (``0.0`` if no batch has a non-zero union).
    """
    model.eval()
    progress = tqdm(valid_queue)
    intersections = []
    unions = []
    acc = None  # stays None when the queue yields nothing

    with torch.no_grad():
        for step, (inputs, target) in enumerate(progress):
            # as_tensor avoids re-wrapping existing tensors; the deprecated
            # Variable wrapper is a no-op in current PyTorch.
            inputs = torch.as_tensor(inputs).float().cuda()
            target = torch.as_tensor(target).cuda().float()

            logits = model(inputs)
            acc = utils.accuracy(logits, target)
            iou, intersection, union = utils.iou(logits, target)

            intersections.append(intersection.item())
            unions.append(union.item())

            if step % args.report_freq == 0:
                progress.set_postfix({"Acc": acc.item(), "IoU": iou.item()})

    # Drop batches whose union is zero before averaging.
    unions = np.array(unions)
    intersections = np.array(intersections)
    non_zero_mask = unions != 0
    if not non_zero_mask.any():
        # Avoids np.mean of an empty selection (NaN plus a RuntimeWarning).
        return acc, 0.0

    mIoU = np.mean(intersections[non_zero_mask]) / \
        (np.mean(unions[non_zero_mask]) + 1e-6)
    return acc, mIoU
def build_iou(labels, logits, name='build_iou'):
    """Build the IoU between decoded label boxes and decoded predicted boxes.

    Args:
        labels: Mapping with ``'classifications'`` and
            ``'regressions_postprocessed'`` entries.
        logits: Mapping with a ``'regressions_postprocessed'`` entry.
        name: Name scope used for the created ops.

    Returns:
        The result of ``utils.iou`` on the two decoded ``.boxes``.
    """
    with tf.name_scope(name):
        # Both sides are decoded with the ground-truth classification.
        true_classification = labels['classifications']
        decoded_labels = utils.boxes_decode(
            true_classification, labels['regressions_postprocessed'])
        decoded_logits = utils.boxes_decode(
            true_classification, logits['regressions_postprocessed'])
        return utils.iou(decoded_labels.boxes, decoded_logits.boxes)
def main():
    """Run 1000 noisy-circle trials and print the share with IoU above 0.7."""

    def run_trial():
        # One trial: generate a circle, detect it, score the detection.
        params, img = noisy_circle(200, 50, 2)
        detected = find_circle(img)
        return iou(params, detected)

    scores = np.array([run_trial() for _ in range(1000)])
    print((scores > 0.7).mean())
def _mean_class_score(correct, count):
    """Return the mean of ``correct[c] / count[c]``, or -1.0 if any count is 0."""
    if not all(count):
        return -1.0
    return sum(hit / total for hit, total in zip(correct, count)) / len(count)


def do_test(model, images_path, labels_path, batch_size=32, progress_callback=None):
    """Score ``model`` on a labelled image set.

    For every label, a hit is counted when at least one detection of the
    same class that survives non-max suppression has IoU >= 0.5 with it. The
    score is the mean over the 80 classes of hits / labels, or -1 while any
    class has no labels yet.

    Args:
        model: Network to evaluate.
        images_path: Passed to ``YoloDataset``.
        labels_path: Passed to ``YoloDataset``.
        batch_size: Loader batch size.
        progress_callback: Optional callable invoked after each batch as
            ``progress_callback(model, batch_number=..., batch_count=...,
            map_score=...)`` with the running score.

    Returns:
        The final score (``-1.0`` if any class never appeared, which includes
        the empty-dataset case that previously raised ``UnboundLocalError``).
    """
    size = 416
    num_classes = 80
    t = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # NOTE(review): shuffle=True is kept from the original; it only changes
    # the order in which running scores are reported.
    data = DataLoader(YoloDataset(images_path, labels_path, t, size),
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=mp.cpu_count())
    count = [0] * num_classes
    correct = [0] * num_classes

    with torch.no_grad():
        for i, (local_batch, local_labels) in enumerate(data):
            local_batch = local_batch.to(device)
            outputs = model(local_batch)
            batch_detections = yolo.YoloV3.get_detections(outputs)

            # Distinct names: the original reused ``detections`` for the
            # batch, the per-image value and the NMS result.
            for j, image_detections in enumerate(
                    utils.parse_detections(batch_detections)):
                kept = utils.non_max_suppression(image_detections,
                                                 confidence_threshold=0.2)
                for target in utils.parse_labels(local_labels[0][j], size):
                    class_idx = target['coco_idx']
                    count[class_idx] += 1
                    # Stops at the first same-class detection that overlaps.
                    if any(det.coco_idx == class_idx
                           and utils.iou(target['bb'], det.bb) >= 0.5
                           for det in kept):
                        correct[class_idx] += 1

            if progress_callback:
                progress_callback(model,
                                  batch_number=(i + 1),
                                  batch_count=len(data),
                                  map_score=_mean_class_score(correct, count))

    return _mean_class_score(correct, count)
def forward(self, prediction, target):
    """Compute the combined box / objectness / no-object / class loss.

    Channel layout read by this method (last axis): 0:4 class scores,
    4 confidence of box 1, 5:9 box 1, 9 confidence of box 2, 10:14 box 2.
    ``target`` uses channel 4 as the object indicator and 5:9 as its box.

    Args:
        prediction: Network output, reshaped here to
            ``(batch, S, S, C + B * 5)``.
        target: Label tensor indexed with the layout above.

    Returns:
        ``lambda_coord * box_loss + object_loss
        + lambda_noobj * no_object_loss + class_loss``.
    """
    prediction = prediction.reshape(prediction.shape[0], self.S, self.S,
                                    self.C + self.B * 5)

    target_box = target[..., 5:9]
    target_conf = target[..., 4:5]
    first_box, second_box = prediction[..., 5:9], prediction[..., 10:14]
    first_conf, second_conf = prediction[..., 4:5], prediction[..., 9:10]

    # IoU of each predicted box with the ground-truth box, stacked on a new
    # leading axis so that argmax over it selects the responsible box:
    # 0 -> first box, 1 -> second box.
    ious = torch.stack(
        [iou(first_box, target_box), iou(second_box, target_box)], dim=0)
    _, best_box = torch.max(ious, dim=0)

    # Indicator of whether an object exists in the cell.
    exist_box = target[..., 4].unsqueeze(3)

    # ---- box coordinate loss ----
    box_predictions = exist_box * ((1 - best_box) * first_box +
                                   best_box * second_box)
    box_targets = exist_box * target_box

    # Width and height are compared in square-root space; the sign is kept
    # for predictions since they may be negative.
    box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * \
        torch.sqrt(torch.abs(box_predictions[..., 2:4] + 1e-6))
    box_targets[..., 2:4] = torch.sqrt(box_targets[..., 2:4] + 1e-6)

    box_loss = self.loss_criteria(
        torch.flatten(box_predictions, end_dim=-2),
        torch.flatten(box_targets, end_dim=-2))

    # ---- confidence loss where an object exists ----
    pred_box = exist_box * ((1 - best_box) * first_conf +
                            best_box * second_conf)
    object_loss = self.loss_criteria(torch.flatten(pred_box),
                                     torch.flatten(exist_box * target_conf))

    # ---- confidence loss where no object exists (both boxes) ----
    no_object_loss = self.loss_criteria(
        torch.flatten((1 - exist_box) * first_conf, start_dim=1),
        torch.flatten((1 - exist_box) * target_conf, start_dim=1))
    no_object_loss += self.loss_criteria(
        torch.flatten((1 - exist_box) * second_conf, start_dim=1),
        torch.flatten((1 - exist_box) * target_conf, start_dim=1))

    # ---- class loss ----
    class_loss = self.loss_criteria(
        torch.flatten(exist_box * prediction[..., :4], end_dim=-2),
        torch.flatten(exist_box * target[..., :4], end_dim=-2))

    loss = (self.lambda_coord * box_loss + object_loss +
            self.lambda_noobj * no_object_loss + class_loss)
    return loss
def create_y_true(self, aug_bbses, aug_labels, anchors):
    """Build the three per-scale target arrays for a batch.

    Each bounding box is assigned to the anchor with the highest IoU
    (comparing width/height only, both rectangles anchored at the origin) and
    written into the grid cell containing the box centre, on the scale that
    owns that anchor.

    Args:
        aug_bbses: Per-image objects exposing ``.bounding_boxes``; each box
            provides ``width``, ``height``, ``center_x`` and ``center_y``.
        aug_labels: Per-image list of class labels, parallel to the boxes.
        anchors: Flat sequence ``[w0, h0, w1, h1, ...]``.

    Returns:
        A list of three arrays shaped
        ``(batch_size, grid * 2**l, grid * 2**l, 3, 4 + 1 + n_classes)``
        for ``l`` in 0..2.
    """
    # Anchor indices owned by each scale: large, medium, small anchors.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    slot_size = 4 + 1 + len(self.classes)
    y_batches = [
        np.zeros(shape=(self.batch_size, self.grid * (2**l),
                        self.grid * (2**l), 3, slot_size))
        for l in range(3)
    ]

    # i is the index of the image inside the batch.
    for i, image_boxes in enumerate(aug_bbses):
        # j iterates over all boxes of that single image.
        for j, bbox in enumerate(image_boxes.bounding_boxes):
            rice = np.zeros(slot_size)  # content of one anchor slot
            rice[4] = 1  # confidence
            label_index = self.classes.index(aug_labels[i][j])
            rice[5 + label_index] = 1  # one-hot label

            max_iou = 0
            anchor_index = -1
            box_rect = [0, 0, bbox.width, bbox.height]
            for k in range(0, len(anchors), 2):
                anchor_rect = [0, 0, anchors[k], anchors[k + 1]]
                current_iou = utils.iou(box_rect, anchor_rect)
                if current_iou > max_iou:
                    # Integer division keeps the index an int (was ``k / 2``).
                    anchor_index = k // 2
                    max_iou = current_iou

            y_batch_index = next(
                (l for l, members in enumerate(anchor_mask)
                 if anchor_index in members), None)
            if y_batch_index is None:
                # No anchor overlaps this box (or the index is outside the
                # mask). Previously this either raised or silently reused
                # the scale of the previous box; skip the box instead.
                continue
            inner_index = anchor_index % 3

            # Different scales have different cell sizes.
            cell_size = self.image_size[0] / (self.grid * (2**y_batch_index))
            # Cell index, starting from 0.
            cx = int(np.floor(bbox.center_x / cell_size))
            cy = int(np.floor(bbox.center_y / cell_size))
            print('i:%s j:%s cx:%s cy:%s' % (i, j, cx, cy))

            # Box expressed in grid units.
            rice[0:4] = [
                bbox.center_x / cell_size,
                bbox.center_y / cell_size,
                bbox.width / cell_size,
                bbox.height / cell_size,
            ]
            # Indexed row (cy) first, then column (cx).
            y_batches[y_batch_index][i][cy][cx][inner_index] = rice
    return y_batches
def score(self, input, i_contours):
    """Score predictions for ``input`` against the given i-contour labels.

    :param input: data passed unchanged to ``self.predict``
    :param i_contours: i-contour labels to compare against
    :return: result of ``utils.iou(i_contours, predictions)``
    """
    predicted_contours = self.predict(input)
    overlap = utils.iou(i_contours, predicted_contours)
    return overlap
def main_batch():
    """Generate 1000 noisy circles, detect them in one batch call and print
    the share of detections with IoU above 0.7."""
    samples = [noisy_circle(200, 50, 2) for _ in range(1000)]
    params_batch = [params for params, _ in samples]
    img_batch = [img for _, img in samples]

    detected = find_circle_batch(img_batch)

    scores = []
    for idx in range(1000):
        scores.append(iou(params_batch[idx], detected[idx]))
    scores = np.array(scores)
    print((scores > 0.7).mean())
def gen_simple(size):
    """Generate and display jittered crops for every labelled box.

    Lines after the first two of ``LABEL_FILE_PATH`` are read as
    ``filename x y w h``. For each one, five randomly shifted and scaled
    boxes are produced, passed through ``utils.rect2squar``, cropped from the
    image, resized to ``size`` x ``size`` and shown. The IoU with the
    original box selects the sample category; the branches are placeholders.

    Args:
        size: Side length in pixels of the resized crop.
    """
    plt.ion()
    # Context manager so the label file is closed (it was leaked before).
    with open(LABEL_FILE_PATH) as label_file:
        for i, line in enumerate(label_file):
            if i <= 1:
                continue  # the first two lines are not data rows
            strs = line.split()
            if not strs:
                continue  # tolerate blank lines
            filename = strs[0]
            x1, y1, w, h = (int(strs[k]) for k in range(1, 5))
            x2 = x1 + w
            y2 = y1 + h
            cx = x1 + w / 2
            cy = y1 + h / 2

            for _ in range(5):
                dx = np.random.uniform(-0.2, 0.2)
                dy = np.random.uniform(-0.2, 0.2)
                dw = np.random.uniform(-0.2, 0.2)
                dh = np.random.uniform(-0.2, 0.2)

                _cx = cx * (1 + dx)
                _cy = cy * (1 + dy)
                _w = w * (1 + dw)
                _h = h * (1 + dh)
                _x1 = _cx - _w / 2
                _y1 = _cy - _h / 2
                _x2 = _x1 + _w
                _y2 = _y1 + _h

                box = np.array([_x1, _y1, _x2, _y2, 0])
                boxes = np.array([[x1, y1, x2, y2, 0]])
                _box = utils.rect2squar(np.array([box]))[0]

                with Image.open(os.path.join(IMAGE_PATH, filename)) as source:
                    im = source.crop(_box[0:4])
                    # resize returns a NEW image; the original discarded it.
                    im = im.resize((size, size))

                iou = utils.iou(_box, boxes)
                plt.imshow(im)
                plt.pause(1)

                if iou[0] > 0.65:
                    # positive sample
                    pass
                elif iou[0] > 0.4:
                    # part sample
                    pass
                elif iou[0] < 0.3:
                    # negative sample
                    pass
def eval(self):
    """Compute, per detected frame, the best IoU against its ground truth.

    The ground-truth file is rewritten in place with commas replaced by
    spaces, then loaded as ``x y w h`` rows and converted to normalised
    ``[x1, y1, x2, y2]``. For each entry of ``self.det_results`` the
    detection of class ``self.GROUND_TRUTH_CLASS`` with score
    ``>= args.score`` and the highest IoU against that frame's ground truth
    is kept.

    Returns:
        ``(iou_for_videos, scores_for_videos)``: two lists with one entry per
        item of ``self.det_results`` (0 where nothing matched).
    """
    iou_for_videos = []
    scores_for_videos = []

    with Image.open(self.TEST_IMAGE_PATHS[0]) as im:
        IMAGE_HEIGHT = im.height
        IMAGE_WIDTH = im.width

    # Normalise the separator on disk so np.loadtxt can parse the file.
    with open(self.PATH_TO_GROUNDTRUTH_RECT, 'r') as gt_file:
        text = gt_file.read()
    with open(self.PATH_TO_GROUNDTRUTH_RECT, 'w') as gt_file:
        gt_file.write(text.replace(",", " "))

    gt_rect = np.loadtxt(self.PATH_TO_GROUNDTRUTH_RECT)
    if gt_rect.ndim == 1 and gt_rect.size > 0:
        # A single-row file loads as a 1-D array; restore the row axis so
        # the loop below still sees one rectangle.
        gt_rect = gt_rect.reshape(1, -1)

    # Divided by width, height, width, height: top-left and bottom-right.
    normalize_gt_rect = np.array([
        [
            gt[0] / IMAGE_WIDTH,
            gt[1] / IMAGE_HEIGHT,
            (gt[0] + gt[2]) / IMAGE_WIDTH,
            (gt[1] + gt[3]) / IMAGE_HEIGHT,
        ]
        for gt in gt_rect
    ])

    for fileName, det_result in self.det_results.items():
        # File names are expected to be "<frame number><4-char extension>".
        frame_index = int(fileName[0:-4])
        groundtruth_boxes = np.array(
            normalize_gt_rect[frame_index - 1:frame_index])
        the_iou = 0
        the_score = 0

        for i in range(len(det_result["det_labels"])):
            if det_result['det_scores'][i] < args.score:
                continue
            label_name = names.get(str(det_result["det_labels"][i].item()))
            if label_name != self.GROUND_TRUTH_CLASS:
                continue

            # NOTE(review): if det_boxes is a NumPy/torch slice this
            # normalises the stored detections in place, so a second call
            # would normalise them twice — confirm whether that is intended.
            det_boxes = det_result["det_boxes"][i:i + 1]
            det_boxes[0][0] /= IMAGE_WIDTH
            det_boxes[0][1] /= IMAGE_HEIGHT
            det_boxes[0][2] /= IMAGE_WIDTH
            det_boxes[0][3] /= IMAGE_HEIGHT

            iou = utils.iou(det_boxes, groundtruth_boxes)
            if iou[0] > the_iou:
                the_iou = iou[0]
                the_score = det_result['det_scores'][i]

        iou_for_videos.append(the_iou)
        scores_for_videos.append(the_score)

    return iou_for_videos, scores_for_videos
def __getitem__(self, index):
    """Load one image and build its per-scale target tensors.

    Label rows are rolled so that each box reads
    ``[x, y, width, height, class_label]``. For every box the anchors are
    ranked by IoU with the box's width/height; on each scale the best free
    anchor in the box's cell becomes the positive (objectness 1, cell
    coordinates, class), and other free anchors whose IoU exceeds
    ``self.ignore_iou_thresh`` are marked -1 (ignored).

    Args:
        index: Row of ``self.annotations`` to load.

    Returns:
        ``(image, targets)`` where ``targets`` is a tuple with one
        ``(num_anchors // 3, S, S, 6)`` tensor per entry of ``self.S``.
    """
    label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
    bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2),
                     4, axis=1).tolist()
    img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
    image = np.array(Image.open(img_path).convert("RGB"))

    if self.transform:
        augmentations = self.transform(image=image, bboxes=bboxes)
        image = augmentations["image"]
        bboxes = augmentations["bboxes"]

    # Assumes 3 prediction scales with the same number of anchors per scale.
    targets = [
        torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S
    ]
    # Loop-invariant, so built once rather than once per box.
    anchors = torch.tensor(self.anchors).double()

    for box in bboxes:
        iou_anchors = iou(torch.tensor(box[2:4]).double(), anchors)
        anchor_indices = iou_anchors.argsort(descending=True, dim=0)
        x, y, width, height, class_label = box
        has_anchor = [False] * 3  # each scale should get one anchor

        for anchor_idx in anchor_indices:
            # Use plain ints for indexing instead of relying on tensor
            # floor-division and tensors as list indices.
            anchor_idx = int(anchor_idx)
            scale_idx, anchor_on_scale = divmod(anchor_idx,
                                                self.num_anchors_per_scale)
            S = self.S[scale_idx]
            i, j = int(S * y), int(S * x)  # cell containing the box centre
            anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
            if anchor_taken:
                continue

            if not has_anchor[scale_idx]:
                targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                x_cell, y_cell = S * x - j, S * y - i  # both within [0, 1]
                # May exceed 1 because they are relative to the cell size.
                width_cell, height_cell = width * S, height * S
                box_coordinates = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell])
                targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
                has_anchor[scale_idx] = True
            elif iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                # Overlaps well but is not the best anchor: ignore it.
                targets[scale_idx][anchor_on_scale, i, j, 0] = -1

    return image, tuple(targets)
def learnIOU(self, offset, size):
    """Recover boxes from predicted offsets and return their IoU with the
    ``(0, 0, size, size)`` reference box.

    Args:
        offset: 2-D tensor; column 0 is skipped (confidence) and columns 1:5
            are read as the four offsets.
        size: Side length used both to scale the offsets and as the
            reference box.

    Returns:
        NumPy array of IoU values, with values exactly equal to 1 replaced
        by 0.
    """
    # Drop the confidence column and scale the offsets back to pixel units.
    scaled = offset[:, 1:5] * size
    x1 = 0 - scaled[:, 0]
    y1 = 0 - scaled[:, 1]
    x2 = size - scaled[:, 2]
    y2 = size - scaled[:, 3]
    boxes = torch.stack((x1, y1, x2, y2), dim=1)

    # detach().cpu() so this also works for CUDA tensors and for tensors
    # that require grad (a bare .numpy() raises for both).
    iouValue = iou((0, 0, size, size), boxes.detach().cpu().numpy())

    # Negative samples carry all-zero offsets in the labels, which yields an
    # IoU of exactly 1; force those to 0.
    iouValue = np.where(iouValue == 1, 0, iouValue)
    return iouValue
def train_step(net, imgs, labels, global_step, optimizer):
    """Run one training step on a batch and compute segmentation metrics.

    Args:
        net: Segmentation network; called on the float32 images.
        imgs: Batch of images.
        labels: NumPy array of soft masks; binarised at 0.5.
        global_step: Step variable passed to ``optimizer.apply_gradients``.
        optimizer: Optimizer applying the gradients.

    Returns:
        ``(loss, mean Hausdorff distance, mean IoU, mean Dice)`` over the
        batch.
    """
    # Binarise into a NEW array; the original thresholded ``labels`` in
    # place and thereby modified the caller's data.
    labels = (labels >= 0.5).astype('uint8')
    imgs = tf.image.convert_image_dtype(imgs, tf.float32)

    with tf.GradientTape() as tape:
        # Forward pass through the segmentation network.
        seg_results = net(imgs)
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=tf.cast(labels, tf.int32), logits=seg_results)
    grads = tape.gradient(loss, net.trainable_variables)
    optimizer.apply_gradients(zip(grads, net.trainable_variables),
                              global_step=global_step)

    # Hard predictions with a trailing channel axis.
    pred_np = np.expand_dims(np.argmax(seg_results.numpy(), axis=-1), -1)
    batch_size = pred_np.shape[0]

    batch_hds = []
    batch_IoUs = []
    batch_dices = []
    for i in range(batch_size):
        label_slice = labels[i]
        pred_slice = pred_np[i]

        pred_locations = np.argwhere(pred_slice == 1)
        label_locations = np.argwhere(label_slice == 1)
        batch_hds.append(hausdorf_distance(pred_locations, label_locations))

        batch_IoUs.append(iou(pred_slice, label_slice))
        batch_dices.append(dice(pred_slice, label_slice))

    return loss, np.mean(batch_hds), np.mean(batch_IoUs), np.mean(batch_dices)
def ss(self, img_path, ground_truth):
    """Sample up to 64 regions of interest from selective search.

    Each unique region with ``size >= 500`` is compared with every ground
    truth: IoU > 0.4 makes it a positive (labelled with the class index),
    IoU > 0.1 a negative, anything else a backup. Up to 16 positives are
    sampled, then negatives up to 64 in total, then backups to fill the rest.

    Args:
        img_path: Path of the image to read.
        ground_truth: Iterable whose items hold
            ``(class_index, xmin, xmax, ymin, ymax)`` at index 0.

    Returns:
        A tensor of rows ``(label, *rect)``; fewer than 64 rows when there
        are not enough candidates.
    """
    img = io.imread(img_path)
    _, regions = selectivesearch.selective_search(img)

    candidates = set()
    roi_positive = []
    roi_negative = []
    roi_backup = []

    for r in regions:
        rect = r['rect']
        # Skip rectangles already seen (same rectangle, different segment).
        if rect in candidates:
            continue
        # Skip regions whose reported size is below 500.
        if r['size'] < 500:
            continue
        candidates.add(rect)

        for ground in ground_truth:
            class_index, xmin, xmax, ymin, ymax = ground[0]
            # (x, y, w, h). The original's second comparison passed
            # (ymin, xmin, ...) — swapped coordinates; one consistent
            # rectangle is now used for both thresholds.
            gt_rect = (xmin.item(), ymin.item(),
                       xmax.item() - xmin.item(),
                       ymax.item() - ymin.item())
            overlap = utils.iou(rect, gt_rect)
            if overlap > 0.4:
                roi_positive.append((class_index, ) + rect)
            elif overlap > 0.1:
                roi_negative.append((0, ) + rect)
            else:
                roi_backup.append((0, ) + rect)

    positive_num = min(len(roi_positive), 16)
    negative_num = min(len(roi_negative), 64 - positive_num)
    roi = random.sample(roi_positive, positive_num)
    roi += random.sample(roi_negative, negative_num)

    missing = 64 - positive_num - negative_num
    if missing > 0:
        # Clamp like the other two samples so that a short backup list no
        # longer raises ValueError from random.sample.
        roi += random.sample(roi_backup, min(missing, len(roi_backup)))

    roi = torch.tensor(roi)
    return roi
def forward(self, input_, target_):
    """Compute classification + regression loss over a batch.

    For every image the priors are matched to objects by IoU, positives and
    hard negatives are scored with ``self.clf_criterion`` and their mean is
    accumulated; the totals are divided by the batch size.

    Args:
        input_: Iterable of dicts with ``'clf'`` and ``'reg'`` feature maps.
        target_: Tensor whose first axis is the batch; column 0 of each
            object row is read as the class and columns 1: as the box.

    Returns:
        ``clf_loss + self.reg_alpha * reg_loss``.
    """
    batch_size = target_.shape[0]
    clf_loss = reg_loss = 0

    # NOTE(review): the per-level predictions are concatenated on dim=0 (the
    # batch axis) and then zipped with ``target_`` per image; dim=1 looks
    # like the intended axis — confirm before relying on multi-level input.
    clf_preds = torch.cat([
        x['clf'].permute(0, 2, 3, 1).reshape(batch_size, -1, self.n_classes)
        for x in input_
    ], dim=0)
    reg_preds = torch.cat([
        x['reg'].permute(0, 2, 3, 1).reshape(batch_size, -1, 4)
        for x in input_
    ], dim=0)

    for objects_, clf_, reg_ in zip(target_, clf_preds, reg_preds):
        # Expected shape per the original comment: (n_objects, n_priors).
        prior_objects_iou = iou(objects_[:, 1:], self.priors)

        _, best_iou_inds = prior_objects_iou.max(dim=1)
        # NOTE(review): this indexes rows with per-object prior indices;
        # verify it forces the best prior of each object as intended.
        prior_objects_iou[best_iou_inds] = 1.

        _, objects_for_each_prior = prior_objects_iou.max(dim=0)
        # NOTE(review): indexes ``target_`` (whole batch) rather than
        # ``objects_`` (this image) and tests the argmax index, not the IoU
        # value — kept as in the original, confirm.
        clf_targets_for_each_prior = torch.where(
            objects_for_each_prior > 0,
            target_[objects_for_each_prior, 0], 0)

        positives = clf_targets_for_each_prior > 0
        negatives = torch.logical_not(positives)
        n_positives = positives.sum()
        n_negatives = self.negative_ratio * n_positives

        # Positive priors.
        pos_clf_preds, pos_reg_preds = clf_[positives], reg_[positives]
        pos_clf_targets = clf_targets_for_each_prior[positives]
        pos_clf_loss = self.clf_criterion(pos_clf_preds,
                                          pos_clf_targets.long())
        # NOTE(review): the regression criterion is called without
        # arguments in the original (unfinished); ``pos_reg_preds`` is
        # presumably its intended input. Left unchanged.
        reg_loss += self.reg_criterion()

        # Negative priors: keep only the hardest ``n_negatives``.
        neg_clf_preds = clf_[negatives]
        neg_clf_targets = clf_targets_for_each_prior[negatives]
        # Cast to long for consistency with the positive branch.
        neg_clf_loss, _ = self.clf_criterion(
            neg_clf_preds, neg_clf_targets.long()).sort(descending=True)
        neg_clf_loss = neg_clf_loss[:n_negatives]

        # torch.cat takes ONE sequence of tensors; the original passed the
        # second tensor as the ``dim`` argument, which raises TypeError.
        clf_loss += torch.mean(torch.cat((pos_clf_loss, neg_clf_loss)))

    clf_loss /= batch_size
    reg_loss /= batch_size
    loss = clf_loss + self.reg_alpha * reg_loss
    return loss
def __getitem__(self, index):
    """Load one image and build its per-scale target tensors.

    Label rows are rolled so that each box reads
    ``[x, y, width, height, class_label]``. For every box the anchors are
    ranked by IoU with the box's width/height; on each scale the best free
    anchor in the box's cell becomes the positive, and other free anchors
    whose IoU exceeds ``self.ignore_iou_thresh`` are marked -1 (ignored).

    Fixes relative to the original: the method name was misspelt
    ``__getItem__`` (so indexing the dataset never reached it), the image
    mode was ``"RBG"``, the box was unpacked into four names instead of
    five, and ``width * S.height * S`` was a typo for
    ``width * S, height * S``.

    Args:
        index: Row of ``self.annotations`` to load.

    Returns:
        ``(image, targets)`` where ``targets`` is a tuple with one
        ``(num_anchors // 3, S, S, 6)`` tensor per entry of ``self.S``; the
        last axis is ``[p_o, x, y, w, h, class]``.
    """
    label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
    bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2),
                     4, axis=1).tolist()
    img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
    image = np.array(Image.open(img_path).convert("RGB"))

    if self.transform:
        augmentations = self.transform(image=image, bboxes=bboxes)
        image = augmentations["image"]
        bboxes = augmentations["bboxes"]

    targets = [
        torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S
    ]

    for box in bboxes:
        iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
        anchor_indices = iou_anchors.argsort(descending=True, dim=0)
        x, y, width, height, class_label = box
        has_anchor = [False, False, False]

        for anchor_idx in anchor_indices:
            # Plain ints for list/tensor indexing.
            anchor_idx = int(anchor_idx)
            scale_idx, anchor_on_scale = divmod(anchor_idx,
                                                self.num_anchors_per_scale)
            S = self.S[scale_idx]
            i, j = int(S * y), int(S * x)  # e.g. x = 0.5, S = 13 -> 6
            anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
            if anchor_taken:
                continue

            if not has_anchor[scale_idx]:
                targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                x_cell, y_cell = S * x - j, S * y - i  # both within [0, 1]
                width_cell, height_cell = width * S, height * S
                box_coordinates = torch.Tensor(
                    [x_cell, y_cell, width_cell, height_cell])
                targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
                has_anchor[scale_idx] = True
            elif iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                # Overlaps well but is not the best anchor: ignore it.
                targets[scale_idx][anchor_on_scale, i, j, 0] = -1

    return image, tuple(targets)


# Backward-compatible alias for code that called the misspelt name directly.
__getItem__ = __getitem__
def countTF(gtBox, predBox, threshold):
    """Count true positives, false positives and false negatives.

    For every ground-truth box the IoU against ``predBox`` is computed;
    entries above ``threshold`` add to TP, the remaining entries add to FP,
    and a ground truth without any entry above the threshold adds one FN.

    Args:
        gtBox: Iterable of ground-truth boxes.
        predBox: Predicted boxes (torch tensor or NumPy array).
        threshold: IoU threshold for a match.

    Returns:
        ``(TP, FP, FN)``.
    """
    # Pick the ``where`` implementation matching the prediction container.
    where = t.where if t.is_tensor(predBox) else np.where

    TP = FP = FN = 0
    for gt in gtBox:
        res = iou(gt, predBox)
        matched = len(where(res > threshold)[0])
        TP += matched
        FP += len(res) - matched
        if not matched:
            FN += 1
    return TP, FP, FN
def kmeans(X, K, max_iter=100, tol=1e-7):
    '''Run K-means on data X using ``1 - iou`` as the distance.

    Args:
      X: (tensor) data, sized [N,D]; each row is viewed as (1, 4).
      K: (int) number of clusters.
      max_iter: (int) max number of iterations.
      tol: (float) loss tolerance between two iterations.

    Returns:
      (tensor) centroids, sized [K,D].
    '''
    N, D = X.size()
    assert N >= K, 'Too few samples for K-means'

    # Randomly pick the initial centroids.
    ids = torch.randperm(N)[:K]
    centroids = X[ids].clone()

    last_loss = 0
    for it in range(max_iter):
        # Assign each sample to the nearest centroid.
        groups = [[] for _ in range(K)]
        dist_sum = 0.0
        for i in range(N):
            x = X[i].view(1, 4)
            dists = 1 - iou(x, centroids)
            min_dist, centroid_idx = dists.reshape(-1).min(0)
            # int()/float() accept both 0-dim and 1-element tensors; the
            # original ``[0]`` indexing fails on the 0-dim tensors that
            # current PyTorch returns here.
            groups[int(centroid_idx)].append(i)
            dist_sum += float(min_dist)

        loss = dist_sum / N
        print('iter: %d/%d  loss: %f  avg_iou: %f' %
              (it, max_iter, loss, 1 - loss))

        # Compute the new centroids.
        new_centroids = []
        for k, members in enumerate(groups):
            if members:
                group_k = torch.LongTensor(members)
                new_centroids.append(pick_centroid_from_cluster(X[group_k]))
            else:
                # An empty cluster has nothing to pick from; keep its
                # previous centroid instead of crashing.
                new_centroids.append(centroids[k])
        centroids = torch.stack(new_centroids)

        if abs(last_loss - loss) < tol:
            break
        last_loss = loss
    return centroids
def evaluate_model(test_dataset, model, **kwargs):
    """Evaluate ``model`` on ``test_dataset`` and print averaged metrics.

    Per batch, pixel accuracy, IoU and a confusion matrix are computed with
    the ``utils`` helpers and printed together with the throughput; the
    per-batch values are averaged over the number of batches at the end.

    Args:
        test_dataset: Dataset yielding ``(img, _, mask)`` items.
        model: Network to evaluate.
        **kwargs: Must contain ``device``, ``batch_size`` and ``threshold``.

    Raises:
        ValueError: If the dataset yields no batches (previously a
            ``NameError`` on the undefined step counter).
    """
    device = kwargs['device']
    batch_size = kwargs['batch_size']
    threshold = kwargs['threshold']

    loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    num_steps = 0
    avg_acc = 0
    avg_iou = 0
    avg_cm = {'TP': 0, 'TN': 0, 'FP': 0, 'FN': 0}

    with torch.no_grad():
        for num_steps, (img, _, mask) in enumerate(loader, start=1):
            start = time.time()
            img = img.to(device)
            mask = mask.to(device)
            output = model(img)
            end = time.time()

            # Pixel accuracy.
            acc = utils.pixelwise_accuracy(output, mask, threshold)
            avg_acc = avg_acc + acc

            # IoU.
            iou = utils.iou(output, mask, threshold)
            avg_iou = avg_iou + iou

            # Confusion matrix.
            cm = utils.confusion_matrix(output, mask, threshold)
            for key in avg_cm:
                avg_cm[key] = avg_cm[key] + cm[key]

            msg = "Step : {}, Acc : {:0.3f}".format(num_steps, acc)
            msg = msg + " IOU : {:0.3}".format(iou)
            msg = msg + " Speed: {:0.2f} imgs/ sec".format(
                img.shape[0] / (end - start))
            print(msg)

    if num_steps == 0:
        raise ValueError("test_dataset yielded no batches; nothing to evaluate")

    avg_acc = avg_acc / num_steps
    avg_iou = avg_iou / num_steps
    for key in avg_cm:
        avg_cm[key] = avg_cm[key] / num_steps

    print("Average acc : {:0.3f}".format(avg_acc))
    print("Average iou : {:0.3f}".format(avg_iou))
    print("confusion matrix", avg_cm)
def gen_simple(size):
    """Generate jittered crops for every labelled box.

    Lines after the first two of ``LABEL_FILE_PATH`` are read as
    ``filename x y w h``. For each one, 100 randomly shifted and scaled
    boxes are produced, passed through ``utils.rect2squar``, cropped from the
    image and resized to ``size`` x ``size``. The IoU with the original box
    selects the sample category; the branches are placeholders.

    Fixes relative to the original: ``im.corp`` was a typo for ``im.crop``,
    ``im.resize(size)`` passed an int instead of a ``(w, h)`` tuple and
    discarded the returned image, and the label file was never closed.

    Args:
        size: Side length in pixels of the resized crop.
    """
    with open(LABEL_FILE_PATH) as label_file:
        for i, line in enumerate(label_file):
            if i <= 1:
                continue  # the first two lines are not data rows
            strs = line.split()
            if not strs:
                continue  # tolerate blank lines
            filename = strs[0]
            x1, y1, w, h = (int(strs[k]) for k in range(1, 5))
            x2 = x1 + w
            y2 = y1 + h
            # Centre of the labelled box.
            cx = x1 + w / 2
            cy = y1 + h / 2

            for _ in range(100):
                dx = np.random.uniform(-0.2, 0.2)
                dy = np.random.uniform(-0.2, 0.2)
                dw = np.random.uniform(-0.2, 0.2)
                dh = np.random.uniform(-0.2, 0.2)

                # Both the centre and the width/height are perturbed.
                _cx = cx * (1 + dx)
                _cy = cy * (1 + dy)
                _w = w * (1 + dw)
                _h = h * (1 + dh)
                _x1 = _cx - _w / 2
                _y1 = _cy - _h / 2
                _x2 = _x1 + _w
                _y2 = _y1 + _h

                box = np.array([_x1, _y1, _x2, _y2, 0])
                boxes = np.array([[x1, y1, x2, y2, 0]])
                # NOTE(review): presumably converts the box to a square —
                # the original author also flagged this step as unclear.
                _box = utils.rect2squar(np.array([box]))[0]

                with Image.open(os.path.join(IMAGE_PATH, filename)) as source:
                    im = source.crop(_box[0:4])
                    im = im.resize((size, size))

                iou = utils.iou(_box, boxes)
                if iou[0] > 0.65:
                    # positive sample
                    pass
                elif iou[0] > 0.4:
                    # part sample
                    pass
                elif iou[0] < 0.3:
                    # negative sample
                    pass
def pick_centroid_from_cluster(X):
    '''Pick the cluster sample with the maximum average IoU to all samples.

    Instead of using the cluster mean as the centroid, the centroid is an
    actual sample of the cluster.

    Args:
      X: (tensor) samples of a cluster; each sample is viewed as (1, 4).

    Return:
      (tensor) picked centroid from the cluster.

    Raises:
      ValueError: if the cluster is empty (previously an UnboundLocalError).
    '''
    if len(X) == 0:
        raise ValueError('Cannot pick a centroid from an empty cluster')

    best_iou = -1
    for x in X:
        # Computed once per sample (the original evaluated the mean twice).
        mean_iou = iou(x.view(1, 4), X).mean()
        if mean_iou > best_iou:
            best_iou = mean_iou
            centroid = x
    return centroid
def get_distance_matrix(self, dets, frame):
    """Build the detection-to-track distance matrix.

    Each entry combines the L1 distance between the HOG descriptors with
    ``1 - IoU`` of the box corners, scaled by
    ``max(1 - detection confidence, 0.5)``.

    Args:
        dets: Detections exposing ``hog``, ``conf`` and ``corners()``.
        frame: Not used by this method.

    Returns:
        ``float32`` array of shape ``(len(dets), len(self.tracks))``.
    """
    dists = np.zeros((len(dets), len(self.tracks)), np.float32)
    for itrack, track in enumerate(self.tracks):
        for ipred, det in enumerate(dets):
            desc_dist = np.linalg.norm(det.hog - track.hog, ord=1)
            iou_overlap = iou(det.corners(), track.corners())
            # Less confident detections are weighted more, floored at 0.5.
            uncertainety = np.maximum(1 - det.conf, 0.5)
            combined = (1 - iou_overlap) + desc_dist
            dists[ipred, itrack] = uncertainety * combined
    return dists
def test_iou(self):
    """Check ``utils.iou`` row by row on four box pairs.

    Expected values are 0.25, 0.25, 0 and 0, and the result must have shape
    ``(4,)``.
    """
    box_pairs = [
        ([0.1, 0.1, 0.2, 0.2], [0.1, 0.1, 0.3, 0.3]),
        ([100, 100, 200, 200], [100, 100, 300, 300]),
        ([0.1, 0.1, 0.2, 0.2], [100, 100, 300, 300]),
        ([1., 1., 1., 1.], [0., 0., 0., 0.]),
    ]
    box_a = tf.convert_to_tensor([first for first, _ in box_pairs])
    box_b = tf.convert_to_tensor([second for _, second in box_pairs])

    actual = utils.iou(box_a, box_b)
    expected = tf.convert_to_tensor([0.25, 0.25, 0, 0])

    actual_value, expected_value = self.evaluate([actual, expected])

    assert np.allclose(actual_value, expected_value)
    assert actual_value.shape == (4, )