import numpy as np


def transform_annotation(x, orig_dim, model_dim):
    """Convert the annotation/target boxes from VoTT to a format understood by
    dataset class
    
    Arguments
    ---------
    x : string

        Consists of:
            label
            x_center measured from left (0-1)
            y_center measured from top (0-1)
            width (0-1)
            height (0-1)

    """
    if not x:
        return None
    boxes = np.array([a.rstrip().split(' ') for a in x], dtype='float32')

    # Get the bounding boxes and convert them into proper format
    category_ids = boxes[:, 0]
    boxes = boxes[:, 1:]

    # Change y_center to measure from bottom
    boxes[:, 1] = 1. - boxes[:, 1]
    boxes = boxes.reshape(-1, 4)

    # Center to corner
    boxes = center_to_corner_2d(boxes)

    boxes[:, [0, 2]] *= orig_dim[1]
    boxes[:, [1, 3]] *= orig_dim[0]

    category_ids = np.array(category_ids).reshape(-1, 1)
    ground_truth = np.concatenate([boxes, category_ids], 1).reshape(-1, 5)

    return ground_truth
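
# transform_annotation (and the other snippets on this page) call a helper,
# center_to_corner_2d, that is not shown here. The sketch below is an
# assumption based on how it is used: it converts boxes whose first four
# columns are (x_center, y_center, width, height) into (x1, y1, x2, y2),
# leaving any extra columns untouched. Illustrative only, not the
# repository's implementation.
def center_to_corner_2d(boxes):
    # Works for both NumPy arrays and torch tensors
    corners = boxes.clone() if hasattr(boxes, 'clone') else boxes.copy()
    corners[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0  # x1 = xc - w/2
    corners[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0  # y1 = yc - h/2
    corners[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0  # x2 = xc + w/2
    corners[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0  # y2 = yc + h/2
    return corners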
Example #2
import torch


def write_results(prediction,
                  confidence,
                  num_classes,
                  model_dim,
                  orig_dim,
                  nms=True,
                  nms_conf=0.7):
    """
    Arguments
    ---------
    prediction : tensor (3D)
        [batch, image_id, [x_center, y_center, width, height, objectness_score, class_score1, class_score2, ...]]

    Returns
    --------
    output : tensor (2D)
        [image_id, [batch_index, x_1, y_1, x_2, y_2, objectness_score, class_index, class_probability]]
    """

    # Initialize to the "no output" sentinel
    output = -1

    # Technically, this should always be 1
    batch_size = prediction.size(0)

    # Get rid of the leading singleton dim
    orig_dim = orig_dim.squeeze(0)

    # If the entire batch has zero objectness scores, there is nothing to do
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return -1

    # Keep track of if output has been compiled yet (for concatenation)
    write = False

    for ind in range(batch_size):
        pred = prediction[ind]

        if pred.shape[0] > 0:
            # Get x1y1x2y2
            pred = center_to_corner_2d(pred)

            # Get the class with the maximum score and its index, and replace the
            # num_classes score columns with (class index, score of that class)
            max_conf_score, max_conf = torch.max(pred[:, 5:5 + num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (pred[:, :5], max_conf, max_conf_score)
            image_pred = torch.cat(seq, 1)

            # Get rid of the entries with zero objectness
            non_zero_ind = torch.nonzero(image_pred[:, 4])
            image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

            # Remove detections whose best class score is below the confidence threshold
            image_pred_ = image_pred_[image_pred_[:, -1] > confidence, :]

            # If nothing survives the filtering, move on to the next image
            if image_pred_.size(0) == 0:
                continue

            # Get the various classes detected in the image
            img_classes = unique(image_pred_[:, -2].int())

            # We will do NMS class-wise
            for label in img_classes:
                # Get the detections belonging to this particular class
                cls_mask_ind = (image_pred_[:, -2].int() == label)
                image_pred_class = image_pred_[cls_mask_ind].view(-1, 7)

                #sort the detections such that the entry with the maximum objectness
                #confidence is at the top
                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                #if nms has to be done
                if nms:
                    #For each detection
                    for i in range(idx):
                        #Get the IOUs of all boxes that come after the one we are looking at
                        #in the loop
                        try:
                            ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                            image_pred_class[i + 1:])
                        except ValueError:
                            continue
                        except IndexError:
                            continue

                        # Zero out all the detections that have IoU > nms threshold
                        iou_mask = (ious < nms_conf).float().unsqueeze(1)
                        image_pred_class[i + 1:] *= iou_mask

                        # Keep the non-zero entries for objectness
                        non_zero_ind = torch.nonzero(
                            image_pred_class[:, 4]).squeeze()
                        image_pred_class = image_pred_class[non_zero_ind].view(
                            -1, 7)

                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                seq = batch_ind, image_pred_class
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))
    return output
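
# write_results also relies on two helpers, unique and bbox_iou, that are not
# shown on this page. The sketches below are assumptions inferred from how
# they are called above: unique returns the distinct class indices present in
# a 1-D tensor, and bbox_iou returns the IoU between one box and a set of
# boxes, all in (x1, y1, x2, y2) corner format. Illustrative only.
import torch


def unique(tensor):
    # Distinct values of a 1-D tensor (e.g. the class-index column)
    return torch.unique(tensor)


def bbox_iou(box1, box2):
    # box1: (1, >=4) tensor, box2: (N, >=4) tensor; columns 0-3 are x1, y1, x2, y2
    inter_x1 = torch.max(box1[:, 0], box2[:, 0])
    inter_y1 = torch.max(box1[:, 1], box2[:, 1])
    inter_x2 = torch.min(box1[:, 2], box2[:, 2])
    inter_y2 = torch.min(box1[:, 3], box2[:, 3])

    # Clamp so non-overlapping boxes contribute zero intersection area
    inter_area = (torch.clamp(inter_x2 - inter_x1, min=0) *
                  torch.clamp(inter_y2 - inter_y1, min=0))

    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    return inter_area / (area1 + area2 - inter_area + 1e-16)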
Example #3
        orig_h, orig_w = img_.shape[0], img_.shape[1]
        print(img_file)
        print(i)

        # Read image and prepare for input to network
        img, orig_im, orig_dim = prep_image(plt.imread(img_file), model_dim)
        orig_dim = torch.FloatTensor(orig_dim).repeat(1, 2)

        # Read ground truth labels
        ground_truths_file = '.'.join(img_file.split('.')[:-1]) + '.txt'
        with open(ground_truths_file, 'r') as f:
            # Read ground truth file and transform annotation to:  xc,yc,w,h,label
            ground_truths = transform_annotation(f.readlines(), img_.shape,
                                                 model_dim)
            # Convert xc,yc,w,h --> x1,y1,x2,y2
            ground_truths = center_to_corner_2d(ground_truths)
            # Scale up from square (0-1)*model_dim coordinates to the original image size
            ground_truths[:, [0, 2]] *= orig_w / model_dim
            ground_truths[:, [1, 3]] *= orig_h / model_dim

        class_labels = ground_truths[:, -1]
        num_gts += ground_truths.shape[0]

        img = img.to(device)
        with torch.no_grad():
            output = model(img)

        # NB, output is:
        # [batch, image_id, [x_center, y_center, width, height, objectness_score, class_score1, class_score2, ...]]
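
# prep_image, used above, is another helper from the original repository that
# is not shown on this page. A minimal sketch of its assumed behaviour, based
# on how it is called: resize to the square network input size, scale pixels
# to 0-1, reorder HWC -> CHW, add a batch dimension, and return the prepped
# tensor together with the untouched image and its (width, height). The use of
# cv2.resize and the plain (non-letterbox) resize are assumptions.
import cv2
import torch


def prep_image(img, model_dim):
    orig_im = img
    orig_dim = (img.shape[1], img.shape[0])  # (width, height)
    resized = cv2.resize(img, (model_dim, model_dim))
    tensor = torch.from_numpy(resized.transpose(2, 0, 1).copy()).float() / 255.0
    return tensor.unsqueeze(0), orig_im, orig_dim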
Example #4
    os.makedirs(eval_output_dir, exist_ok=True)

    for i, (image, ground_truth, filepath) in enumerate(test_loader):
        
        img_file = filepath[0].rstrip()
        img_ = plt.imread(img_file)
        orig_h, orig_w = img_.shape[0], img_.shape[1]
        orig_dim = torch.FloatTensor([orig_w, orig_h]).repeat(1,2)
        print(img_file)
        print(i)

        # Deal with ground truth: read, convert to corners and scale
        ground_truths = []
        gt_file = '.'.join(img_file.split('.')[:-1]) + '.txt'
        gt_df = np.array(pd.read_csv(gt_file, sep=' ', header=None))
        gt_df = center_to_corner_2d(gt_df[:, 1:])
        orig_dim_np = np.array([orig_w, orig_h, orig_w, orig_h])
        gt_df *= orig_dim_np
        ground_truths.append(gt_df)
        num_gts = gt_df.shape[0]

        # ground_truths = []
        # if len(ground_truth) == 0:
        #     continue
        # else:
        #     ground_truths.append(ground_truth)

        # Predict on input test image
        image = image.to(device)
        with torch.no_grad():        
            output = model(image)
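
        # The raw output above would typically be fed through write_results
        # (Example #2) to get final, NMS-filtered detections. A minimal usage
        # sketch, assuming model_dim and the loop variables above are in scope;
        # num_classes and the two thresholds are illustrative values only.
        detections = write_results(output,
                                   confidence=0.5,    # illustrative class-score threshold
                                   num_classes=80,    # assumption; set to your model
                                   model_dim=model_dim,
                                   orig_dim=orig_dim,
                                   nms=True,
                                   nms_conf=0.4)      # illustrative NMS IoU threshold
        if isinstance(detections, int):
            # write_results returns -1 when nothing passes the filters
            print('no detections for', img_file)
        else:
            # Each row: [batch_index, x1, y1, x2, y2, objectness, class_index, class_prob]
            print(detections.shape[0], 'detections for', img_file)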