Example #1
    def __initialize_anchor_boxes(self):
        """
        :return:
        """
        LOGGER.debug('Entered')

        anchor_boxes = None
        vgg_output_dim = self.img_size // VGG_SCALE_SIZE
        count = 0
        for pixel_x in range(vgg_output_dim):
            for pixel_y in range(vgg_output_dim):
                for siz in SCALES:
                    scaled_size = vgg_output_dim * siz
                    for ratio in ASPECT_RATIOS:
                        h_bbox = scaled_size // ratio[0]
                        w_bbox = scaled_size // ratio[1]
                        anchor_box = torch.tensor(
                            [[pixel_x, pixel_y, w_bbox, h_bbox]])
                        if count == 0:
                            anchor_boxes = anchor_box
                        else:
                            anchor_boxes = torch.cat(
                                (anchor_boxes, anchor_box))
                        count = count + 1
        LOGGER.debug('Total Anchor Boxes = %s', str(len(anchor_boxes)))
        return anchor_boxes.float()
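As a sanity check on the count logged above: the nested loops emit one anchor per feature-map cell, scale and aspect ratio. A minimal sketch of the expected total, using hypothetical values in place of the project's real image size, SCALES and ASPECT_RATIOS constants:

# Hypothetical constants, for illustration only.
IMG_SIZE = 224
VGG_SCALE_SIZE = 16
SCALES = [2, 4, 8]
ASPECT_RATIOS = [(1, 1), (1, 2), (2, 1)]

vgg_output_dim = IMG_SIZE // VGG_SCALE_SIZE            # 14
anchors_per_cell = len(SCALES) * len(ASPECT_RATIOS)    # 9
total_anchors = vgg_output_dim ** 2 * anchors_per_cell
print(total_anchors)                                   # 14 * 14 * 9 = 1764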
Example #2
def normalise_image_size_and_bbox():
    create_dir(NORMALISED_IMAGES_PATH)
    create_dir(NORMALISED_BBOX_IMAGES_PATH)
    LOGGER.debug('Normalising image sizes to same size')
    with open(MIN_DIM, 'rb') as fp:
        dim = pickle.load(fp)
    LOGGER.debug('Normalising all images to %d x %d', dim, dim)
    bbox_json = read_json_file(BBOX_JSON_PATH)
    norm_bbox_json = {}
    for image in bbox_json.keys():
        img = cv2.imread(RAW_TRAIN_IMAGES_PATH + image)
        y_ = img.shape[0]
        x_ = img.shape[1]
        img = cv2.resize(img, (dim, dim))
        cv2.imwrite(NORMALISED_IMAGES_PATH + image, img)
        x_scale = dim / x_
        y_scale = dim / y_
        tc_x = int(np.round(bbox_json[image][0][0] * x_scale))
        tc_y = int(np.round(bbox_json[image][0][1] * y_scale))
        bl_x = int(np.round(bbox_json[image][1][0] * x_scale))
        bl_y = int(np.round(bbox_json[image][1][1] * y_scale))
        norm_bbox_json[image] = [tc_x, tc_y, bl_x, bl_y]

        im_bbox = cv2.rectangle(img, (tc_x, tc_y), (bl_x, bl_y), (255, 0, 0),
                                2)
        cv2.imwrite(NORMALISED_BBOX_IMAGES_PATH + image, im_bbox)

    write_json_to_file(norm_bbox_json, NORM_BBOX_JSON_PATH)
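The corners are rescaled with the same x and y ratios used for the image resize. A worked example, assuming a hypothetical 640 x 480 source image normalised to 224 x 224:

import numpy as np

x_, y_ = 640, 480                        # original width, height
dim = 224                                # normalised square size
x_scale, y_scale = dim / x_, dim / y_    # 0.35 and ~0.467

# a corner at (320, 240) in the original image maps to (112, 112)
tc_x = int(np.round(320 * x_scale))
tc_y = int(np.round(240 * y_scale))
print(tc_x, tc_y)                        # 112 112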
Example #3
    def model_inference(self):
        """
        Functionality we are trying to achieve
            1. get the anchors which are positively predicted (indexes)
            2. get the dim of anchors using index
            3. get the delta of anchors using pred index, add and we get set of anchor boxes
            4. scale the acnhor boxes by multiplying by 16
            5. we have regions
            6. reshape to diagonals and put bounding box over it
        """
        abox = AnchorBox()
        create_dir(TEST_OUTPUT_PATH)
        image_list = os.listdir(TEST_IMAGES_PATH)
        LOGGER.info('Inference begun')
        for image in image_list:
            path_of_image = TEST_IMAGES_PATH + '/' + image
            img = cv2.resize(cv2.imread(path_of_image),
                             (self.image_size, self.image_size))
            img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
            img = img.to(self.device)

            pred_cls, pred_reg = self.forward(img)
            pred_cls = pred_cls.detach().cpu()
            pred_reg = pred_reg.detach().cpu()
            foreground_idx = 0
            scores = []
            # reset per image so boxes and scores stay aligned for NMS
            all_predicted_diagonals = []

            for cls_out in pred_cls[0]:
                cls_out = Softmax(dim=0).forward(cls_out)
                if cls_out[torch.argmax(cls_out).item()] > 0.8:
                    # foreground_prob = torch.max(Softmax(dim=0).forward(cls_out)).item()
                    # foreground_prob = torch.max(torch.exp(LogSoftmax(dim=0).forward(cls_out))).item()
                    foreground_prob = torch.max(cls_out).item()
                    scores.append(foreground_prob)

                    delta_xywh = pred_reg[0][foreground_idx]
                    anchor_xywh = abox.anchor_boxes[foreground_idx]
                    proposed_region = torch.mul(
                        torch.add(delta_xywh, anchor_xywh),
                        torch.tensor([VGG_SCALE_SIZE]).float())
                    diag = get_diagonal_from_mpwh(proposed_region.tolist())
                    all_predicted_diagonals.append(
                        [diag[0][0], diag[0][1], diag[1][0], diag[1][1]])
                foreground_idx = foreground_idx + 1
            LOGGER.debug('Length of predicted_diagonals: %s',
                         str(len(all_predicted_diagonals)))
            boxes = torch.tensor(all_predicted_diagonals)
            scores = torch.tensor(scores)
            keep, max_count = non_max_suppress(boxes, scores)

            img = cv2.imread(path_of_image)
            img = cv2.resize(img, (self.image_size, self.image_size))
            for idx in range(max_count):
                box = boxes[keep[idx].item()]
                # draw every kept box on the same resized image
                img = cv2.rectangle(img, (int(box[0]), int(box[1])),
                                    (int(box[2]), int(box[3])), (255, 0, 0), 2)
            cv2.imwrite(TEST_OUTPUT_PATH + image, img)
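non_max_suppress is not shown in these snippets; a minimal sketch of a compatible greedy NMS, assuming boxes are [x1, y1, x2, y2] diagonals, that it returns the kept indices plus their count, and that a 0.5 IoU threshold is acceptable (torchvision.ops.nms could be used instead):

import torch

def non_max_suppress(boxes, scores, iou_threshold=0.5):
    """Greedy NMS over [x1, y1, x2, y2] boxes; returns (kept indices, count)."""
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(int(i))
        if order.numel() == 1:
            break
        # intersection of the current best box with the remaining boxes
        rest = boxes[order[1:]]
        x1 = torch.max(boxes[i][0], rest[:, 0])
        y1 = torch.max(boxes[i][1], rest[:, 1])
        x2 = torch.min(boxes[i][2], rest[:, 2])
        y2 = torch.min(boxes[i][3], rest[:, 3])
        inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
        area_i = (boxes[i][2] - boxes[i][0]) * (boxes[i][3] - boxes[i][1])
        area_rest = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
        iou = inter / (area_i + area_rest - inter)
        # drop everything that overlaps the kept box too much
        order = order[1:][iou <= iou_threshold]
    return torch.tensor(keep), len(keep)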
Example #4
def get_img_bbox_coors(limit=9999999999999999999):
    create_dir(BBOX_IMAGES_PATH)
    LOGGER.debug('Getting image map with coordinates for Bounding Box')
    json_processed = dict()
    count = 0
    for image in IMAGES:
        if image['file_name'] in IMAGES_IN_DIR:
            image_id = image['id']
            tc_coor, bl_coor = get_coors_from_annotation_by_id(image_id)
            json_processed[image['file_name']] = [tc_coor, bl_coor]
            LOGGER.debug('Added bbox for %s: %s', image['file_name'],
                         json_processed[image['file_name']])
            count += 1
        if count >= limit:
            break
    write_json_to_file(json_processed, BBOX_JSON_PATH)
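get_coors_from_annotation_by_id is assumed to look up the COCO-style annotation for the given image id and turn its [x, y, width, height] bbox into the two opposite corners stored above; a minimal sketch under that assumption, with a hypothetical ANNOTATIONS list standing in for the real annotation data:

# Hypothetical annotation list; the real entries come from the dataset's JSON.
ANNOTATIONS = [
    {'image_id': 42, 'bbox': [30, 50, 100, 80]},  # [x, y, width, height]
]

def get_coors_from_annotation_by_id(image_id):
    """Return the two opposite bbox corners for the first matching annotation."""
    for ann in ANNOTATIONS:
        if ann['image_id'] == image_id:
            x, y, w, h = ann['bbox']
            return (int(x), int(y)), (int(x + w), int(y + h))
    return None, None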
Example #5
    def model_inference(self):
        create_dir(TEST_OUTPUT_PATH)
        image_list = os.listdir(TEST_IMAGES_PATH)
        LOGGER.info('Inference begun')
        for image in image_list:
            path_of_image = TEST_IMAGES_PATH + '/' + image
            img = cv2.resize(cv2.imread(path_of_image),
                             (self.image_size, self.image_size))
            img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
            img = img.to(self.device)
            predicted_width, predicted_height, predicted_midpoint = self.forward(
                img)
            mp_x = predicted_midpoint[0][0].detach().cpu().item()
            mp_y = predicted_midpoint[0][1].detach().cpu().item()
            w = predicted_width[0].detach().cpu().item()
            h = predicted_height[0].detach().cpu().item()
            diag = get_diagonal_from_mpwh([mp_x, mp_y, w, h])
            img = cv2.imread(path_of_image)
            img = cv2.resize(img, (self.image_size, self.image_size))
            im_bbox = cv2.rectangle(img, diag[0], diag[1], (255, 0, 0), 2)
            cv2.imwrite(TEST_OUTPUT_PATH + image, im_bbox)
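get_diagonal_from_mpwh is used by both inference routines but not defined in these snippets; a minimal sketch, assuming the input is [midpoint_x, midpoint_y, width, height] and the output is the pair of opposite corner points expected by cv2.rectangle:

def get_diagonal_from_mpwh(mpwh):
    """Convert [mp_x, mp_y, w, h] into ((x1, y1), (x2, y2)) corner points."""
    mp_x, mp_y, w, h = mpwh
    top_left = (int(round(mp_x - w / 2)), int(round(mp_y - h / 2)))
    bottom_right = (int(round(mp_x + w / 2)), int(round(mp_y + h / 2)))
    return top_left, bottom_right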
Example #6
def create_bbox():
    LOGGER.debug('Creating Bounded Boxes')
    json_bbox = read_json_file(BBOX_JSON_PATH)
    for key, value in json_bbox.items():
        im_rd = cv2.imread(RAW_TRAIN_IMAGES_PATH + key)
        im_bbox = cv2.rectangle(im_rd, (value[0][0], value[0][1]),
                                (value[1][0], value[1][1]), (255, 0, 0), 2)
        cv2.imwrite(BBOX_IMAGES_PATH + key, im_bbox)
    LOGGER.debug('Created all bounding boxes for %d images',
                 len(json_bbox.keys()))
    LOGGER.debug('Images with bounding boxes saved to %s', BBOX_IMAGES_PATH)
Example #7
    def model_train(self, epoch_offset=0):
        create_dir(MODEL_SAVE_PATH)
        loss_for_regression = MSELoss()
        img_coors_json = read_json_file(BBOX_XYWH_JSON_PATH)

        optimizer = RMSprop(self.parameters(),
                            lr=LEARNING_RATE,
                            momentum=MOMENTUM)
        # optimizer = Adam(self.parameters(), lr=LEARNING_RATE)
        #         optimizer = SGD(self.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

        scheduler = StepLR(optimizer,
                           step_size=SCHEDULER_STEP,
                           gamma=SCHEDULER_GAMMA)

        for epoch in range(EPOCHS):
            epoch_loss = 0.0
            scheduler.step(epoch)
            LOGGER.debug('Epoch: %s, Current Learning Rate: %s',
                         str(epoch + epoch_offset), str(scheduler.get_lr()))
            for image, coors in img_coors_json.items():
                path_of_image = NORMALISED_IMAGES_PATH + image
                path_of_image = path_of_image.replace('%', '_')
                img = cv2.imread(path_of_image)
                img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
                img = img.to(self.device)
                predicted_width, predicted_height, predicted_midpoint = self.forward(
                    img)

                # all coordinates are already scaled to the normalised image size
                mp_x = coors[0][0]
                mp_y = coors[0][1]
                mp = torch.tensor([[mp_x, mp_y]]).float().to(self.device)

                w = coors[0][2]
                h = coors[0][3]
                loss1 = loss_for_regression(
                    predicted_height,
                    torch.tensor([[h]]).float().to(self.device))
                loss2 = loss_for_regression(
                    predicted_width,
                    torch.tensor([[w]]).float().to(self.device))
                loss3 = loss_for_regression(predicted_midpoint,
                                            mp.to(self.device))
                # operator precedence: only the midpoint loss (loss3) is halved here
                loss = loss1 + loss2 + loss3 / 2
                optimizer.zero_grad()
                loss.backward()
                clip_grad_norm(self.parameters(), 0.5)
                optimizer.step()
                epoch_loss = epoch_loss + loss.item()

            if epoch % 5 == 0:
                LOGGER.info('epoch: %s loss: %s', str(epoch), str(epoch_loss))
            if epoch % EPOCH_SAVE_INTERVAL == 0:
                LOGGER.info('saving')
                torch.save(
                    self.state_dict(), MODEL_SAVE_PATH + 'model_epc_' +
                    str(epoch + epoch_offset) + '.pt')
        torch.save(
            self.state_dict(),
            MODEL_SAVE_PATH + 'model_epc_' + str(epoch + epoch_offset) + '.pt')
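The targets read from BBOX_XYWH_JSON_PATH are in midpoint/width/height form. Assuming they are derived from the normalised diagonal boxes produced earlier, the conversion would look roughly like this (a sketch of the presumed preprocessing, not code from the project):

def diagonal_to_mpwh(tc_x, tc_y, bl_x, bl_y):
    """Turn two opposite corners into [midpoint_x, midpoint_y, width, height]."""
    w = abs(bl_x - tc_x)
    h = abs(bl_y - tc_y)
    mp_x = (tc_x + bl_x) / 2
    mp_y = (tc_y + bl_y) / 2
    return [mp_x, mp_y, w, h]

print(diagonal_to_mpwh(112, 112, 168, 196))  # [140.0, 154.0, 56, 84]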
Example #8
    def model_train(self, epoch_offset=0, lamda=10, nreg=2400, ncls=256):
        LOGGER.info('Started Training with an offset of %s', str(epoch_offset))
        create_dir(MODEL_SAVE_PATH)
        optimizer = SGD(self.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
        scheduler = StepLR(optimizer,
                           step_size=SCHEDULER_STEP,
                           gamma=SCHEDULER_GAMMA)
        LOGGER.info(
            'Learning Rate: %s, Momentum: %s, Scheduler_step: %s, scheduler_gamma: %s',
            str(LEARNING_RATE), str(MOMENTUM), str(SCHEDULER_STEP),
            str(SCHEDULER_GAMMA))
        loss_for_classification = NLLLoss()
        loss_for_regression = SmoothL1Loss()

        img_coors_json = read_json_file(BBOX_XYWH_JSON_PATH)

        anchor_box = AnchorBox()
        all_background_index = []
        all_foreground_index = []
        all_reg_tensor = []
        for image, coors in img_coors_json.items():
            li_fore_index, li_back_index, reg_ten_actual = \
                anchor_box.calculate_p_for_each_anchor_box(anchor_box.anchor_boxes, coors)
            all_background_index.append(li_back_index)
            all_foreground_index.append(li_fore_index)
            all_reg_tensor.append(reg_ten_actual)

        for epoch in range(EPOCHS):
            epoch_loss = 0.0
            scheduler.step(epoch)
            LOGGER.debug('Epoch: %s, Current Learning Rate: %s',
                         str(epoch + epoch_offset), str(scheduler.get_lr()))
            count = 0
            for image, coors in img_coors_json.items():
                img = cv2.imread(NORMALISED_IMAGES_PATH + image)
                img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
                img = img.to(self.device)
                pred_cls, pred_reg = self.forward(img)
                li_foreground_index = all_foreground_index[count]
                li_background_index = all_background_index[count]
                reg_tensor_actual = all_reg_tensor[count]
                count = count + 1

                exp_torch_fg_bg = []
                pred_torch_fg = torch.zeros(1, pred_cls.shape[2])
                pred_torch_fg = pred_torch_fg.to(self.device)
                pred_torch_reg = torch.zeros(1, pred_reg.shape[2])
                pred_torch_reg = pred_torch_reg.to(self.device)
                for idx_foreground in li_foreground_index:
                    exp_torch_fg_bg.append(1)
                    pred_torch_fg = torch.cat(
                        (pred_torch_fg,
                         pred_cls[0][idx_foreground].unsqueeze(0)),
                        dim=0)
                    pred_torch_reg = torch.cat(
                        (pred_torch_reg,
                         pred_reg[0][idx_foreground].unsqueeze(0)),
                        dim=0)
                pred_torch_fg = pred_torch_fg[1:]
                pred_torch_reg = pred_torch_reg[1:]

                pred_torch_bg = torch.zeros(1, pred_cls.shape[2])
                pred_torch_bg = pred_torch_bg.to(self.device)
                for idx_background in li_background_index:
                    exp_torch_fg_bg.append(0)
                    pred_torch_bg = torch.cat(
                        (pred_torch_bg,
                         pred_cls[0][idx_background].unsqueeze(0)),
                        dim=0)
                pred_torch_bg = pred_torch_bg[1:]

                pred_cls_only_background_foreground = torch.cat(
                    (pred_torch_fg, pred_torch_bg), dim=0)
                pred_cls_only_background_foreground = LogSoftmax(dim=1).\
                    forward(pred_cls_only_background_foreground)

                exp_torch_fg_bg = torch.tensor(exp_torch_fg_bg)

                exp_torch_fg_bg = exp_torch_fg_bg.to(self.device)
                pred_cls_only_background_foreground = pred_cls_only_background_foreground.to(
                    self.device)
                reg_tensor_actual = reg_tensor_actual.to(self.device)
                pred_torch_reg = pred_torch_reg.to(self.device)
                cls_loss = loss_for_classification(
                    pred_cls_only_background_foreground, exp_torch_fg_bg)
                reg_loss = loss_for_regression(reg_tensor_actual,
                                               pred_torch_reg)
                total_image_loss = (cls_loss / ncls) + (reg_loss * lamda /
                                                        nreg)
                total_image_loss = total_image_loss.to(self.device)
                optimizer.zero_grad()
                total_image_loss.backward()
                optimizer.step()
                epoch_loss = epoch_loss + total_image_loss.item()
            LOGGER.debug('Loss at Epoch %s: %s', str(epoch + epoch_offset),
                         str(epoch_loss))
            if epoch % EPOCH_SAVE_INTERVAL == 0:
                torch.save(
                    self.state_dict(), MODEL_SAVE_PATH + 'model_epc_' +
                    str(epoch + epoch_offset) + '.pt')
            if epoch % 5 == 0:
                LOGGER.info('Loss at Epoch %s: %s', str(epoch + epoch_offset),
                            str(epoch_loss))
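The per-image loss above mirrors the Faster R-CNN multi-task objective, with LogSoftmax + NLLLoss standing in for the classification log loss and SmoothL1Loss for the box regression:

    L = (1 / N_cls) * L_cls(p, p*) + lambda * (1 / N_reg) * L_reg(t, t*)

where p* is 1 for foreground anchors and 0 for background, t* is simply the element-wise difference between the scaled ground-truth box and the anchor (as built in calculate_p_for_each_anchor_box below), and N_cls = 256, N_reg = 2400 and lambda = 10 are the defaults in the method signature. Note that NLLLoss and SmoothL1Loss already mean-reduce over their inputs by default, so ncls and nreg act as additional scaling constants here.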
Example #9
    @staticmethod
    def calculate_p_for_each_anchor_box(anchor_boxes_list,
                                        ground_truths,
                                        iou_threshold=0.7,
                                        background_iou_threshold=0.3):
        """
        This is for the classification network of the region proposal network
            this gives whether to consider an anchor box or not i.e 0,1
        :param anchor_boxes_list: requires the anchor box to bet
        :param ground_truths: this is a list of ground truth box [[class, x,y,w,h], [class, x,y,w,h]]
        :param iou_threshold:
        :param background_iou_threshold:
        :return:
            number of bounding boxes, 2
        """
        LOGGER.debug('Entered')

        li_background_index = []
        li_foreground_index = []
        reg_tensor = torch.zeros(1, 4)

        max_index = 0
        max_iou = 0
        for img_gnd_truth in ground_truths:
            gt_bbox_coor = [
                gt_coor // VGG_SCALE_SIZE for gt_coor in img_gnd_truth
            ]
            count = 0
            for anchor in anchor_boxes_list:
                iou = calculate_iou(anchor.tolist(), gt_bbox_coor)
                if iou > iou_threshold:
                    reg_tensor = torch.cat(
                        (reg_tensor,
                         torch.sub(torch.tensor(gt_bbox_coor).float(),
                                   anchor).unsqueeze(0)),
                        dim=0)
                    li_foreground_index.append(count)
                elif iou < background_iou_threshold:
                    li_background_index.append(count)
                else:
                    if iou > max_iou:
                        max_index = count
                        max_iou = iou
                count = count + 1
            if len(li_foreground_index) == 0:
                li_foreground_index.append(max_index)
                reg_tensor = torch.cat(
                    (reg_tensor,
                     torch.sub(
                         torch.tensor(gt_bbox_coor).float(),
                         anchor_boxes_list[max_index].float()).unsqueeze(0)),
                    dim=0)

            if max_index in li_background_index:
                li_background_index.remove(max_index)
        LOGGER.debug('Detected %s anchors in foreground with iou > %s',
                     str(len(li_foreground_index)), str(iou_threshold))
        LOGGER.debug('Detected %s anchors in background with iou < %s',
                     str(len(li_background_index)),
                     str(background_iou_threshold))

        return li_foreground_index, li_background_index, reg_tensor[1:, :]
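calculate_iou is referenced above but not shown; a minimal sketch, assuming both boxes are given in the same [midpoint_x, midpoint_y, width, height] form as the anchors and scaled ground truths (if the x, y values are actually corners, the corner computation below would change accordingly):

def calculate_iou(box_a, box_b):
    """IoU of two boxes given as [mp_x, mp_y, w, h]."""
    ax1, ay1 = box_a[0] - box_a[2] / 2, box_a[1] - box_a[3] / 2
    ax2, ay2 = box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2
    bx1, by1 = box_b[0] - box_b[2] / 2, box_b[1] - box_b[3] / 2
    bx2, by2 = box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2

    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
    return inter / union if union > 0 else 0.0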