def gen_pnet_data(data_dir, anno_file, prefix):
    neg_save_dir = os.path.join(data_dir, "12/negative")
    pos_save_dir = os.path.join(data_dir, "12/positive")
    part_save_dir = os.path.join(data_dir, "12/part")

    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:  # create output dirs if missing
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)


    pos_save_file = os.path.join(config.ANNO_STORE_DIR,
                                 config.PNET_POSTIVE_ANNO_FILENAME)
    neg_save_file = os.path.join(config.ANNO_STORE_DIR,
                                 config.PNET_NEGATIVE_ANNO_FILENAME)
    part_save_file = os.path.join(config.ANNO_STORE_DIR,
                                  config.PNET_PART_ANNO_FILENAME)

    f1 = open(pos_save_file, 'w')
    f2 = open(neg_save_file, 'w')
    f3 = open(part_save_file, 'w')

    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    num = len(annotations)
    print("%d pics in total" % num)

    p_idx = 0  # positive examples index
    n_idx = 0  # negative examples index
    d_idx = 0  # part-face examples index
    idx = 0  # pics index
    box_idx = 0  # boxes index

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_path = os.path.join(prefix, annotation[0])  # image_path
        # map() applies float to every element of the sequence; list() collects
        # the results
        bbox = list(map(float, annotation[1:]))
        boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4)  # N*4 dim array
        img = cv2.imread(im_path)
        if img is None:  # skip unreadable images instead of crashing below
            continue
        idx += 1

        if idx % 100 == 0:
            print(idx, "images done")

        height, width, channel = img.shape

        neg_num = 0
        while neg_num < 50:
            size = npr.randint(12, min(width, height) // 2)
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])

            Iou = IoU(crop_box, boxes)

            if np.max(Iou) < 0.3:
                # IoU with every gt must be below 0.3
                save_file = os.path.join(neg_save_dir,
                                         "%s.jpg" % n_idx)  # save neg image
                f2.write(save_file + ' 0\n')
                cropped_im = img[ny:ny + size, nx:nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            # box (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1
            h = y2 - y1

            # ignore small faces
            # in case the ground truth boxes of small faces are not accurate
            if max(w, h) < 40 or x1 < 0 or y1 < 0:
                continue

            # generate negative examples that have overlap with gt
            for i in range(5):
                size = npr.randint(12, min(width, height) // 2)
                # delta_x and delta_y are offsets of (x1, y1)
                delta_x = npr.randint(max(-size, -x1), w)
                delta_y = npr.randint(max(-size, -y1), h)
                nx1 = max(0, x1 + delta_x)
                ny1 = max(0, y1 + delta_y)

                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = IoU(crop_box, boxes)

                if np.max(Iou) < 0.3:
                    # IoU with every gt must be below 0.3
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    cropped_im = img[ny1:ny1 + size, nx1:nx1 + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12),
                                            interpolation=cv2.INTER_LINEAR)
                    f2.write(save_file + ' 0\n')  # neg samples with label 0
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # generate positive examples and part faces
            # for each gt box, sample 20 crops: IoU >= 0.65 -> positive,
            # 0.4 <= IoU < 0.65 -> part face, everything else is discarded
            for i in range(20):
                size = npr.randint(int(min(w, h) * 0.8),
                                   int(np.ceil(1.25 * max(w, h))))

                # delta here is the offset of box center
                delta_x = npr.randint(int(-w * 0.2), int(w * 0.2) + 1)
                delta_y = npr.randint(int(-h * 0.2), int(h * 0.2) + 1)

                nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                nx2 = int(nx1 + size)
                ny2 = int(ny1 + size)

                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # bbox regression offsets, derived from x1 = nx1 + size * offset_x1
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                cropped_im = img[ny1:ny2, nx1:nx2, :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)

                box_ = box.reshape(1, -1)
                iou = IoU(crop_box, box_)[0]  # single gt box -> take the scalar
                if iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif iou >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
            box_idx += 1
        print("%s images done, pos: %s part: %s neg: %s" %
              (idx, p_idx, d_idx, n_idx))

    f1.close()
    f2.close()
    f3.close()
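

# The IoU() helper used throughout is assumed to come from the project's utils
# module and is not shown on this page. A minimal sketch of the corner-format
# variant used here (one box against an N*4 array of gt boxes), relying on the
# same numpy import as the surrounding code, might look like this. Note that
# myloss() further below also calls IoU(..., format='center'), so the real
# helper presumably supports a center-format layout as well.
def IoU(box, boxes):
    """IoU between one (x1, y1, x2, y2[, score]) box and an N*4 array of boxes."""
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    xx1 = np.maximum(box[0], boxes[:, 0])  # intersection corners
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
    return inter / (box_area + areas - inter)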


def get_rnet_sample_data(data_dir, anno_file, det_boxes_file, prefix_path):
    neg_save_dir = os.path.join(data_dir, "24/negative")
    pos_save_dir = os.path.join(data_dir, "24/positive")
    part_save_dir = os.path.join(data_dir, "24/part")

    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    # load ground truth from annotation file
    # format of each line: image/path [x1, y1, x2, y2] for each gt_box in this image
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    image_size = 24
    im_idx_list = list()
    gt_boxes_list = list()
    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = os.path.join(prefix_path, annotation[0])
        boxes = list(map(float, annotation[1:]))
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)

    save_path = config.ANNO_STORE_DIR
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
    f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
    f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')

    with open(det_boxes_file, 'rb') as det_handle:
        det_boxes = pickle.load(det_handle)
    print(len(det_boxes), num_of_images)
    assert len(det_boxes) == num_of_images, \
        "number of detections does not match number of ground-truth images"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        image_done += 1
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        # dets = convert_to_square(dets)
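        # snap the detector's float coordinates to integer pixels before cropping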
        dets[:, 0:4] = np.round(dets[:, 0:4])

        # each image keeps at most 50 negative samples
        cur_n_idx = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
            width = x_right - x_left
            height = y_bottom - y_top
            # ignore box that is too small or beyond image border
            if (width < 20 or height < 20 or x_left <= 0 or y_top <= 0
                    or x_right >= img.shape[1] or y_bottom >= img.shape[0]):
                continue
            # compute intersection over union (IoU) between this box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom, x_left:x_right, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)
            # save negative images and write label

            if np.max(Iou) < 0.3:
                # IoU with every gt must be below 0.3
                cur_n_idx += 1
                if cur_n_idx <= 50:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
        print("%s images done, pos: %s part: %s neg: %s" %
              (im_idx, p_idx, d_idx, n_idx))

    f1.close()
    f2.close()
    f3.close()
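

# convert_to_square() (commented out above) is the usual MTCNN helper that pads
# detections to squares before cropping; a typical sketch, assuming N*5 arrays
# of [x1, y1, x2, y2, score] -- illustrative, not necessarily this project's code:
def convert_to_square(bbox):
    square_bbox = bbox.copy()
    h = bbox[:, 3] - bbox[:, 1] + 1
    w = bbox[:, 2] - bbox[:, 0] + 1
    max_side = np.maximum(h, w)
    # grow the shorter side symmetrically around the original center
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
    return square_bbox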


    def myloss(self, anchors, y_pred, y_true):
        self.reso = 352
        self.anchors = anchors

        loss = dict()

        # 1. Prepare
        # 1.1 re-organize y_pred
        # [bs, (5+nC)*nA, gs, gs] => [bs, num_anchors, gs, gs, 5+nC]
        bs, _, gs, _ = y_pred.size()
        nA = len(self.anchors)
        nC = self.num_classes
        y_pred = y_pred.view(bs, nA, 5 + nC, gs, gs)
        y_pred = y_pred.permute(0, 1, 3, 4, 2)

        # 1.2 prepare anchor boxes
        stride = self.reso // gs
        anchors = [(a[0] / stride, a[1] / stride) for a in self.anchors]
        anchor_bboxes = torch.zeros(nA, 4).cuda()
        anchor_bboxes[:, 2:] = torch.Tensor(anchors)
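        # anchors are stored zero-centered as (0, 0, w, h); matched against the
        # zero-centered gt shapes below, the IoU compares shapes only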

        anchor_bboxes = anchor_bboxes.repeat(bs, 1, 1)

        # 2. Build gt [tx, ty, tw, th] and masks
        # TODO: f1 score implementation
        gt_tx = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        gt_ty = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        gt_tw = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        gt_th = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        obj_mask = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        non_obj_mask = torch.ones(bs, nA, gs, gs, requires_grad=False)
        cls_mask = torch.zeros(bs, nA, gs, gs, nC, requires_grad=False)
        start = time.time()
        gt_bbox = y_true[:, :, :4] * gs  # scale boxes to feature-map units
        gt_cls_label = y_true[:, :, 4].int()
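        # (y_true is assumed to be [bs, max_boxes, 5] with boxes normalized to
        #  [0, 1] in (xc, yc, w, h) order and the class label in column 4)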

        gt_xc = gt_bbox[:, :, 0]
        gt_yc = gt_bbox[:, :, 1]
        gt_w = gt_bbox[:, :, 2]
        gt_h = gt_bbox[:, :, 3]
        gt_i, gt_j = gt_xc.int(), gt_yc.int()
        gt_box_shape = y_true[:, :, :4] * gs
        gt_box_shape[:, :, 0:2] = 0  # zero the centers: compare shapes only
        anchor_ious = IoU(gt_box_shape, anchor_bboxes, format='center')
        best_anchor = np.argmax(anchor_ious)
        anchor_w, anchor_h = anchors[best_anchor]

        gt_tw[:, best_anchor, gt_i, gt_j] = torch.log(gt_w / anchor_w + 1e-16)
        gt_th[:, best_anchor, gt_i, gt_j] = torch.log(gt_h / anchor_h + 1e-16)
        gt_tx[:, best_anchor, gt_i, gt_j] = gt_xc - gt_i
        gt_ty[:, best_anchor, gt_i, gt_j] = gt_yc - gt_j

        obj_mask[:, best_anchor, gt_i, gt_j] = 1
        non_obj_mask[:, anchor_ious > 0.5] = 0  # FIXME: 0.5 as variable
        cls_mask[:, best_anchor, gt_i, gt_j, gt_cls_label] = 1

        # 3. activate raw y_pred
        end = time.time()
        print("yolo_losses", bs, len(y_true), end - start)
        # sigmoid puts tx/ty in the same cell-offset space as the gt targets
        pred_tx = torch.sigmoid(y_pred[..., 0])
        pred_ty = torch.sigmoid(y_pred[..., 1])
        pred_tw = y_pred[..., 2]
        pred_th = y_pred[..., 3]
        pred_conf = y_pred[..., 4]
        pred_cls = y_pred[..., 5:]

        # 4. Compute loss
        obj_mask = obj_mask.cuda()
        non_obj_mask = non_obj_mask.cuda()
        cls_mask = cls_mask.cuda()
        gt_tx, gt_ty = gt_tx.cuda(), gt_ty.cuda()
        gt_tw, gt_th = gt_tw.cuda(), gt_th.cuda()

        # each criterion averages over its masked elements
        MSELoss = nn.MSELoss()
        BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
        CrossEntropyLoss = nn.CrossEntropyLoss()

        loss['x'] = MSELoss(pred_tx[obj_mask == 1], gt_tx[obj_mask == 1])
        loss['y'] = MSELoss(pred_ty[obj_mask == 1], gt_ty[obj_mask == 1])
        loss['w'] = MSELoss(pred_tw[obj_mask == 1], gt_tw[obj_mask == 1])
        loss['h'] = MSELoss(pred_th[obj_mask == 1], gt_th[obj_mask == 1])
        loss['cls'] = CrossEntropyLoss(
            pred_cls[obj_mask == 1], torch.argmax(cls_mask[obj_mask == 1], 1))
        loss['conf'] = BCEWithLogitsLoss(pred_conf[obj_mask == 1],
                                         obj_mask[obj_mask == 1])
        loss['non_conf'] = BCEWithLogitsLoss(pred_conf[non_obj_mask == 1],
                                             non_obj_mask[non_obj_mask == 1])
        loss['total_loss'] = loss['x'] + loss['y'] + loss['w'] + loss[
            'h'] + loss['cls'] + loss['conf'] + loss['non_conf']
        #["total_loss", "x", "y", "w", "h", "conf", "cls", "recall"]
        return loss['total_loss'], loss['x'], loss['y'], loss['w'], loss[
            'h'], loss['cls'], loss['conf'], loss['non_conf']
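

# The target encoding built in myloss (gt_tx = xc - i, gt_tw = log(w / anchor_w))
# is the standard YOLO parameterization and is inverted per cell at inference.
# A standalone, illustrative sketch of that decode step (not part of the class,
# using the same torch import as the surrounding code):
def decode_cell(tx, ty, tw, th, grid_x, grid_y, anchor_w, anchor_h):
    bx = torch.sigmoid(tx) + grid_x  # cell-relative offset -> absolute center x
    by = torch.sigmoid(ty) + grid_y  # cell-relative offset -> absolute center y
    bw = anchor_w * torch.exp(tw)    # log-scale -> width on the feature-map grid
    bh = anchor_h * torch.exp(th)    # log-scale -> height on the feature-map grid
    return bx, by, bw, bh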