Example No. 1
def generate_mean_pixel_file():
    C = Config()
    all_imgs, _, _ = get_data(ROI_BBOX_FILE)

    avg = [0, 0, 0]
    for img_data in all_imgs:
        print(img_data['filepath'])
        img_data_aug, x_img = augment(img_data, C, augment=False)

        (width, height) = (img_data_aug['width'], img_data_aug['height'])
        (rows, cols, _) = x_img.shape

        # get image dimensions for resizing
        (resized_width,
         resized_height) = get_new_img_size(width, height, C.im_size)

        # resize the image so that the smallest side has length C.im_size (600px)
        x_img = cv2.resize(x_img, (resized_width, resized_height),
                           interpolation=cv2.INTER_CUBIC)
        pixels = (resized_width * resized_height)
        avg[0] += np.sum(x_img[:, :, 0]) / pixels
        avg[1] += np.sum(x_img[:, :, 1]) / pixels
        avg[2] += np.sum(x_img[:, :, 2]) / pixels
    avg = [a / len(all_imgs) for a in avg]
    np.savetxt(MEAN_PIXEL_FILE, avg, delimiter=',')
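# Usage sketch (not part of the original snippet): read the saved per-channel
# means back and hand them to the config object, which the other examples here
# consume via C.img_channel_mean. Assumes the same MEAN_PIXEL_FILE constant.
def load_mean_pixel():
    C = Config()
    C.img_channel_mean = np.loadtxt(MEAN_PIXEL_FILE, delimiter=',').tolist()
    return C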
Example No. 2
def format_img_size(img, C):
	""" formats the image size based on config """
	img_min_side = float(C.im_size)
	(height, width, _) = img.shape
	ratio = img_min_side / min(height, width)
	(resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)
	img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
	return img, ratio
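# Usage sketch (hypothetical input path, assuming a loaded Config instance C):
# the returned ratio maps coordinates in the resized image back to the original.
img = cv2.imread('example.jpg')
img_resized, ratio = format_img_size(img, C)
x1_original = int(round(150 / ratio))  # a box edge at x=150 in the resized image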
Example No. 3
def preprocess_image(image, config):
    # Deep-copy the image, changing data type to float
    x_img = np.array(image.data, copy=True, dtype=np.float32)

    rows, cols, _ = x_img.shape
    width, height = cols, rows

    # get image dimensions for resizing
    resized_width, resized_height = data_generators.get_new_img_size(
        width, height, config.im_size)

    # resize the image so that the smallest side has length config.im_size (600px)
    x_img = cv2.resize(x_img, (resized_width, resized_height),
                       interpolation=cv2.INTER_CUBIC)

    # Build the metadata including bounding boxes in the format keras_frcnn expects
    # We filter out bounding boxes for classes we don't care about
    bboxes = []
    for obj in image.metadata.labelled_objects:
        classes = set(obj.class_names) & set(config.class_mapping.keys())
        for cls in classes:
            bboxes.append({
                'class': cls,
                'x1': obj.bounding_box[0],
                'y1': obj.bounding_box[1],
                'x2': obj.bounding_box[0] + obj.bounding_box[2],
                'y2': obj.bounding_box[1] + obj.bounding_box[3]
            })
    metadata = {'width': width, 'height': height, 'bboxes': bboxes}
    try:
        y_rpn_cls, y_rpn_regr = data_generators.calc_rpn(
            config, metadata, width, height, resized_width, resized_height,
            nn.get_img_output_length)
    except Exception:
        return None, None, None

    # Zero-center by mean pixel, and preprocess image
    x_img[:, :, 0] -= config.img_channel_mean[0]
    x_img[:, :, 1] -= config.img_channel_mean[1]
    x_img[:, :, 2] -= config.img_channel_mean[2]
    x_img /= config.img_scaling_factor

    x_img = np.transpose(x_img, (2, 0, 1))
    x_img = np.expand_dims(x_img, axis=0)

    y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= config.std_scaling

    if keras_backend.image_dim_ordering() == 'tf':
        x_img = np.transpose(x_img, (0, 2, 3, 1))
        y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
        y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

    return np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], metadata
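# Usage sketch (hypothetical `image`, `config` and `model_rpn` objects): the
# returned tensor and RPN targets can be fed straight to a Keras training step.
x_img, y_rpn, metadata = preprocess_image(image, config)
if x_img is not None:
    loss_rpn = model_rpn.train_on_batch(x_img, y_rpn)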
Example No. 4
def plot_bbox(img_data, C, bbox):
    img_original = cv2.imread(img_data['filepath'])
    (width, height) = (img_data['width'], img_data['height'])
    (rows, cols, _) = img_original.shape
    path_old = img_data['filepath']
    assert cols == width
    assert rows == height
    (resized_width, resized_height) = data_generators.get_new_img_size(
        width, height, C.im_size)

    if bbox is not None:
        for i in range(bbox.shape[1]):
            x_c = bbox[0][i][0]
            y_c = bbox[0][i][1]
            w = bbox[0][i][2]
            h = bbox[0][i][3]
            x1_resize = x_c - w / 2.0
            y1_resize = y_c - h / 2.0
            x2_resize = x_c + w / 2.0
            y2_resize = y_c + h / 2.0

            x1_original = x1_resize * (float(width) / resized_width)
            x2_original = x2_resize * (float(width) / resized_width)
            y1_original = y1_resize * (float(height) / resized_height)
            y2_original = y2_resize * (float(height) / resized_height)

            x1 = int(round(x1_original * C.rpn_stride))
            x2 = int(round(x2_original * C.rpn_stride))
            y1 = int(round(y1_original * C.rpn_stride))
            y2 = int(round(y2_original * C.rpn_stride))

            cv2.rectangle(img_original, (x1, y1), (x2, y2), (255, 0, 0), 2)

    img_ID_start = path_old.find('/JPEGImages/')
    img_ID = path_old[img_ID_start + len('/JPEGImages/'):]
    img_addr = '/home/xuele/rpn_bf/faster_rcnn/keras-frcnn/bbox_plot/' + img_ID
    cv2.imwrite(img_addr, img_original)
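# Quick numeric check of the center-to-corner conversion above: a box with
# center (10, 20) and size w=4, h=6 has corners (8, 17) and (12, 23).
x_c, y_c, w, h = 10.0, 20.0, 4.0, 6.0
assert (x_c - w / 2.0, y_c - h / 2.0) == (8.0, 17.0)
assert (x_c + w / 2.0, y_c + h / 2.0) == (12.0, 23.0)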
Example No. 5
def calc_iou(R, img_data, C, class_mapping):
    """
	本函数读入图片数据和经过非极大值抑制的rpn预测的坐标数据,对每个提出的predict box进行label的制作
	label包括GT 类别, GT坐标
	一个box四坐标只能对应一个类别,所以即使一张图里面有很多GT,只能选择iou最大的用于匹配
	:param R:
	:param img_data:
	:param C:
	:param class_mapping:
	:return:
	"""
    ## img_data is a dict storing all information related to the current image
    bboxes = img_data['bboxes']

    (width, height) = (img_data['width'], img_data['height'])
    # get image dimensions for resizing
    (resized_width, resized_height) = data_generators.get_new_img_size(
        width, height, C.im_size)
    ## one gta row per annotated ground-truth box
    gta = np.zeros((len(bboxes), 4))

    for bbox_num, bbox in enumerate(bboxes):
        # get the GT box coordinates, and resize to account for image resizing
        ## ground-truth boxes resized and mapped onto the feature-map (output) scale
        gta[bbox_num, 0] = int(
            round(bbox['x1'] * (resized_width / float(width)) / C.rpn_stride))
        gta[bbox_num, 1] = int(
            round(bbox['x2'] * (resized_width / float(width)) / C.rpn_stride))
        gta[bbox_num, 2] = int(
            round(bbox['y1'] * (resized_height / float(height)) /
                  C.rpn_stride))
        gta[bbox_num, 3] = int(
            round(bbox['y2'] * (resized_height / float(height)) /
                  C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []  # for debugging only
    ## R holds only the bbox part of [bbox, prob]
    ## shape (num_boxes, 4); num_boxes stays below num_anchors*w*h because it is capped at 300
    for ix in range(R.shape[0]):
        (x1, y1, x2, y2) = R[ix, :]
        x1 = int(round(x1))
        y1 = int(round(y1))
        x2 = int(round(x2))
        y2 = int(round(y2))

        best_iou = 0.0
        best_bbox = -1
        for bbox_num in range(len(bboxes)):
            curr_iou = data_generators.iou([
                gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1],
                gta[bbox_num, 3]
            ], [x1, y1, x2, y2])
            if curr_iou > best_iou:
                best_iou = curr_iou
                best_bbox = bbox_num
        ## select the proposal boxes that can be used for the final classifier
        if best_iou < C.classifier_min_overlap:  #if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            continue
        else:
            w = x2 - x1
            h = y2 - y1
            x_roi.append([x1, y1, w, h])
            IoUs.append(best_iou)
            ## hard-negative mining: boxes that partially overlap a target but
            ## not enough to be classified as the object are labelled background
            if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:  #if best_iou < C.classifier_min_overlap:
                # hard negative example
                cls_name = 'bg'
            elif C.classifier_max_overlap <= best_iou:
                cls_name = bboxes[best_bbox]['class']
                ## center of the ground-truth box
                cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
                cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0
                ## center of the predicted box
                cx = x1 + w / 2.0
                cy = y1 + h / 2.0

                tx = (cxg - cx) / float(w)
                ty = (cyg - cy) / float(h)
                tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
                th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
            else:
                print('roi = {}'.format(best_iou))
                raise RuntimeError

        class_num = class_mapping[cls_name]
        class_label = len(class_mapping) * [0]
        class_label[class_num] = 1
        y_class_num.append(copy.deepcopy(class_label))
        ## the -1 excludes the background class, which gets no regression targets
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:4 +
                   label_pos] = [sx * tx, sy * ty, sw * tw, sh * th]
            labels[label_pos:4 + label_pos] = [1, 1, 1, 1]
            y_class_regr_coords.append(copy.deepcopy(coords))
            y_class_regr_label.append(copy.deepcopy(labels))
        else:
            y_class_regr_coords.append(copy.deepcopy(coords))
            y_class_regr_label.append(copy.deepcopy(labels))

    if len(x_roi) == 0:
        return None, None, None, None
    ## build the label arrays
    X = np.array(x_roi)
    Y1 = np.array(y_class_num)
    ## axis=1 keeps the regression labels and coords of the same ROI in one row
    Y2 = np.concatenate(
        [np.array(y_class_regr_label),
         np.array(y_class_regr_coords)], axis=1)

    return np.expand_dims(X, axis=0), np.expand_dims(
        Y1, axis=0), np.expand_dims(Y2, axis=0), IoUs
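# Worked example of the regression targets above (made-up numbers): a proposal
# at (x1, y1, w, h) = (10, 10, 20, 20) matched to a GT box spanning x in
# [12, 28] and y in [14, 26] on the feature map.
cxg, cyg = (12 + 28) / 2.0, (14 + 26) / 2.0  # GT center (20.0, 20.0)
cx, cy = 10 + 20 / 2.0, 10 + 20 / 2.0        # proposal center (20.0, 20.0)
tx = (cxg - cx) / 20.0                       # 0.0
ty = (cyg - cy) / 20.0                       # 0.0
tw = np.log((28 - 12) / 20.0)                # log(0.8) ~ -0.223
th = np.log((26 - 14) / 20.0)                # log(0.6) ~ -0.511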
Example No. 6
		landmarks.append(np.zeros(42))

	[new_bboxes, new_bboxes_rpn, new_probs, new_poses, new_genders, new_vizs,
	 new_landmarks] = roi_helpers.non_max_suppression_fast_classifier(
		np.array(bboxes), np.array(bboxes_rpn), np.array(probs),
		np.array(poses), np.array(genders), np.array(vizs),
		np.array(landmarks), overlap_thresh=0.2)


	base_threshold = 0.5
	true_bboxes = img_data['bboxes']
	gta = np.zeros((len(true_bboxes), 4))
	(width, height) = (img_data['width'], img_data['height'])
	(resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)
	# bp()
	for bbox_num, bbox in enumerate(true_bboxes):
		# get the GT box coordinates, and resize to account for image resizing
		# bbox_gt2pred[bbox_num] = (-1,0)
		gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width))))
		gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width))))
		gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height))))
		gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height))))

	for jk in range(new_bboxes.shape[0]):
		# bbox_pred2tr[jk] = False
		################################################
		best_iou = 0.0
		best_bbox = 0
		best_score = 0
Example No. 7
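# Example 7 evaluates per-class azimuth prediction: for every ground-truth
# box it keeps detections of the same class that overlap the GT box, takes
# the most common predicted azimuth, and counts a success when the predicted
# and true azimuths fall into the same 15-degree bin.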
def test_view_func(C, model_rpn, model_classifier):
    base_dir = os.getcwd()
    test_cls_all = ['aeroplane', 'bus', 'motorbike']
    class_mapping = C.class_mapping
    inv_class_mapping = {v: k for k, v in class_mapping.items()}
    backend = K.image_dim_ordering()
    filename = '/home/gilad/bar/real7.p'
    video_filename = "/home/gilad/ssd/keras-frcnn-master/a.mp4"
    write_flag = False
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    # turn off any data augmentation at test time
    save_flag = False
    visualise = False
    count = 0
    good_img = 0
    not_good = 0
    mAP = 0

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def draw_bbox(img, bbox, prob, azimuth, ratio):
        # new_boxes, new_probs, new_az = roi_helpers.non_max_suppression_fast(bbox, prob, azimuth, overlap_thresh=0.3,use_az=True)
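        # NOTE: `key` (the current class name) and `all_dets` are resolved from
        # the enclosing test_view_func scope at call time; draw_bbox is only
        # exercised by the commented-out calls further below.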
        new_boxes = bbox
        new_az = azimuth
        new_probs = prob
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(
                    class_to_color[key][1]), int(class_to_color[key][2])), 2)
            # cv2.rectangle(img,(bbox_gt['x1'], bbox_gt['y1']), (bbox_gt['x2'], bbox_gt['y2']), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

            # textLabel = '{}: {},azimuth : {}'.format(key,int(100*new_probs[jk]),new_az[jk])
            textLabel = 'azimuth : {}'.format(new_az[jk])

            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1 + 15)

            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (0, 0, 0), 2)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    def display_image(img):
        img1 = img[:, :, (2, 1, 0)]
        # img1=img
        im = Image.fromarray(img1.astype('uint8'), 'RGB')
        im.show()

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):
        ## read the training data from pickle file or from annotations
        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))
        return (real_x1, real_y1, real_x2, real_y2)

    vnum_test = 24
    azimuth_vec = np.concatenate(
        ([0],
         np.linspace((360. / (vnum_test * 2)), 360. -
                     (360. / (vnum_test * 2)), vnum_test)),
        axis=0)

    def find_interval(azimuth, azimuth_vec):
        for i in range(len(azimuth_vec)):
            if azimuth < azimuth_vec[i]:
                break
        ind = i
        if azimuth > azimuth_vec[-1]:
            ind = 1
        return ind

    # print(rep)
    obj_num = 0
    bbox_threshold_orig = 0.6
    th_bbox = 0.3
    #### open images from folder

    # for idx, img_name in enumerate(sorted(os.listdir(img_path))):
    # 	if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
    # 		continue
    # 	print(img_name)
    # 	filepath = os.path.join(img_path,img_name)
    # 	img = cv2.imread(filepath)

    #### open images from file
    ## read the training data from pickle file or from annotations
    # class_mapping = C.class_mapping
    succ = []
    for test_cls in test_cls_all:
        good_img = 0
        not_good = 0
        count = 0
        obj_num = 0
        gt_cls_num = class_mapping[test_cls]
        print('work on class {}'.format(test_cls))
        test_pickle = os.path.join(
            base_dir, 'pickle_data/test_data_{}.pickle'.format(test_cls))
        if os.path.exists(test_pickle):
            with open(test_pickle, 'rb') as f:
                all_imgs, classes_count, _ = pickle.load(f)
        for im_file in all_imgs:
            filepath = im_file['filepath']
            img = cv2.imread(filepath)
            img_gt = np.copy(img)
            if img is None:
                not_good += 1
                continue
            else:
                good_img += 1
                # print ('im num {}'.format(good_img))
            if good_img % 50 == 0:
                print("worked on {} images".format(good_img))
            X, ratio = format_img(img, C)

            if backend == 'tf':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2] = model_rpn.predict(X)
            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       C,
                                       K.image_dim_ordering(),
                                       overlap_thresh=0.7)
            # # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            width, height = int(im_file["width"]), int(im_file["height"])
            resized_width, resized_height = data_generators.get_new_img_size(
                width, height, C.im_size)
            # [_,_, F] = model_rpn.predict(X)

            ROIs = []
            ## pass on all the labels in the image, some of them are not equal to test_cls
            for bbox_gt in im_file['bboxes']:
                if not bbox_gt['class'] == test_cls:
                    continue
                no_bbox_flag = 1
                bbox_threshold = bbox_threshold_orig

                while no_bbox_flag and bbox_threshold > th_bbox:
                    cls_gt = bbox_gt['class']
                    az_gt = bbox_gt['azimuth']
                    el_gt = bbox_gt['elevation']
                    t_gt = bbox_gt['tilt']
                    if bbox_gt['class'] == test_cls and bbox_threshold == bbox_threshold_orig:
                        obj_num += 1
                    if len(ROIs) == 0:
                        # apply the spatial pyramid pooling to the proposed regions
                        bboxes = {}
                        probs = {}
                        azimuths = {}

                        # print ('obj num {}'.format(obj_num))

                        for jk in range(R.shape[0] // C.num_rois + 1):
                            ROIs = np.expand_dims(
                                R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                axis=0)
                            if ROIs.shape[1] == 0:
                                break

                            if jk == R.shape[0] // C.num_rois:
                                #pad R
                                curr_shape = ROIs.shape
                                target_shape = (curr_shape[0], C.num_rois,
                                                curr_shape[2])
                                ROIs_padded = np.zeros(target_shape).astype(
                                    ROIs.dtype)
                                ROIs_padded[:, :curr_shape[1], :] = ROIs
                                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0,
                                                                         0, :]
                                ROIs = ROIs_padded

                            [P_cls, P_regr,
                             P_view] = model_classifier.predict([X, ROIs])

                            for ii in range(P_cls.shape[1]):

                                if np.max(P_cls[0, ii, :]
                                          ) < bbox_threshold or np.argmax(
                                              P_cls[0, ii, :]) == (
                                                  P_cls.shape[2] - 1):
                                    continue

                                ## get class from the net
                                # cls_num = np.argmax(P_cls[0, ii, :])

                                ## use gt class
                                cls_num = gt_cls_num

                                cls_name = inv_class_mapping[cls_num]
                                cls_view = P_view[0, ii, 360 * cls_num:360 *
                                                  (cls_num + 1)]
                                # cls_name_gt = cls_name
                                # img = draw_bbox(img, bbox, prob, azimuth, ratio)
                                # if cls_name == cls_name_gt:
                                # 	print(np.argmax(cls_view,axis=0))
                                if cls_name not in bboxes:
                                    bboxes[cls_name] = []
                                    probs[cls_name] = []
                                    azimuths[cls_name] = []

                                (x, y, w, h) = ROIs[0, ii, :]

                                try:
                                    (tx, ty, tw, th) = P_regr[0, ii,
                                                              4 * cls_num:4 *
                                                              (cls_num + 1)]
                                    tx /= C.classifier_regr_std[0]
                                    ty /= C.classifier_regr_std[1]
                                    tw /= C.classifier_regr_std[2]
                                    th /= C.classifier_regr_std[3]
                                    x, y, w, h = roi_helpers.apply_regr(
                                        x, y, w, h, tx, ty, tw, th)
                                except:
                                    pass
                                bboxes[cls_name].append([
                                    C.rpn_stride * x, C.rpn_stride * y,
                                    C.rpn_stride * (x + w),
                                    C.rpn_stride * (y + h)
                                ])
                                probs[cls_name].append(np.max(P_cls[0, ii, :]))
                                azimuths[cls_name].append(
                                    np.argmax(cls_view, axis=0))

                    all_dets = []
                    if len(bboxes) == 0:
                        bbox_threshold -= 0.1
                    for key in bboxes:
                        # if 1:
                        if key == test_cls and bbox_gt['class'] == test_cls:
                            bbox = np.array(bboxes[key])
                            prob = np.array(probs[key])
                            azimuth = np.array(azimuths[key])

                            ## get the azimuth from bbox that have more than 'overlap_thresh' overlap with gt_bbox
                            az = []
                            overlap_thresh = 0.5
                            try:
                                while np.size(
                                        az) == 0 and overlap_thresh > 0.3:
                                    _, prob_bbox, az = roi_helpers.overlap_with_gt(
                                        bbox,
                                        prob,
                                        azimuth,
                                        bbox_gt,
                                        ratio=ratio,
                                        overlap_thresh=overlap_thresh,
                                        max_boxes=300,
                                        use_az=True)
                                    if np.size(
                                            az) != 0 and overlap_thresh == 0.5:
                                        mAP += 1
                                    overlap_thresh -= 0.1
                                if overlap_thresh == 0:
                                    print("No good Bbox was found")
                                counts = np.bincount(az)
                            except:
                                az = []
                                counts = []
                            try:
                                az_fin = np.argmax(counts)
                                true_bin = find_interval(az_gt, azimuth_vec)
                                prob_bin = find_interval(az_fin, azimuth_vec)
                                no_bbox_flag = 0
                                if true_bin == prob_bin:
                                    count += 1
                                    break
                            except:
                                # print('here')
                                no_bbox_flag = 1
                                bbox_threshold -= 0.1

                        ## azimuth calculations

                        ## display
                        # if visualise:
                        # 	display_image(img)
                        # # cv2.imshow('img', img)
                        # # cv2.waitKey(0)
                        # if save_flag:
                        #    cv2.imwrite('./results_imgs/{}'.format(img_name),img)
                        #    # img = img[:, :, (2, 1, 0)]
                        #    # cv2.imwrite('./results_imgs/video/{}.png'.format(num),img)
                        # # print('save')
                    bbox_threshold -= 0.1
                    # if visualise:
                    # 	display_image(img)
        succ.append(float(count) / float(obj_num) * 100.)
        string = 'for class {} -true count is {} out of {} from {} images . {} success'.format(
            test_cls, count, obj_num, good_img,
            float(count) / float(obj_num) * 100.)
        print(string)
        # NOTE: mAP is converted to a percentage inside the class loop, so the
        # printed value is only meaningful for the last class processed.
        mAP = float(mAP) / float(obj_num) * 100.
        print("mAP is {}".format(mAP))
        # if write_flag:
        # 	f = open('{}_results.txt'.format(weight_name),'a')
        # 	f.write(string+'\n')
        # 	f.close()
    return succ, mAP
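# Quick check of the azimuth binning used by find_interval above (a sketch,
# reusing the same azimuth_vec construction): with vnum_test = 24 the bin
# boundaries are 0, 7.5, 22.5, ..., 352.5, i.e. 24 bins of 15 degrees with
# bin 1 wrapping around 0/360.
vnum_test = 24
azimuth_vec = np.concatenate(
    ([0], np.linspace(360. / (vnum_test * 2),
                      360. - 360. / (vnum_test * 2), vnum_test)), axis=0)
# find_interval(5.0, azimuth_vec)   -> 1   (bin around 0 degrees)
# find_interval(10.0, azimuth_vec)  -> 2
# find_interval(355.0, azimuth_vec) -> 1   (wraps past the last boundary)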
Example No. 8
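# Example 8 predicts azimuth with a 1-nearest-neighbour lookup: it first runs
# the detector over azimuth-sorted training images and stores each ROI's
# inner feature vector together with its ground-truth azimuth, then at test
# time classifies a ROI's azimuth by the closest stored feature vector.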
def test_view_func_NN(model_classifier, model_rpn, model_inner, C):
    test_cls = 'aeroplane'
    input_train_file = 'pickle_data/train_data_Wflip_all.pickle'

    ## read the training data from pickle file or from annotations
    test_pickle = 'pickle_data/test_data_{}.pickle'.format(test_cls)
    if os.path.exists(test_pickle):
        with open(test_pickle, 'rb') as f:
            all_imgs, classes_count, _ = pickle.load(f)

    class_mapping = C.class_mapping
    inv_class_mapping = {v: k for k, v in class_mapping.items()}
    backend = K.image_dim_ordering()
    gt_cls_num = class_mapping[test_cls]
    print('work on class {}'.format(test_cls))
    base_path = os.getcwd()

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False
    count = 0
    good_img = 0
    not_good = 0

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    def display_image(img):
        img1 = img[:, :, (2, 1, 0)]
        # img1=img
        im = Image.fromarray(img1.astype('uint8'), 'RGB')
        im.show()

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):
        ## read the training data from pickle file or from annotations
        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))
        return (real_x1, real_y1, real_x2, real_y2)

    vnum_test = 24
    azimuth_vec = np.concatenate(
        ([0],
         np.linspace((360. / (vnum_test * 2)), 360. -
                     (360. / (vnum_test * 2)), vnum_test)),
        axis=0)

    def find_interval(azimuth, azimuth_vec):
        for i in range(len(azimuth_vec)):
            if azimuth < azimuth_vec[i]:
                break
        ind = i
        if azimuth > azimuth_vec[-1]:
            ind = 1
        return ind

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    # print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = 32

    obj_num = 0
    bbox_threshold_orig = 0.6
    th_bbox = 0.4

    ## get GT for all az for single cls
    feature_az = []
    sorted_path = input_train_file
    tmp_ind = sorted_path.index('.pickle')
    sorted_path = sorted_path[:tmp_ind] + "_sorted_Angles" + sorted_path[tmp_ind:]
    if os.path.exists(sorted_path):
        print("loading sorted data")
        with open(sorted_path, 'rb') as f:
            trip_data = pickle.load(f)
    im_file = []
    ind = []
    for ii in range(360):
        for jj in range(3):
            try:
                im_file.append(trip_data[test_cls][ii][jj])
                ind.append(ii)
            except:
                if jj == 0:
                    print('no azimuth {}'.format(ii))
    data_gen_train = data_generators.get_anchor_gt(im_file, [],
                                                   C,
                                                   K.image_dim_ordering(),
                                                   mode='test')
    azimuth_dict = []
    inner_NN = []
    azimuths = []
    for tt in range(len(ind)):
        try:
            if tt % 100 == 0:
                print('worked on {}/{}'.format(tt, len(ind)))
            # print ('im num {}'.format(good_img))
            X, Y, img_data = next(data_gen_train)

            P_rpn = model_rpn.predict_on_batch(X)

            R = roi_helpers.rpn_to_roi(P_rpn[0],
                                       P_rpn[1],
                                       C,
                                       K.image_dim_ordering(),
                                       use_regr=True,
                                       overlap_thresh=0.7,
                                       max_boxes=300)

            X2, Y1, Y2, Y_view = roi_helpers.calc_iou_new(
                R, img_data, C, C.class_mapping)

            pos_samples = np.where(Y1[0, :, -1] == 0)
            sel_samples = pos_samples[0].tolist()
            R = X2[0, sel_samples, :]
            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // C.num_rois:
                    # pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr, P_view] = model_classifier.predict([X, ROIs])
                iner_f = model_inner.predict([X, ROIs])
                # oo = model_classifier_only.predict([F, ROIs])

                for ii in range(len(sel_samples)):

                    if np.max(P_cls[0,
                                    ii, :]) < bbox_threshold_orig or np.argmax(
                                        P_cls[0,
                                              ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    ## get class from the net
                    # cls_num = np.argmax(P_cls[0, ii, :])

                    ## use gt class
                    cls_num = gt_cls_num

                    cls_name = inv_class_mapping[cls_num]
                    cls_view = P_view[0, ii, 360 * cls_num:360 * (cls_num + 1)]

                    # azimuths[cls_name].append(np.argmax(cls_view, axis=0))
                    inner_NN.append(iner_f[0, ii, :])
                    azimuth_dict.append(img_data['bboxes'][0]['azimuth'])
        except:
            print('failed on az {}'.format(img_data['bboxes'][0]['azimuth']))
    ## calculating some mean feature map for every az
    with open('pickle_data/{}_NN.pickle'.format(C.weight_name), 'wb') as f:
        pickle.dump([inner_NN, azimuth_dict], f)
        print('saved PICKLE')

    with open('pickle_data/{}_NN.pickle'.format(C.weight_name), 'rb') as f:
        inner_NN, azimuth_dict = pickle.load(f)
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(inner_NN, azimuth_dict)

    jj = 0
    for im_file in all_imgs:
        jj += 1
        if jj % 50 == 0:
            print(jj)
        filepath = im_file['filepath']
        img = cv2.imread(filepath)
        img_gt = np.copy(img)
        if img is None:
            not_good += 1
            continue
        else:
            good_img += 1
            # print ('im num {}'.format(good_img))
        X, ratio = format_img(img, C)

        if backend == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        Y1, Y2 = model_rpn.predict(X)
        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)
        # # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        width, height = int(im_file["width"]), int(im_file["height"])
        resized_width, resized_height = data_generators.get_new_img_size(
            width, height, C.im_size)
        # [_,_, F] = model_rpn.predict(X)
        ROIs = []
        ## pass on all the labels in the image, some of them are not equal to test_cls
        for bbox_gt in im_file['bboxes']:
            no_bbox_flag = 1
            bbox_threshold = bbox_threshold_orig
            if not bbox_gt['class'] == test_cls:
                continue
            if bbox_gt['class'] == test_cls and bbox_threshold == bbox_threshold_orig:
                obj_num += 1
            while no_bbox_flag and bbox_threshold > th_bbox:
                cls_gt = bbox_gt['class']
                az_gt = bbox_gt['azimuth']
                el_gt = bbox_gt['elevation']
                t_gt = bbox_gt['tilt']
                if len(ROIs) == 0:
                    # apply the spatial pyramid pooling to the proposed regions
                    bboxes = {}
                    probs = {}
                    azimuths = {}
                    inner_res = {}
                    # print ('obj num {}'.format(obj_num))

                    for jk in range(R.shape[0] // C.num_rois + 1):
                        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                                (jk + 1), :],
                                              axis=0)
                        if ROIs.shape[1] == 0:
                            break

                        if jk == R.shape[0] // C.num_rois:
                            #pad R
                            curr_shape = ROIs.shape
                            target_shape = (curr_shape[0], C.num_rois,
                                            curr_shape[2])
                            ROIs_padded = np.zeros(target_shape).astype(
                                ROIs.dtype)
                            ROIs_padded[:, :curr_shape[1], :] = ROIs
                            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                            ROIs = ROIs_padded

                        [P_cls, P_regr,
                         P_view] = model_classifier.predict([X, ROIs])
                        inner_out = model_inner.predict([X, ROIs])
                        # oo = model_classifier_only.predict([F, ROIs])

                        for ii in range(P_cls.shape[1]):

                            if np.max(P_cls[
                                    0, ii, :]) < bbox_threshold or np.argmax(
                                        P_cls[0,
                                              ii, :]) == (P_cls.shape[2] - 1):
                                continue

                            ## get class from the net
                            # cls_num = np.argmax(P_cls[0, ii, :])

                            ## use gt class
                            cls_num = gt_cls_num

                            cls_name = inv_class_mapping[cls_num]
                            cls_view = P_view[0, ii, 360 * cls_num:360 *
                                              (cls_num + 1)]

                            if cls_name not in bboxes:
                                bboxes[cls_name] = []
                                probs[cls_name] = []
                                azimuths[cls_name] = []
                                inner_res[cls_name] = []

                            (x, y, w, h) = ROIs[0, ii, :]

                            try:
                                (tx, ty, tw,
                                 th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                                tx /= C.classifier_regr_std[0]
                                ty /= C.classifier_regr_std[1]
                                tw /= C.classifier_regr_std[2]
                                th /= C.classifier_regr_std[3]
                                x, y, w, h = roi_helpers.apply_regr(
                                    x, y, w, h, tx, ty, tw, th)
                            except:
                                pass
                            bboxes[cls_name].append([
                                C.rpn_stride * x, C.rpn_stride * y,
                                C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                            ])
                            probs[cls_name].append(np.max(P_cls[0, ii, :]))
                            azimuths[cls_name].append(
                                np.argmax(cls_view, axis=0))
                            inner_res[cls_name].append(inner_out[0, ii, :])

                # cv2.rectangle(img_gt, (bbox_gt['x1'], bbox_gt['y1']), (bbox_gt['x2'], bbox_gt['y2']), (int(class_to_color[test_cls][0]), int(class_to_color[test_cls][1]), int(class_to_color[test_cls][2])), 2)
                for key in bboxes:
                    # if 1:
                    if key == test_cls and bbox_gt['class'] == test_cls:
                        bbox = np.array(bboxes[key])
                        prob = np.array(probs[key])
                        azimuth = np.array(azimuths[key])
                        inner_result = np.array(inner_res[key])
                        # img = draw_bbox(img,bbox, prob, azimuth, ratio)
                        azimuth = neigh.predict(inner_result)
                        ## get the azimuth from bbox that have more than 'overlap_thresh' overlap with gt_bbox
                        az = []
                        overlap_thresh = 0.5
                        try:
                            while np.size(az) == 0 and overlap_thresh > 0:
                                _, prob_bbox, az = roi_helpers.overlap_with_gt(
                                    bbox,
                                    prob,
                                    azimuth,
                                    bbox_gt,
                                    ratio=ratio,
                                    overlap_thresh=overlap_thresh,
                                    max_boxes=300,
                                    use_az=True)
                                overlap_thresh -= 0.1
                            if overlap_thresh == 0:
                                print("No good Bbox was found")
                            counts = np.bincount(az)
                        except:
                            az = []
                            counts = []
                        try:
                            az_fin = np.argmax(counts)
                            true_bin = find_interval(az_gt, azimuth_vec)
                            prob_bin = find_interval(az_fin, azimuth_vec)
                            no_bbox_flag = 0
                            if true_bin == prob_bin:
                                count += 1
                                break
                        except:
                            # print('here')
                            no_bbox_flag = 1
                            bbox_threshold -= 0.1

                    ## azimuth calculations

                    ## display

                bbox_threshold -= 0.1

    succ = float(count) / float(obj_num) * 100.
    print(
        'for class {} -true count is {} out of {} from {} images . {} success'.
        format(test_cls, count, obj_num, good_img, succ))
    return succ
def calc_iou(R, img_data, C, class_mapping):

	bboxes = img_data['bboxes']
	(width, height) = (img_data['width'], img_data['height'])
	# get image dimensions for resizing
	(resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)

	gta = np.zeros((len(bboxes), 4))

	for bbox_num, bbox in enumerate(bboxes):
		# get the GT box coordinates, and resize to account for image resizing
		gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width))/C.rpn_stride))
		gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width))/C.rpn_stride))
		gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height))/C.rpn_stride))
		gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height))/C.rpn_stride))

	x_roi = []
	y_class_num = []
	y_class_regr_coords = []
	y_class_regr_label = []

	for ix in range(R.shape[0]):
		(x1, y1, x2, y2) = R[ix, :]
		x1 = int(round(x1))
		y1 = int(round(y1))
		x2 = int(round(x2))
		y2 = int(round(y2))

		best_iou = 0.0
		best_bbox = -1
		for bbox_num in range(len(bboxes)):
			curr_iou = data_generators.iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1, y1, x2, y2])
			if curr_iou > best_iou:
				best_iou = curr_iou
				best_bbox = bbox_num

		if best_iou < C.classifier_min_overlap:
			continue
		else:
			w = x2 - x1
			h = y2 - y1
			x_roi.append([x1, y1, w, h])

			if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
				# hard negative example
				cls_name = 'bg'
			elif C.classifier_max_overlap <= best_iou:
				cls_name = bboxes[best_bbox]['class']
				cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
				cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0

				cx = x1 + w / 2.0
				cy = y1 + h / 2.0

				tx = (cxg - cx) / float(w)
				ty = (cyg - cy) / float(h)
				tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
				th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
			else:
				print('roi = {}'.format(best_iou))
				raise RuntimeError

		class_num = class_mapping[cls_name]
		class_label = len(class_mapping) * [0]
		class_label[class_num] = 1
		y_class_num.append(copy.deepcopy(class_label))
		coords = [0] * 4 * (len(class_mapping) - 1)
		labels = [0] * 4 * (len(class_mapping) - 1)
		if cls_name != 'bg':
			label_pos = 4 * class_num
			sx, sy, sw, sh = C.classifier_regr_std
			coords[label_pos:4+label_pos] = [sx*tx, sy*ty, sw*tw, sh*th]
			labels[label_pos:4+label_pos] = [1, 1, 1, 1]
			y_class_regr_coords.append(copy.deepcopy(coords))
			y_class_regr_label.append(copy.deepcopy(labels))
		else:
			y_class_regr_coords.append(copy.deepcopy(coords))
			y_class_regr_label.append(copy.deepcopy(labels))

	if len(x_roi) == 0:
		return None, None, None

	X = np.array(x_roi)
	Y1 = np.array(y_class_num)
	Y2 = np.concatenate([np.array(y_class_regr_label),np.array(y_class_regr_coords)],axis=1)

	return np.expand_dims(X, axis=0), np.expand_dims(Y1, axis=0), np.expand_dims(Y2, axis=0)
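# Usage sketch (hypothetical wiring, following a typical keras-frcnn training
# step): turn the RPN output into proposals, then build classifier targets.
P_rpn = model_rpn.predict_on_batch(X)
R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(),
                           use_regr=True, overlap_thresh=0.7, max_boxes=300)
X2, Y1, Y2 = calc_iou(R, img_data, C, class_mapping)
if X2 is not None:
    loss_class = model_classifier.train_on_batch([X, X2], [Y1, Y2])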