Example No. 1
def detect_img(img_name):

    use_horizontal_flips = False
    use_vertical_flips = False
    rot_90 = False
    im_size = 600
    anchor_box_scales = [64, 128, 256, 512]
    anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]
    img_channel_mean = [103.939, 116.779, 123.68]
    img_scaling_factor = 1.0
    num_rois = 4
    rpn_stride = 16
    balanced_classes = False
    std_scaling = 4.0
    classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
    rpn_min_overlap = 0.3
    rpn_max_overlap = 0.7
    classifier_min_overlap = 0.1
    classifier_max_overlap = 0.5
    class_mapping = {'MALIGNANT': 0, 'BENIGN': 1, 'bg': 2}

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    num_features = 1024

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_path = "frcnn\\model_final_1.hdf5"
    # print('Loading weights from {}'.format(model_path))
    # model_rpn.load_weights(model_path, by_name=True)
    # model_classifier.load_weights(model_path, by_name=True)

    # model_rpn.compile(optimizer='sgd', loss='mse')
    # model_classifier.compile(optimizer='sgd', loss='mse')
    model_rpn, model_classifier = get_model(model_path, model_rpn,
                                            model_classifier)

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    print(img_name)
    st = time.time()
    # filepath = os.path.join(img_path,img_name)

    img = cv2.imread(img_name)

    X, ratio = format_img(img, im_size, img_channel_mean, img_scaling_factor)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               anchor_box_scales,
                               anchor_box_ratios,
                               std_scaling,
                               rpn_stride,
                               K.image_dim_ordering(),
                               overlap_thresh=0.5)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // num_rois + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // num_rois:
            #pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):

            if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= classifier_regr_std[0]
                ty /= classifier_regr_std[1]
                tw /= classifier_regr_std[2]
                th /= classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # regression can overflow on degenerate boxes; keep the raw ROI
                pass
            bboxes[cls_name].append([
                rpn_stride * x, rpn_stride * y, rpn_stride * (x + w),
                rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(
                    class_to_color[key][1]), int(class_to_color[key][2])), 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1 - 0)

            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (0, 0, 0), 2)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    # print('Elapsed time = {}'.format(time.time() - st))
    # print(all_dets)
    # cv2.imshow('img', img)
    # cv2.waitKey(0)
    img_name = img_name.split('\\')[-1]
    # cv2.imwrite(f'./static/images/{img_name}.png', img)
    cv2.imwrite('./predict/kq.jpg', img)
    try:
        a = all_dets[0]
    except IndexError:
        # Vietnamese: "khong phat hien" = "no detection"
        a = ("khong phat hien", "khong phat hien")

    print(a)
    return img_name, a


# print("tp: {} \nfp: {}".format(tp, fp))

# img_name = r"D:\Desktop\thesis\Images\mass_crop_train\P_01981_RIGHT_MLO_FULL.jpg"

# a, b = detect_img(img_name)
# print(b[0], b[1])
# print(type(b[0]), type(b[1]))
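Example No. 1 calls two helpers, format_img and get_real_coordinates, that are defined elsewhere in the project. The sketch below is an assumed reconstruction consistent with how the snippet calls them (channels-first output plus a resize ratio for mapping boxes back); the project's own versions may differ in detail.

import cv2
import numpy as np

def format_img(img, im_size, img_channel_mean, img_scaling_factor):
    # Sketch: resize so the shorter side equals im_size, subtract the
    # per-channel means, and return a (1, 3, H, W) tensor plus the ratio.
    height, width = img.shape[:2]
    ratio = im_size / min(height, width)
    img = cv2.resize(img, (int(ratio * width), int(ratio * height)),
                     interpolation=cv2.INTER_CUBIC)
    img = img[:, :, (2, 1, 0)].astype(np.float32)  # BGR -> RGB
    img[:, :, 0] -= img_channel_mean[0]
    img[:, :, 1] -= img_channel_mean[1]
    img[:, :, 2] -= img_channel_mean[2]
    img /= img_scaling_factor
    img = np.transpose(img, (2, 0, 1))  # channels first, as detect_img expects
    return np.expand_dims(img, axis=0), ratio

def get_real_coordinates(ratio, x1, y1, x2, y2):
    # Sketch: map box coordinates from the resized image back to the original.
    return (int(round(x1 / ratio)), int(round(y1 / ratio)),
            int(round(x2 / ratio)), int(round(y2 / ratio)))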
Example No. 2
def findBBox(Q_frcnn,side,C,options,model_rpn,model_classifier_only,overlap=70):

	# correction applied to bbox co-ordinates if scanning the right side of the image
	if side == 'R':
		org_shift = 814-overlap/2
	elif side == 'L':
		org_shift = 0

	# obtaining classes used to train classifier
	class_mapping = C.class_mapping

	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}


	while not Q_frcnn.empty():
		try:
			# get image from queue
			img = Q_frcnn.get()

			# removing channel means
			X = img[:, :, (2, 1, 0)]
			X = X.astype(np.float32)

			X[:, :, 0] -= C.img_channel_mean[0]
			X[:, :, 1] -= C.img_channel_mean[1]
			X[:, :, 2] -= C.img_channel_mean[2]

			X = np.transpose(X, (2, 0, 1))
			X = np.expand_dims(X, axis=0)


			if K.image_dim_ordering() == 'tf':
				X = np.transpose(X, (0, 2, 3, 1))

			# get the feature maps and output from the RPN
			[Y1, Y2, F] = model_rpn.predict(X)

			# convert rpn output into co-ordinates of corners of bbox
			R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=options.non_maxima_suprresion_threshold-0.2)

			# convert from (x1,y1,x2,y2) to (x,y,w,h)
			R[:, 2] -= R[:, 0]
			R[:, 3] -= R[:, 1]

			# apply the spatial pyramid pooling to the proposed regions
			bboxes = {}
			probs = {}

			for jk in range(R.shape[0]//C.num_rois + 1):
				ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
				if ROIs.shape[1] == 0:
					break

				if jk == R.shape[0]//C.num_rois:
					# padding R
					curr_shape = ROIs.shape
					target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
					ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
					ROIs_padded[:, :curr_shape[1], :] = ROIs
					ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
					ROIs = ROIs_padded

				# passing proposed ROIs to classifier
				[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

				for ii in range(P_cls.shape[1]):

					if np.max(P_cls[0, ii, :]) < options.bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
						continue

					cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

					if cls_name not in bboxes:
						bboxes[cls_name] = []
						probs[cls_name] = []

					(x, y, w, h) = ROIs[0, ii, :]

					cls_num = np.argmax(P_cls[0, ii, :])

					try:
						(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
						tx /= C.classifier_regr_std[0]
						ty /= C.classifier_regr_std[1]
						tw /= C.classifier_regr_std[2]
						th /= C.classifier_regr_std[3]
						x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
					except Exception:
						pass

					rpns = C.rpn_stride

					bboxes[cls_name].append([rpns*x, rpns*y, rpns*(x+w), rpns*(y+h)])
					probs[cls_name].append(np.max(P_cls[0, ii, :]))

			bboxForFrames = []

			for key in bboxes:
				bbox = np.array(bboxes[key])
				new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=options.non_maxima_suprresion_threshold-0.2)

				for jk in range(new_boxes.shape[0]):
					(x1, y1, x2, y2) = new_boxes[jk,:]

					x1 += org_shift				# adjusting for change in origin when dividing image into two halves
					x2 += org_shift				# co-ordinate system of the right half is shifted
					y1 = 0 if y1<0 else y1		# negative co-ordinates clamped to zero
					y2 = 0 if y2<0 else y2
					bboxForFrames.append((x1,y1,x2,y2))

			Q_frcnn.task_done()
			yield bboxForFrames

		except StopIteration:
			return
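findBBox consumes a queue of image halves and yields full-frame box lists, so a caller has to split each frame around the hard-coded column and merge the two generators. A hypothetical driver sketch follows; C, options, model_rpn and model_classifier_only are assumed to come from the same setup that trained the detector, and 'video.avi' is a placeholder input.

import queue
import cv2

overlap = 70
split = 814  # the snippet hard-codes this column via org_shift = 814 - overlap/2
q_left, q_right = queue.Queue(), queue.Queue()

cap = cv2.VideoCapture('video.avi')
while True:
    ok, frame = cap.read()
    if not ok:
        break
    q_left.put(frame[:, :split + overlap // 2])   # left half plus shared overlap
    q_right.put(frame[:, split - overlap // 2:])  # right half plus shared overlap
cap.release()

left_gen = findBBox(q_left, 'L', C, options, model_rpn, model_classifier_only, overlap)
right_gen = findBBox(q_right, 'R', C, options, model_rpn, model_classifier_only, overlap)
for left_boxes, right_boxes in zip(left_gen, right_gen):
    detections = left_boxes + right_boxes  # both already in full-frame coordinates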
Example No. 3
                    continue
                x = max(0, x)
                y = max(0, y)
            except Exception:
                pass
            bboxes[class_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[class_name].append(np.max(rcnn_class[0, roi_idx, :]))

    all_detections = []

    for class_name in bboxes:
        class_bboxes = np.array(bboxes[class_name])
        filtered_class_boxes, filtered_class_probs = roi_helpers.non_max_suppression_fast(
            class_bboxes, np.array(probs[class_name]), overlap_thresh=0.5)
        for idx in range(filtered_class_boxes.shape[0]):
            (x1, y1, x2, y2) = filtered_class_boxes[idx, :]
            # map coordinates back to the original (pre-resize) image size
            (x1, y1, x2, y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(image, (x1, y1), (x2, y2),
                          (int(class_name_color_mapping[class_name][0]),
                           int(class_name_color_mapping[class_name][1]),
                           int(class_name_color_mapping[class_name][2])), 2)

            text = '{}: {}'.format(class_name,
                                   int(100 * filtered_class_probs[idx]))
            logger.info('Detect {} in {} with prob {}'.format(
                class_name, (x1, y1, x2, y2), filtered_class_probs[idx]))
            all_detections.append((class_name, 100 * filtered_class_probs[idx],
                                   (x1, y1, x2, y2)))
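All five examples funnel their per-class boxes through roi_helpers.non_max_suppression_fast. As a reference for what that call does, here is a simplified greedy non-max-suppression sketch of the same idea; it is not the library's exact implementation.

import numpy as np

def nms_sketch(boxes, probs, overlap_thresh=0.5):
    # boxes: (N, 4) ndarray of (x1, y1, x2, y2); probs: (N,) ndarray of scores.
    if len(boxes) == 0:
        return boxes, probs
    boxes = boxes.astype(np.float32)
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1) * (y2 - y1)
    order = np.argsort(probs)  # ascending: the best box is always at the end
    keep = []
    while len(order) > 0:
        i = order[-1]          # highest-scoring remaining box
        keep.append(i)
        # IoU of box i against every other remaining box
        xx1 = np.maximum(x1[i], x1[order[:-1]])
        yy1 = np.maximum(y1[i], y1[order[:-1]])
        xx2 = np.minimum(x2[i], x2[order[:-1]])
        yy2 = np.minimum(y2[i], y2[order[:-1]])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = inter / (area[i] + area[order[:-1]] - inter + 1e-9)
        # keep only boxes that do not overlap box i too much
        order = order[:-1][iou <= overlap_thresh]
    return boxes[keep].astype(np.int32), probs[keep]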
Example No. 4
				tx /= C.classifier_regr_std[0]
				ty /= C.classifier_regr_std[1]
				tw /= C.classifier_regr_std[2]
				th /= C.classifier_regr_std[3]
				x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
			except Exception:
				pass
			bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
			probs[cls_name].append(np.max(P_cls[0, ii, :]))

	all_dets = []

	for key in bboxes:
		bbox = np.array(bboxes[key])

		new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
		for jk in range(new_boxes.shape[0]):
			(x1, y1, x2, y2) = new_boxes[jk,:]

			(real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

			cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

			textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
			all_dets.append((key,100*new_probs[jk]))

			(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
			textOrg = (real_x1, real_y1-0)

			cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
			cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
			cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
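Examples No. 1 through No. 4 all divide the regression outputs by classifier_regr_std and pass them to roi_helpers.apply_regr. The underlying transform is the standard Faster R-CNN box parameterization; the sketch below shows that computation (the library version also guards against numeric overflow).

import math

def apply_regr_sketch(x, y, w, h, tx, ty, tw, th):
    # Shift the box centre by (tx*w, ty*h) and rescale the width and
    # height by exp(tw) and exp(th) -- the usual Faster R-CNN decoding.
    cx, cy = x + w / 2.0, y + h / 2.0
    cx1, cy1 = tx * w + cx, ty * h + cy
    w1, h1 = math.exp(tw) * w, math.exp(th) * h
    x1, y1 = cx1 - w1 / 2.0, cy1 - h1 / 2.0
    return int(round(x1)), int(round(y1)), int(round(w1)), int(round(h1))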
Example No. 5
def test(im_size=600,
         mode='normal',
         detect_threshold=0.9,
         overlap_threshold=0.5):
    """ Test the model
    Args:
        im_size: trained model available for 300, 400 or 600. Input images are resized to this size.
        mode: 'normal' means predictions will be saved. Any other mode means only a CSV corner file will be saved.
        detect_threshold: minimum class belonging probability for a proposal to be accepted
        overlap_threshold: maximum IoU between two proposals

    Returns:
        avg_time: time taken over the last 10 images. Time is measured from loading the image to saving the predictions.
    """

    conf_file = f'models/conf/config_frcnn_{im_size}.pickle'
    C = pickle.load(open(conf_file, 'rb'))

    img_path = 'data/testing/images'

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    class_to_color = {
        class_mapping[v]: np.array([0, 128, 255])
        for v in class_mapping
    }

    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base resnet network
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping))

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn, model_classifier = load_weights(im_size, model_rpn,
                                               model_classifier)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    output_folder = f'output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    output_file = open(
        f'output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}/'
        f'predicted_corners_size{int(im_size)}_p{int(detect_threshold * 100)}.csv',
        'w')
    writer = csv.writer(output_file)

    print(
        f'Predicting gates for im_size={im_size} and detection probability threshold of {int(detect_threshold * 100)}%.'
    )
    print(
        f'Output to be saved in directory "/output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}/"'
    )
    progbar = Progbar(len(os.listdir(img_path)) - 1)
    time_list = []  # per-image timings, accumulated across all images after warm-up

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(('.png', '.jpg')):
            continue

        filepath = os.path.join(img_path, img_name)

        start_time = time.time()
        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   overlap_thresh=overlap_threshold,
                                   max_boxes=C.max_boxes)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < detect_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                # only gate objects
                cls_name = 'gate'

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                # only gate objects, which is index 0
                cls_num = 0

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    pass

                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=overlap_threshold)

            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                w = (x2 - x1)
                h = (y2 - y1)
                scale = 0.16
                x1 += w * scale
                x2 -= w * scale
                y1 += h * scale
                y2 -= h * scale

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                writer.writerow([
                    img_name, real_x1, real_y1, real_x2, real_y1, real_x2,
                    real_y2, real_x1, real_y2
                ])

                if mode == 'normal':
                    cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                                  (int(class_to_color[key][0]),
                                   int(class_to_color[key][1]),
                                   int(class_to_color[key][2])), 2)

                    textLabel = f'{key}: {int(100 * new_probs[jk])}%'

                    (retval,
                     baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                 1)
                    textOrg = (real_x1, real_y1)

                    cv2.rectangle(img, (textOrg[0], textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (255, 255, 255), 2)
                    cv2.rectangle(img, (textOrg[0], textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5), (0, 128, 255),
                                  -1)
                    cv2.putText(img, textLabel, (real_x1, real_y1 - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255),
                                1)

        time_taken = time.time() - start_time
        progbar.update(idx)
        print(f'   -   Elapsed time: {time_taken:.3}s for {img_name}')
        if idx > 20:
            time_list.append(time_taken)

        if mode == 'normal':
            cv2.imwrite(
                f'output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}/predict_{img_name}',
                img)

    output_file.close()

    # return the average prediction time per image, excluding the warm-up images
    return sum(time_list) / len(time_list) if time_list else 0.0
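A minimal invocation sketch, assuming the config pickle under models/conf/ and the matching weights are present on disk:

if __name__ == '__main__':
    # Run the 600-px model in 'normal' mode so both the corner CSV and the
    # annotated prediction images are written under output/predictions/.
    avg_time = test(im_size=600, mode='normal',
                    detect_threshold=0.9, overlap_threshold=0.5)
    print(f'Average post-warm-up prediction time: {avg_time:.3f}s per image')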