Exemplo n.º 1
0
def generate_mean_pixel_file():
    """Compute the per-channel mean pixel value of the dataset and save it.

    Every image listed by ``get_data(ROI_BBOX_FILE)`` is loaded through
    ``augment`` (with augmentation switched off), resized to the size returned
    by ``get_new_img_size`` for ``C.im_size``, and reduced to one mean value
    per channel. The per-image means are then averaged over all images, so
    each image carries the same weight regardless of its size. The three
    resulting values are written to ``MEAN_PIXEL_FILE`` with ``np.savetxt``.

    Raises:
        ValueError: if ``get_data`` returns no images (the average would
            otherwise be a bare division by zero).
    """
    C = Config()
    all_imgs, _, _ = get_data(ROI_BBOX_FILE)
    if len(all_imgs) == 0:
        raise ValueError(
            'Cannot compute the mean pixel: no images were found in '
            '{!r}.'.format(ROI_BBOX_FILE))

    # Running sum of the per-image channel means (first three channels).
    channel_totals = np.zeros(3)
    for img_data in all_imgs:
        print(img_data['filepath'])
        img_data_aug, x_img = augment(img_data, C, augment=False)

        # get image dimensions for resizing
        resized_width, resized_height = get_new_img_size(
            img_data_aug['width'], img_data_aug['height'], C.im_size)

        # resize the image the same way the training pipeline sizes it
        # (target size is driven by C.im_size)
        x_img = cv2.resize(x_img, (resized_width, resized_height),
                           interpolation=cv2.INTER_CUBIC)
        pixels = resized_width * resized_height

        # Sum over rows and columns in one pass; dividing by the pixel count
        # turns the three channel sums into this image's channel means.
        channel_totals += x_img[:, :, :3].sum(axis=(0, 1)) / pixels

    avg = channel_totals / len(all_imgs)
    np.savetxt(MEAN_PIXEL_FILE, avg, delimiter=',')
Exemplo n.º 2
0
    def __init__(self, model_path):
        """Remember the model path, load the configuration and build the model.

        The configuration is unpickled from ``config.pickle`` (resolved
        against the current working directory) when that file exists.
        Otherwise a default ``Config`` is created and a notice is printed.
        ``self._init_model()`` is called last in both cases.

        Args:
            model_path: stored unchanged on ``self.model_path``.
        """
        self.model_path = model_path

        config_file = 'config.pickle'
        if not os.path.exists(config_file):
            # No saved training configuration: fall back to the defaults.
            self.cfg = Config()
            print('Not found previous train and saved config.pickle file. may lose class map info.')
        else:
            # NOTE: unpickling runs code embedded in the file, so this must
            # only ever read a config file from a trusted source.
            with open(config_file, 'rb') as handle:
                self.cfg = pickle.load(handle)
        self._init_model()
Exemplo n.º 3
0
# parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
# parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
# 				help="Number of ROIs per iteration. Higher means more memory use.", default=32)
# parser.add_option("--config_filename", dest="config_filename", help=
# 				"Location to read the metadata related to the training (generated when training).",
# 				default="config.pickle")
# parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
#
# (options, args) = parser.parse_args()
#
# if not options.test_path:   # if filename is not given
# 	parser.error('Error: path to test data must be specified. Pass --path to command line')

# config_output_filename = options.config_filename

# Build the run configuration from the Config defaults.
C = Config()

# Bind the base-network implementation selected by the configuration to ``nn``.
if C.network == 'resnet50':
    import keras_frcnn.resnet as nn
elif C.network == 'vgg':
    import keras_frcnn.vgg as nn
else:
    # Without this branch an unknown network name falls through silently and
    # ``nn`` is never bound, so the failure only surfaces later as a NameError.
    raise ValueError(
        "Unsupported base network {!r}; expected 'resnet50' or 'vgg'.".format(
            C.network))

# turn off any data augmentation at test time
C.use_horizontal_flips = False
C.use_vertical_flips = False
C.rot_90 = False

# img_path = options.test_path


def format_img_size(img, C):
Exemplo n.º 4
0
def start():
    """Pickle a default-constructed ``Config`` to ``config_default_resnet.pickle``.

    The file is created (or overwritten) in the current working directory
    and written with the highest pickle protocol available.
    """
    default_config = Config()
    with open('config_default_resnet.pickle', 'wb') as out_file:
        pickler = pickle.Pickler(out_file, pickle.HIGHEST_PROTOCOL)
        pickler.dump(default_config)
Exemplo n.º 5
0
    args = parser.parse_args()

    model_path = args.model_path
    config_path = args.config_path
    eval_files_list_path = args.eval_list
    images_folder_path = args.images_folder_path
    bbox_dict_path = args.gt_bbox_dict_path
    output_folder_path = args.output_folder_path
    iou_threshold = args.iou_threshold
    max_bboxes = args.max_bboxes

    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            C = pickle.load(f)
    else:
        C = Config()

    num_features = 512

    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (VGG here, can be Resnet50, Inception, etc)
    shared_layers = nn_base(img_input, trainable=False)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
Exemplo n.º 6
0
def predict(model_name, in_dir="train_cleaned", bbox_threshold=0.5):
    """Run detection on every ``.jpg`` below ``DATA_DIR/in_dir`` and save it.

    For each image the RPN model proposes regions, the classifier model
    scores them in batches of ``C.num_rois``, and the accepted boxes are
    scaled back to the original image size. Results are gathered in two
    parallel lists, ``boxes`` and ``probs``, holding one dict per processed
    image keyed by class name, and are handed to ``save_predictions``.

    Args:
        model_name: passed to ``load_config`` to build the ``Config`` and to
            ``save_predictions``.
        in_dir: sub-directory of ``DATA_DIR`` searched recursively for
            ``*.jpg`` files.
        bbox_threshold: ROIs whose best class probability is below this
            value are discarded.

    Notes:
        * An image that ``cv2.imread`` cannot decode gets empty entries in
          ``boxes`` and ``probs`` (keeping both lists index-aligned with the
          image list) and a printed notice, instead of aborting the run.
        * ``KeyboardInterrupt`` stops the loop early; whatever has been
          collected so far is still passed to ``save_predictions``.
    """
    C = Config(**load_config(model_name))
    C.use_horizontal_flips = False
    C.use_vertical_flips = False

    class_mapping = get_class_mappings()

    # The backend's dimension ordering is fixed for the whole run, so it is
    # queried once here rather than for every image.
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    model_rpn = get_model_rpn(input_shape_img, C)
    model_classifier = get_model_classifier(class_mapping,
                                            input_shape_features, C)

    images = sorted(
        glob.glob(os.path.join(DATA_DIR, in_dir, "**/*.jpg"), recursive=True))
    print("Found " + str(len(images)) + " images...")

    probs = []
    boxes = []
    try:
        for img_name in tqdm(images, total=len(images)):
            img = cv2.imread(img_name)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                # Record "no detections" so later images keep their index.
                print("Could not read image, skipping: " + img_name)
                boxes.append({})
                probs.append({})
                continue

            height, width, _ = img.shape
            X, new_width, new_height = format_img(img, C)

            if dim_ordering == 'tf':
                X = np.transpose(X, (0, 2, 3, 1))
            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = rpn_to_roi(Y1,
                           Y2,
                           C,
                           dim_ordering,
                           overlap_thresh=0.7)

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # Per-image accumulators. ``bboxes`` holds the raw boxes;
            # ``image_boxes`` / ``image_probs`` are the dicts stored in the
            # result lists and are filled in place.
            bboxes = {}
            image_boxes = {}
            image_probs = {}
            boxes.append(image_boxes)
            probs.append(image_probs)

            # apply the spatial pyramid pooling to the proposed regions
            last_batch = R.shape[0] // C.num_rois
            for jk in range(last_batch + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == last_batch:
                    # pad R: the final batch may be short, so fill it up to
                    # C.num_rois rows by repeating its first ROI.
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier.predict([F, ROIs])
                P_regr = P_regr / C.std_scaling

                # ROIs whose top class is the last index are dropped
                # (presumably the background class -- confirm against the
                # classifier's output layout).
                last_class_idx = P_cls.shape[2] - 1
                for ii in range(P_cls.shape[1]):
                    scores = P_cls[0, ii, :]
                    cls_num = np.argmax(scores)
                    best_prob = np.max(scores)
                    if best_prob < bbox_threshold or cls_num == last_class_idx:
                        continue

                    cls_name = class_mapping[cls_num]
                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        image_boxes[cls_name] = []
                        image_probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    # Regression targets are stored four per class.
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    x, y, w, h = apply_regr(x, y, w, h, tx, ty, tw, th)

                    # The factor 16 is presumably the RPN stride that maps
                    # feature-map cells to input pixels -- confirm.
                    bboxes[cls_name].append(
                        [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                    image_probs[cls_name].append(best_prob)

            # Scale the boxes from the network input size back to the
            # original image size.
            x_scale = width / new_width
            y_scale = height / new_height
            for key, raw_boxes in bboxes.items():
                image_boxes[key] = [
                    resize_bounding_box(x_scale, y_scale, b)
                    for b in np.array(raw_boxes)
                ]
    except KeyboardInterrupt:
        # Deliberate best-effort: stop early but still save what was collected.
        pass
    save_predictions(model_name, in_dir, images, boxes, probs)