Example #1
    def __init__(self, cfgs, is_training):

        self.cfgs = cfgs
        self.base_network_name = cfgs.NET_NAME
        self.is_training = is_training
        if cfgs.METHOD == 'H':
            self.num_anchors_per_location = len(cfgs.ANCHOR_SCALES) * len(cfgs.ANCHOR_RATIOS)
        else:
            self.num_anchors_per_location = len(cfgs.ANCHOR_SCALES) * len(cfgs.ANCHOR_RATIOS) * len(cfgs.ANCHOR_ANGLES)
        self.method = cfgs.METHOD
        self.losses_dict = {}
        self.drawer = DrawBoxTensor(cfgs)
        self.backbone = BuildBackbone(cfgs, is_training)
        self.pretrain_zoo = PretrainModelZoo()
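
A minimal sketch of the anchor-count arithmetic above. The scale/ratio/angle lists are hypothetical placeholders, not the repository's defaults:

ANCHOR_SCALES = [1.0, 2.0]                       # 2 scales (illustrative)
ANCHOR_RATIOS = [0.5, 1.0, 2.0]                  # 3 aspect ratios (illustrative)
ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15]   # 6 angles (illustrative)

# METHOD == 'H': horizontal anchors -> scales x ratios
print(len(ANCHOR_SCALES) * len(ANCHOR_RATIOS))                        # 6
# any other METHOD: rotated anchors -> scales x ratios x angles
print(len(ANCHOR_SCALES) * len(ANCHOR_RATIOS) * len(ANCHOR_ANGLES))   # 36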
Example #2
    def read_and_prepocess_single_img(self, filename_queue, shortside_len,
                                      is_training):

        img_name, img, gtboxes_and_label, num_objects = self.read_single_example_and_decode(
            filename_queue)

        img = tf.cast(img, tf.float32)

        if is_training:

            if self.cfgs.RGB2GRAY:
                # img, gtboxes_and_label = image_preprocess.aspect_ratio_jittering(img, gtboxes_and_label)
                img = self.image_preprocess.random_rgb2gray(
                    img_tensor=img, gtboxes_and_label=gtboxes_and_label)

            if self.cfgs.IMG_ROTATE:
                img, gtboxes_and_label = self.image_preprocess.random_rotate_img(
                    img_tensor=img, gtboxes_and_label=gtboxes_and_label)

            img, gtboxes_and_label, img_h, img_w = self.image_preprocess.short_side_resize(
                img_tensor=img,
                gtboxes_and_label=gtboxes_and_label,
                target_shortside_len=shortside_len,
                length_limitation=self.cfgs.IMG_MAX_LENGTH)

            if self.cfgs.HORIZONTAL_FLIP:
                img, gtboxes_and_label = self.image_preprocess.random_flip_left_right(
                    img_tensor=img, gtboxes_and_label=gtboxes_and_label)
            if self.cfgs.VERTICAL_FLIP:
                img, gtboxes_and_label = self.image_preprocess.random_flip_up_down(
                    img_tensor=img, gtboxes_and_label=gtboxes_and_label)

        else:
            img, gtboxes_and_label, img_h, img_w = self.image_preprocess.short_side_resize(
                img_tensor=img,
                gtboxes_and_label=gtboxes_and_label,
                target_shortside_len=shortside_len,
                length_limitation=self.cfgs.IMG_MAX_LENGTH)
        pretrain_zoo = PretrainModelZoo()
        if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
            img = img / 255 - tf.constant([[self.cfgs.PIXEL_MEAN_]])
        else:
            img = img - tf.constant([[self.cfgs.PIXEL_MEAN]])  # subtract the pixel mean last
        return img_name, img, gtboxes_and_label, num_objects, img_h, img_w
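
A NumPy sketch of the two normalization branches above; the mean values are assumed ImageNet-style constants, stand-ins for whatever cfgs actually holds:

import numpy as np

PIXEL_MEAN_ = np.array([0.485, 0.456, 0.406])      # assumed: [0, 1]-space mean (pth/mxnet zoo)
PIXEL_MEAN = np.array([123.68, 116.779, 103.939])  # assumed: raw-pixel-space mean (other backbones)

img = np.random.randint(0, 256, size=(4, 4, 3)).astype(np.float32)

img_pth = img / 255 - PIXEL_MEAN_  # scale to [0, 1], then subtract the per-channel mean
img_tf = img - PIXEL_MEAN          # subtract the per-channel mean in raw pixel space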
Example #3
    def draw_boxes_with_label_and_scores(self, img_array, boxes, labels, scores, method, head=None, is_csl=False, in_graph=True):
        if in_graph:
            pretrain_zoo = PretrainModelZoo()
            if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
                img_array = (img_array * np.array(self.cfgs.PIXEL_STD) + np.array(self.cfgs.PIXEL_MEAN_)) * 255
            else:
                img_array = img_array + np.array(self.cfgs.PIXEL_MEAN)
        if method == 3:
            img_array = self.draw_boxes_ellipse(img_array, boxes, labels)
        img_array = img_array.astype(np.float32)  # astype returns a copy; assign it back
        boxes = boxes.astype(np.float32)
        labels = labels.astype(np.int32)
        img_array = np.array(img_array * 255 / np.max(img_array), dtype=np.uint8)

        img_obj = Image.fromarray(img_array)
        raw_img_obj = img_obj.copy()

        draw_obj = ImageDraw.Draw(img_obj)
        num_of_objs = 0

        if head is None:
            head = np.ones_like(labels) * -1

        for box, a_label, a_score, a_head in zip(boxes, labels, scores, head):

            if a_label != self.NOT_DRAW_BOXES:
                num_of_objs += 1
                self.draw_a_rectangel_in_img(draw_obj, box, color=self.STANDARD_COLORS[a_label], width=3, method=method)
                if a_label == self.ONLY_DRAW_BOXES:  # -1
                    continue
                elif a_label == self.ONLY_DRAW_BOXES_WITH_SCORES:  # -2
                    self.only_draw_scores(draw_obj, box, a_score, color='White')
                else:
                    if is_csl:
                        self.draw_label_with_scores_csl(draw_obj, box, a_label, a_score, method, a_head, color='White')
                    else:
                        self.draw_label_with_scores(draw_obj, box, a_label, a_score, color='White')

        out_img_obj = Image.blend(raw_img_obj, img_obj, alpha=0.7)

        return np.array(out_img_obj)
Example #4
NET_NAME = 'resnet50_v1d'  # 'MobilenetV2'

# ---------------------------------------- System
ROOT_PATH = os.path.abspath('../../')
print(20 * "++--")
print(ROOT_PATH)
GPU_GROUP = "0,1,2"
NUM_GPU = len(GPU_GROUP.strip().split(','))
SHOW_TRAIN_INFO_INTE = 20
SMRY_ITER = 200
SAVE_WEIGHTS_INTE = 27000 * 2

SUMMARY_PATH = os.path.join(ROOT_PATH, 'output/summary')
TEST_SAVE_PATH = os.path.join(ROOT_PATH, 'tools/test_result')

pretrain_zoo = PretrainModelZoo()
PRETRAINED_CKPT = pretrain_zoo.pretrain_weight_path(NET_NAME, ROOT_PATH)
TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights')
EVALUATE_R_DIR = os.path.join(ROOT_PATH, 'output/evaluate_result_pickle/')

# ------------------------------------------ Train and test
RESTORE_FROM_RPN = False
FIXED_BLOCKS = 1  # allow 0~3
FREEZE_BLOCKS = [True, False, False, False, False]  # for gluoncv backbone
USE_07_METRIC = True
ADD_BOX_IN_TENSORBOARD = True

MUTILPY_BIAS_GRADIENT = 2.0  # if None, the bias gradient will not be multiplied
GRADIENT_CLIPPING_BY_NORM = 10.0  # if None, will not clip

CLS_WEIGHT = 1.0
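
A hypothetical loop showing how the interval settings above are typically consumed; only the three interval names come from this config, the rest is a sketch with the actual training call elided:

SHOW_TRAIN_INFO_INTE, SMRY_ITER, SAVE_WEIGHTS_INTE = 20, 200, 27000 * 2

for step in range(1, 1001):
    # sess.run(train_op) would go here
    if step % SHOW_TRAIN_INFO_INTE == 0:
        print('step %d: print training info' % step)
    if step % SMRY_ITER == 0:
        print('step %d: write TensorBoard summaries' % step)
    if step % SAVE_WEIGHTS_INTE == 0:
        print('step %d: save a checkpoint' % step)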
Example #5
    def main(self):
        with tf.Graph().as_default() as graph, tf.device('/cpu:0'):

            num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
            global_step = slim.get_or_create_global_step()
            lr = self.warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
            tf.summary.scalar('lr', lr)

            optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
            r3det = build_whole_network.DetectionNetworkR3Det(cfgs=self.cfgs,
                                                              is_training=True)

            with tf.name_scope('get_batch'):
                if cfgs.IMAGE_PYRAMID:
                    shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                    shortside_len = tf.random_shuffle(shortside_len_list)[0]

                else:
                    shortside_len = cfgs.IMG_SHORT_SIDE_LEN

                img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                    self.reader.next_batch(dataset_name=cfgs.DATASET_NAME,
                                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                                           shortside_len=shortside_len,
                                           is_training=True)

            # data processing
            inputs_list = []
            for i in range(num_gpu):
                img = tf.expand_dims(img_batch[i], axis=0)
                pretrain_zoo = PretrainModelZoo()
                if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
                    img = img / tf.constant([cfgs.PIXEL_STD])

                gtboxes_and_label_r = tf.py_func(
                    backward_convert,
                    inp=[gtboxes_and_label_batch[i]],
                    Tout=tf.float32)
                gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

                gtboxes_and_label_h = get_horizen_minAreaRectangle(
                    gtboxes_and_label_batch[i])
                gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

                num_objects = num_objects_batch[i]
                num_objects = tf.cast(tf.reshape(num_objects, [-1]), tf.float32)

                img_h = img_h_batch[i]
                img_w = img_w_batch[i]

                inputs_list.append([
                    img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                    img_h, img_w
                ])

            tower_grads = []
            biases_regularizer = tf.no_regularizer
            weights_regularizer = tf.contrib.layers.l2_regularizer(
                cfgs.WEIGHT_DECAY)

            with tf.variable_scope(tf.get_variable_scope()):
                for i in range(num_gpu):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('tower_%d' % i):
                            with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                    device='/device:CPU:0'):
                                with slim.arg_scope(
                                    [
                                        slim.conv2d, slim.conv2d_in_plane,
                                        slim.conv2d_transpose,
                                        slim.separable_conv2d,
                                        slim.fully_connected
                                    ],
                                        weights_regularizer=weights_regularizer,
                                        biases_regularizer=biases_regularizer,
                                        biases_initializer=tf.constant_initializer(0.0)):

                                    gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                        self.get_gtboxes_and_label,
                                        inp=[
                                            inputs_list[i][1],
                                            inputs_list[i][2],
                                            inputs_list[i][3]
                                        ],
                                        Tout=[tf.float32, tf.float32])
                                    gtboxes_and_label_h = tf.reshape(
                                        gtboxes_and_label_h, [-1, 5])
                                    gtboxes_and_label_r = tf.reshape(
                                        gtboxes_and_label_r, [-1, 6])

                                    img = inputs_list[i][0]
                                    img_shape = inputs_list[i][-2:]
                                    img = tf.image.crop_to_bounding_box(
                                        image=img,
                                        offset_height=0,
                                        offset_width=0,
                                        target_height=tf.cast(
                                            img_shape[0], tf.int32),
                                        target_width=tf.cast(
                                            img_shape[1], tf.int32))

                                    outputs = r3det.build_whole_detection_network(
                                        input_img_batch=img,
                                        gtboxes_batch_h=gtboxes_and_label_h,
                                        gtboxes_batch_r=gtboxes_and_label_r,
                                        gpu_id=i)
                                    gtboxes_in_img_h = self.drawer.draw_boxes_with_categories(
                                        img_batch=img,
                                        boxes=gtboxes_and_label_h[:, :-1],
                                        labels=gtboxes_and_label_h[:, -1],
                                        method=0)
                                    gtboxes_in_img_r = self.drawer.draw_boxes_with_categories(
                                        img_batch=img,
                                        boxes=gtboxes_and_label_r[:, :-1],
                                        labels=gtboxes_and_label_r[:, -1],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/gtboxes_h_gpu:%d' % i,
                                        gtboxes_in_img_h)
                                    tf.summary.image(
                                        'Compare/gtboxes_r_gpu:%d' % i,
                                        gtboxes_in_img_r)

                                    if cfgs.ADD_BOX_IN_TENSORBOARD:
                                        detections_in_img = self.drawer.draw_boxes_with_categories_and_scores(
                                            img_batch=img,
                                            boxes=outputs[0],
                                            scores=outputs[1],
                                            labels=outputs[2],
                                            method=1)
                                        tf.summary.image(
                                            'Compare/final_detection_gpu:%d' %
                                            i, detections_in_img)

                                    loss_dict = outputs[-1]
                                    total_loss_dict, total_losses = self.loss_dict(
                                        loss_dict, num_gpu)

                                    if i == num_gpu - 1:
                                        regularization_losses = tf.get_collection(
                                            tf.GraphKeys.REGULARIZATION_LOSSES)
                                        # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                        total_losses = total_losses + tf.add_n(
                                            regularization_losses)

                            tf.get_variable_scope().reuse_variables()
                            grads = optimizer.compute_gradients(total_losses)
                            if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                                grads = slim.learning.clip_gradient_norms(
                                    grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                            tower_grads.append(grads)
            self.log_printer(r3det, optimizer, global_step, tower_grads,
                             total_loss_dict, num_gpu, graph)
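
main() above only accumulates per-GPU gradients in tower_grads and hands them to log_printer; the merge step is not shown in this snippet. A common way to average tower gradients in TF1 (an assumption here, not code from this repository):

import tensorflow as tf

def average_tower_grads(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per GPU, as returned by
    # optimizer.compute_gradients(). Assumes every tower produced a gradient for
    # every variable (no None entries).
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        averaged.append((grad, grads_and_vars[0][1]))  # variables are shared across towers
    return averaged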
Example #6
    def eval_with_plac(self, img_dir, det_net, image_ext):

        os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu
        # 1. preprocess img
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # RGB, not BGR
        img_batch = tf.cast(img_plac, tf.float32)

        pretrain_zoo = PretrainModelZoo()
        if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
            img_batch = (img_batch / 255 - tf.constant(
                self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
        else:
            img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

        img_batch = tf.expand_dims(img_batch, axis=0)

        detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
            input_img_batch=img_batch)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        restorer, restore_ckpt = det_net.get_restorer()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            all_boxes_r = []
            imgs = os.listdir(img_dir)
            pbar = tqdm(imgs)
            for a_img_name in pbar:
                a_img_name = a_img_name.split(image_ext)[0]

                raw_img = cv2.imread(
                    os.path.join(img_dir, a_img_name + image_ext))
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

                det_boxes_r_all, det_scores_r_all, det_category_r_all = [], [], []

                img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(self.cfgs.IMG_SHORT_SIDE_LEN, list) else [
                    self.cfgs.IMG_SHORT_SIDE_LEN]
                img_short_side_len_list = [img_short_side_len_list[0]] if not self.args.multi_scale else img_short_side_len_list

                for short_size in img_short_side_len_list:
                    max_len = self.cfgs.IMG_MAX_LENGTH
                    if raw_h < raw_w:
                        new_h, new_w = short_size, min(int(short_size * float(raw_w) / raw_h), max_len)
                    else:
                        new_h, new_w = min(int(short_size * float(raw_h) / raw_w), max_len), short_size
                    img_resize = cv2.resize(raw_img, (new_w, new_h))

                    resized_img, detected_boxes, detected_scores, detected_categories = \
                        sess.run(
                            [img_batch, detection_boxes, detection_scores, detection_category],
                            feed_dict={img_plac: img_resize[:, :, ::-1]}
                        )

                    if detected_boxes.shape[0] == 0:
                        continue
                    resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                    detected_boxes = forward_convert(detected_boxes, False)
                    detected_boxes[:, 0::2] *= (raw_w / resized_w)
                    detected_boxes[:, 1::2] *= (raw_h / resized_h)

                    det_boxes_r_all.extend(detected_boxes)
                    det_scores_r_all.extend(detected_scores)
                    det_category_r_all.extend(detected_categories)
                det_boxes_r_all = np.array(det_boxes_r_all)
                det_scores_r_all = np.array(det_scores_r_all)
                det_category_r_all = np.array(det_category_r_all)

                box_res_rotate_ = []
                label_res_rotate_ = []
                score_res_rotate_ = []

                if det_scores_r_all.shape[0] != 0:
                    for sub_class in range(1, self.cfgs.CLASS_NUM + 1):
                        index = np.where(det_category_r_all == sub_class)[0]
                        if len(index) == 0:
                            continue
                        tmp_boxes_r = det_boxes_r_all[index]
                        tmp_label_r = det_category_r_all[index]
                        tmp_score_r = det_scores_r_all[index]

                        if self.args.multi_scale:
                            tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                            # try:
                            #     inx = nms_rotate.nms_rotate_cpu(boxes=np.array(tmp_boxes_r_),
                            #                                     scores=np.array(tmp_score_r),
                            #                                     iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                            #                                     max_output_size=5000)
                            # except:
                            tmp_boxes_r_ = np.array(tmp_boxes_r_)
                            tmp = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                            tmp[:, 0:-1] = tmp_boxes_r_
                            tmp[:, -1] = np.array(tmp_score_r)
                            # Note: rotate_gpu_nms computes the IoU of two identical rectangles
                            # as 0, so a tiny jitter is added to break ties between duplicates
                            jitter = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                            jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                            inx = rotate_gpu_nms(np.array(tmp, np.float32) + np.array(jitter, np.float32),
                                                 float(self.cfgs.NMS_IOU_THRESHOLD), 0)
                        else:
                            inx = np.arange(0, tmp_score_r.shape[0])

                        box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                        score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                        label_res_rotate_.extend(np.array(tmp_label_r)[inx])

                if len(box_res_rotate_) == 0:
                    all_boxes_r.append(np.array([]))
                    continue

                det_boxes_r_ = np.array(box_res_rotate_)
                det_scores_r_ = np.array(score_res_rotate_)
                det_category_r_ = np.array(label_res_rotate_)

                if self.args.draw_imgs:
                    detected_indices = det_scores_r_ >= self.cfgs.VIS_SCORE
                    detected_scores = det_scores_r_[detected_indices]
                    detected_boxes = det_boxes_r_[detected_indices]
                    detected_categories = det_category_r_[detected_indices]

                    detected_boxes = backward_convert(detected_boxes, False)

                    drawer = DrawBox(self.cfgs)

                    det_detections_r = drawer.draw_boxes_with_label_and_scores(
                        raw_img[:, :, ::-1],
                        boxes=detected_boxes,
                        labels=detected_categories,
                        scores=detected_scores,
                        method=1,
                        in_graph=True)

                    save_dir = os.path.join('test_hrsc', self.cfgs.VERSION,
                                            'hrsc2016_img_vis')
                    tools.makedirs(save_dir)

                    cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                                det_detections_r[:, :, ::-1])

                det_boxes_r_ = backward_convert(det_boxes_r_, False)

                x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                        det_boxes_r_[:, 3], det_boxes_r_[:, 4]

                boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
                dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                    det_scores_r_.reshape(-1, 1), boxes_r))
                all_boxes_r.append(dets_r)

                pbar.set_description("Eval image %s" % a_img_name)

            # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
            # pickle.dump(all_boxes_r, fw1)
            return all_boxes_r
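
The short-side resize rule used in the loop above, pulled out as a standalone sketch (the function name is ours):

def short_side_resize_shape(raw_h, raw_w, short_size, max_len):
    # Resize so the short side equals short_size, capping the long side at max_len.
    # Note the cap can break the exact aspect ratio, as in the branch above.
    if raw_h < raw_w:
        return short_size, min(int(short_size * float(raw_w) / raw_h), max_len)
    return min(int(short_size * float(raw_h) / raw_w), max_len), short_size

print(short_side_resize_shape(600, 1200, 800, 1333))  # (800, 1333), capped from (800, 1600)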
Example #7
    def worker(self, gpu_id, images, det_net, result_queue):
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # RGB, not BGR
        img_batch = tf.cast(img_plac, tf.float32)

        pretrain_zoo = PretrainModelZoo()
        if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
            img_batch = (img_batch / 255 - tf.constant(self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
        else:
            img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

        img_batch = tf.expand_dims(img_batch, axis=0)

        detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
            input_img_batch=img_batch)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        restorer, restore_ckpt = det_net.get_restorer()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model %d ...' % gpu_id)

            for img_path in images:

                # if 'P0015' not in img_path:
                #     continue

                img = cv2.imread(img_path)
                # img = np.load(img_path.replace('images', 'npy').replace('.png', '.npy'))

                box_res_rotate = []
                label_res_rotate = []
                score_res_rotate = []

                imgH = img.shape[0]
                imgW = img.shape[1]

                img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(self.cfgs.IMG_SHORT_SIDE_LEN, list) else [
                    self.cfgs.IMG_SHORT_SIDE_LEN]
                img_short_side_len_list = [img_short_side_len_list[0]] if not self.args.multi_scale else img_short_side_len_list

                if imgH < self.args.h_len:
                    temp = np.zeros([self.args.h_len, imgW, 3], np.float32)
                    temp[0:imgH, :, :] = img
                    img = temp
                    imgH = self.args.h_len

                if imgW < self.args.w_len:
                    temp = np.zeros([imgH, self.args.w_len, 3], np.float32)
                    temp[:, 0:imgW, :] = img
                    img = temp
                    imgW = self.args.w_len

                for hh in range(0, imgH, self.args.h_len - self.args.h_overlap):
                    if imgH - hh - 1 < self.args.h_len:
                        hh_ = imgH - self.args.h_len
                    else:
                        hh_ = hh
                    for ww in range(0, imgW, self.args.w_len - self.args.w_overlap):
                        if imgW - ww - 1 < self.args.w_len:
                            ww_ = imgW - self.args.w_len
                        else:
                            ww_ = ww
                        src_img = img[hh_:(hh_ + self.args.h_len), ww_:(ww_ + self.args.w_len), :]

                        for short_size in img_short_side_len_list:
                            max_len = self.cfgs.IMG_MAX_LENGTH
                            if self.args.h_len < self.args.w_len:
                                new_h, new_w = short_size, min(int(short_size * float(self.args.w_len) / self.args.h_len), max_len)
                            else:
                                new_h, new_w = min(int(short_size * float(self.args.h_len) / self.args.w_len), max_len), short_size
                            img_resize = cv2.resize(src_img, (new_w, new_h))

                            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                                sess.run(
                                    [img_batch, detection_boxes, detection_scores, detection_category],
                                    feed_dict={img_plac: img_resize[:, :, ::-1]}
                                )

                            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                            src_h, src_w = src_img.shape[0], src_img.shape[1]

                            if len(det_boxes_r_) > 0:
                                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                                det_boxes_r_[:, 0::2] *= (src_w / resized_w)
                                det_boxes_r_[:, 1::2] *= (src_h / resized_h)

                                for ii in range(len(det_boxes_r_)):
                                    box_rotate = det_boxes_r_[ii]
                                    box_rotate[0::2] = box_rotate[0::2] + ww_
                                    box_rotate[1::2] = box_rotate[1::2] + hh_
                                    box_res_rotate.append(box_rotate)
                                    label_res_rotate.append(det_category_r_[ii])
                                    score_res_rotate.append(det_scores_r_[ii])

                            if self.args.flip_img:
                                det_boxes_r_flip, det_scores_r_flip, det_category_r_flip = \
                                    sess.run(
                                        [detection_boxes, detection_scores, detection_category],
                                        feed_dict={img_plac: cv2.flip(img_resize, flipCode=1)[:, :, ::-1]}
                                    )
                                if len(det_boxes_r_flip) > 0:
                                    det_boxes_r_flip = forward_convert(det_boxes_r_flip, False)
                                    det_boxes_r_flip[:, 0::2] *= (src_w / resized_w)
                                    det_boxes_r_flip[:, 1::2] *= (src_h / resized_h)

                                    for ii in range(len(det_boxes_r_flip)):
                                        box_rotate = det_boxes_r_flip[ii]
                                        box_rotate[0::2] = (src_w - box_rotate[0::2]) + ww_
                                        box_rotate[1::2] = box_rotate[1::2] + hh_
                                        box_res_rotate.append(box_rotate)
                                        label_res_rotate.append(det_category_r_flip[ii])
                                        score_res_rotate.append(det_scores_r_flip[ii])

                                det_boxes_r_flip, det_scores_r_flip, det_category_r_flip = \
                                    sess.run(
                                        [detection_boxes, detection_scores, detection_category],
                                        feed_dict={img_plac: cv2.flip(img_resize, flipCode=0)[:, :, ::-1]}
                                    )
                                if len(det_boxes_r_flip) > 0:
                                    det_boxes_r_flip = forward_convert(det_boxes_r_flip, False)
                                    det_boxes_r_flip[:, 0::2] *= (src_w / resized_w)
                                    det_boxes_r_flip[:, 1::2] *= (src_h / resized_h)

                                    for ii in range(len(det_boxes_r_flip)):
                                        box_rotate = det_boxes_r_flip[ii]
                                        box_rotate[0::2] = box_rotate[0::2] + ww_
                                        box_rotate[1::2] = (src_h - box_rotate[1::2]) + hh_
                                        box_res_rotate.append(box_rotate)
                                        label_res_rotate.append(det_category_r_flip[ii])
                                        score_res_rotate.append(det_scores_r_flip[ii])

                box_res_rotate = np.array(box_res_rotate)
                label_res_rotate = np.array(label_res_rotate)
                score_res_rotate = np.array(score_res_rotate)

                box_res_rotate_ = []
                label_res_rotate_ = []
                score_res_rotate_ = []
                threshold = {'roundabout': 0.1, 'tennis-court': 0.3, 'swimming-pool': 0.1, 'storage-tank': 0.2,
                             'soccer-ball-field': 0.3, 'small-vehicle': 0.2, 'ship': 0.2, 'plane': 0.3,
                             'large-vehicle': 0.1, 'helicopter': 0.2, 'harbor': 0.0001, 'ground-track-field': 0.3,
                             'bridge': 0.0001, 'basketball-court': 0.3, 'baseball-diamond': 0.3,
                             'container-crane': 0.05, 'airport': 0.1, 'helipad': 0.1}

                for sub_class in range(1, self.cfgs.CLASS_NUM + 1):
                    index = np.where(label_res_rotate == sub_class)[0]
                    if len(index) == 0:
                        continue
                    tmp_boxes_r = box_res_rotate[index]
                    tmp_label_r = label_res_rotate[index]
                    tmp_score_r = score_res_rotate[index]

                    tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                    # try:
                    #     inx = nms_rotate.nms_rotate_cpu(boxes=np.array(tmp_boxes_r_),
                    #                                     scores=np.array(tmp_score_r),
                    #                                     iou_threshold=threshold[self.label_name_map[sub_class]],
                    #                                     max_output_size=5000)
                    #
                    # except:
                    tmp_boxes_r_ = np.array(tmp_boxes_r_)
                    tmp = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    tmp[:, 0:-1] = tmp_boxes_r_
                    tmp[:, -1] = np.array(tmp_score_r)
                    # Note: rotate_gpu_nms computes the IoU of two identical rectangles
                    # as 0, so a tiny jitter is added to break ties between duplicates
                    jitter = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                    inx = rotate_gpu_nms(np.array(tmp, np.float32) + np.array(jitter, np.float32),
                                         float(threshold[self.label_name_map[sub_class]]), 0)

                    box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                    score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                    label_res_rotate_.extend(np.array(tmp_label_r)[inx])

                result_dict = {'boxes': np.array(box_res_rotate_), 'scores': np.array(score_res_rotate_),
                               'labels': np.array(label_res_rotate_), 'image_id': img_path}
                result_queue.put_nowait(result_dict)
Example #8
    def worker(self, gpu_id, images, det_net, result_queue):
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
        # 1. preprocess img
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # RGB, not BGR
        img_batch = tf.cast(img_plac, tf.float32)

        pretrain_zoo = PretrainModelZoo()
        if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
            img_batch = (img_batch / 255 - tf.constant(
                self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
        else:
            img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

        img_batch = tf.expand_dims(img_batch, axis=0)

        detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch_h=None,
            gtboxes_batch_r=None,
            gpu_id=0)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        restorer, restore_ckpt = det_net.get_restorer()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model %d ...' % gpu_id)
            for a_img in images:
                raw_img = cv2.imread(a_img)
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

                det_boxes_r_all, det_scores_r_all, det_category_r_all = [], [], []

                img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(self.cfgs.IMG_SHORT_SIDE_LEN, list) else [
                    self.cfgs.IMG_SHORT_SIDE_LEN]
                img_short_side_len_list = [img_short_side_len_list[0]] if not self.args.multi_scale else img_short_side_len_list

                for short_size in img_short_side_len_list:
                    max_len = self.cfgs.IMG_MAX_LENGTH
                    if raw_h < raw_w:
                        new_h, new_w = short_size, min(int(short_size * float(raw_w) / raw_h), max_len)
                    else:
                        new_h, new_w = min(int(short_size * float(raw_h) / raw_w), max_len), short_size
                    img_resize = cv2.resize(raw_img, (new_w, new_h))

                    resized_img, detected_boxes, detected_scores, detected_categories = \
                        sess.run(
                            [img_batch, detection_boxes, detection_scores, detection_category],
                            feed_dict={img_plac: img_resize[:, :, ::-1]}
                        )

                    detected_indices = detected_scores >= self.cfgs.VIS_SCORE
                    detected_scores = detected_scores[detected_indices]
                    detected_boxes = detected_boxes[detected_indices]
                    detected_categories = detected_categories[detected_indices]

                    if detected_boxes.shape[0] == 0:
                        continue
                    resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                    detected_boxes = forward_convert(detected_boxes, False)
                    detected_boxes[:, 0::2] *= (raw_w / resized_w)
                    detected_boxes[:, 1::2] *= (raw_h / resized_h)

                    det_boxes_r_all.extend(detected_boxes)
                    det_scores_r_all.extend(detected_scores)
                    det_category_r_all.extend(detected_categories)

                    if self.args.flip_img:
                        detected_boxes, detected_scores, detected_categories = \
                            sess.run(
                                [detection_boxes, detection_scores, detection_category],
                                feed_dict={img_plac: cv2.flip(img_resize, flipCode=1)[:, :, ::-1]}
                            )
                        detected_indices = detected_scores >= self.cfgs.VIS_SCORE
                        detected_scores = detected_scores[detected_indices]
                        detected_boxes = detected_boxes[detected_indices]
                        detected_categories = detected_categories[detected_indices]

                        if detected_boxes.shape[0] == 0:
                            continue
                        resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                        detected_boxes = forward_convert(detected_boxes, False)
                        detected_boxes[:, 0::2] *= (raw_w / resized_w)
                        detected_boxes[:, 0::2] = (raw_w -
                                                   detected_boxes[:, 0::2])
                        detected_boxes[:, 1::2] *= (raw_h / resized_h)

                        det_boxes_r_all.extend(sort_corners(detected_boxes))
                        det_scores_r_all.extend(detected_scores)
                        det_category_r_all.extend(detected_categories)

                        detected_boxes, detected_scores, detected_categories = \
                            sess.run(
                                [detection_boxes, detection_scores, detection_category],
                                feed_dict={img_plac: cv2.flip(img_resize, flipCode=0)[:, :, ::-1]}
                            )
                        detected_indices = detected_scores >= self.cfgs.VIS_SCORE
                        detected_scores = detected_scores[detected_indices]
                        detected_boxes = detected_boxes[detected_indices]
                        detected_categories = detected_categories[detected_indices]

                        if detected_boxes.shape[0] == 0:
                            continue
                        resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                        detected_boxes = forward_convert(detected_boxes, False)
                        detected_boxes[:, 0::2] *= (raw_w / resized_w)
                        detected_boxes[:, 1::2] *= (raw_h / resized_h)
                        detected_boxes[:, 1::2] = (raw_h -
                                                   detected_boxes[:, 1::2])
                        det_boxes_r_all.extend(sort_corners(detected_boxes))
                        det_scores_r_all.extend(detected_scores)
                        det_category_r_all.extend(detected_categories)

                det_boxes_r_all = np.array(det_boxes_r_all)
                det_scores_r_all = np.array(det_scores_r_all)
                det_category_r_all = np.array(det_category_r_all)

                box_res_rotate_ = []
                label_res_rotate_ = []
                score_res_rotate_ = []

                if det_scores_r_all.shape[0] != 0:
                    for sub_class in range(1, self.cfgs.CLASS_NUM + 1):
                        index = np.where(det_category_r_all == sub_class)[0]
                        if len(index) == 0:
                            continue
                        tmp_boxes_r = det_boxes_r_all[index]
                        tmp_label_r = det_category_r_all[index]
                        tmp_score_r = det_scores_r_all[index]

                        if self.args.multi_scale:
                            tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                            # try:
                            #     inx = nms_rotate.nms_rotate_cpu(boxes=np.array(tmp_boxes_r_),
                            #                                     scores=np.array(tmp_score_r),
                            #                                     iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                            #                                     max_output_size=5000)
                            # except:
                            tmp_boxes_r_ = np.array(tmp_boxes_r_)
                            tmp = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                            tmp[:, 0:-1] = tmp_boxes_r_
                            tmp[:, -1] = np.array(tmp_score_r)
                            # Note: rotate_gpu_nms computes the IoU of two identical rectangles
                            # as 0, so a tiny jitter is added to break ties between duplicates
                            jitter = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                            jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                            inx = rotate_gpu_nms(np.array(tmp, np.float32) + np.array(jitter, np.float32),
                                                 float(self.cfgs.NMS_IOU_THRESHOLD), 0)
                        else:
                            inx = np.arange(0, tmp_score_r.shape[0])

                        box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                        score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                        label_res_rotate_.extend(np.array(tmp_label_r)[inx])

                box_res_rotate_ = np.array(box_res_rotate_)
                score_res_rotate_ = np.array(score_res_rotate_)
                label_res_rotate_ = np.array(label_res_rotate_)

                result_dict = {
                    'scales': [1, 1],
                    'boxes': box_res_rotate_,
                    'scores': score_res_rotate_,
                    'labels': label_res_rotate_,
                    'image_id': a_img
                }
                result_queue.put_nowait(result_dict)
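
The sliding-window crop in the worker above steps by h_len - h_overlap (w_len - w_overlap) and clamps any window that would overrun the border. A standalone sketch of the window origins along one axis (the helper name is ours):

def window_origins(img_len, win_len, overlap):
    # Mirrors the hh/ww loops above; the clamped final windows may overlap
    # their neighbours by more than `overlap`.
    origins = []
    for start in range(0, img_len, win_len - overlap):
        origins.append(img_len - win_len if img_len - start - 1 < win_len else start)
    return origins

print(window_origins(2000, 600, 150))  # [0, 450, 900, 1350, 1400]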
Example #9
    def __init__(self, cfgs, is_training):
        self.cfgs = cfgs
        self.base_network_name = cfgs.NET_NAME
        self.is_training = is_training
        self.fpn_func = self.fpn_mode(cfgs.FPN_MODE)
        self.pretrain_zoo = PretrainModelZoo()
Example #10
    def eval_with_plac(self, img_dir, det_net, image_ext):

        os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu
        # 1. preprocess img
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # RGB, not BGR
        img_batch = tf.cast(img_plac, tf.float32)

        pretrain_zoo = PretrainModelZoo()
        if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
            img_batch = (img_batch / 255 - tf.constant(self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
        else:
            img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

        img_batch = tf.expand_dims(img_batch, axis=0)

        output = det_net.build_whole_detection_network(
            input_img_batch=img_batch)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        restorer, restore_ckpt = det_net.get_restorer()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            all_boxes_r = []
            imgs = os.listdir(img_dir)
            pbar = tqdm(imgs)
            for a_img_name in pbar:
                a_img_name = a_img_name.split(image_ext)[0]

                raw_img = cv2.imread(os.path.join(img_dir,
                                                  a_img_name + image_ext))
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

                img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(self.cfgs.IMG_SHORT_SIDE_LEN, list) else [
                    self.cfgs.IMG_SHORT_SIDE_LEN]
                img_short_side_len_list = [img_short_side_len_list[0]] if not self.args.multi_scale else img_short_side_len_list

                for short_size in img_short_side_len_list:
                    max_len = self.cfgs.IMG_MAX_LENGTH
                    if raw_h < raw_w:
                        new_h, new_w = short_size, min(int(short_size * float(raw_w) / raw_h), max_len)
                    else:
                        new_h, new_w = min(int(short_size * float(raw_h) / raw_w), max_len), short_size
                    img_resize = cv2.resize(raw_img, (new_w, new_h))

                    output_ = \
                        sess.run(
                            [output],
                            feed_dict={img_plac: img_resize[:, :, ::-1]}
                        )

                pbar.set_description("Eval image %s" % a_img_name)

            # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
            # pickle.dump(all_boxes_r, fw1)
            return all_boxes_r