def __init__(self, cfgs, is_training):
    """Store the configuration and build the shared detector components.

    Args:
        cfgs: Configuration object. This constructor reads ``NET_NAME``,
            ``METHOD``, ``ANCHOR_SCALES``, ``ANCHOR_RATIOS`` and, for any
            ``METHOD`` other than ``'H'``, ``ANCHOR_ANGLES``.
        is_training: Flag stored on the instance and forwarded to
            ``BuildBackbone``.
    """
    self.cfgs = cfgs
    self.is_training = is_training
    self.base_network_name = cfgs.NET_NAME
    self.method = cfgs.METHOD

    # One anchor per (scale, ratio) pair; every method except 'H' also
    # multiplies by the number of configured anchor angles.
    anchor_count = len(cfgs.ANCHOR_SCALES) * len(cfgs.ANCHOR_RATIOS)
    if cfgs.METHOD != 'H':
        anchor_count *= len(cfgs.ANCHOR_ANGLES)
    self.num_anchors_per_location = anchor_count

    self.losses_dict = {}
    self.drawer = DrawBoxTensor(cfgs)
    self.backbone = BuildBackbone(cfgs, is_training)
    self.pretrain_zoo = PretrainModelZoo()
def read_and_prepocess_single_img(self, filename_queue, shortside_len, is_training):
    """Decode one example, augment/resize it and normalise the pixels.

    Args:
        filename_queue: Passed straight to
            ``self.read_single_example_and_decode``.
        shortside_len: Target short-side length handed to
            ``short_side_resize``.
        is_training: When truthy, the augmentations enabled in ``self.cfgs``
            (``RGB2GRAY``, ``IMG_ROTATE``, ``HORIZONTAL_FLIP``,
            ``VERTICAL_FLIP``) are applied around the resize.

    Returns:
        Tuple ``(img_name, img, gtboxes_and_label, num_objects, img_h, img_w)``.
    """
    cfg = self.cfgs
    augment = self.image_preprocess

    img_name, img, gtboxes_and_label, num_objects = \
        self.read_single_example_and_decode(filename_queue)
    img = tf.cast(img, tf.float32)

    # Augmentations that run before the resize (training only).
    if is_training:
        if cfg.RGB2GRAY:
            img = augment.random_rgb2gray(
                img_tensor=img, gtboxes_and_label=gtboxes_and_label)
        if cfg.IMG_ROTATE:
            img, gtboxes_and_label = augment.random_rotate_img(
                img_tensor=img, gtboxes_and_label=gtboxes_and_label)

    # The resize itself is identical for training and evaluation.
    img, gtboxes_and_label, img_h, img_w = augment.short_side_resize(
        img_tensor=img,
        gtboxes_and_label=gtboxes_and_label,
        target_shortside_len=shortside_len,
        length_limitation=cfg.IMG_MAX_LENGTH)

    # Augmentations that run after the resize (training only).
    if is_training:
        if cfg.HORIZONTAL_FLIP:
            img, gtboxes_and_label = augment.random_flip_left_right(
                img_tensor=img, gtboxes_and_label=gtboxes_and_label)
        if cfg.VERTICAL_FLIP:
            img, gtboxes_and_label = augment.random_flip_up_down(
                img_tensor=img, gtboxes_and_label=gtboxes_and_label)

    # Subtract the pixel mean last. Backbones listed in the pth/mxnet zoos
    # get the image divided by 255 first and use PIXEL_MEAN_ instead.
    zoo = PretrainModelZoo()
    scaled_to_unit = cfg.NET_NAME in zoo.pth_zoo or cfg.NET_NAME in zoo.mxnet_zoo
    if scaled_to_unit:
        img = img / 255 - tf.constant([[cfg.PIXEL_MEAN_]])
    else:
        img = img - tf.constant([[cfg.PIXEL_MEAN]])

    return img_name, img, gtboxes_and_label, num_objects, img_h, img_w
def draw_boxes_with_label_and_scores(self, img_array, boxes, labels, scores, method, head=None, is_csl=False, in_graph=True):
    """Draw boxes (and optionally labels/scores) onto an image.

    Args:
        img_array: Image as a numpy array. When ``in_graph`` is true the
            normalisation configured in ``self.cfgs`` is undone first.
        boxes: Array of boxes; cast to float32 before drawing.
        labels: Array of integer labels, one per box. The sentinel values
            ``self.NOT_DRAW_BOXES``, ``self.ONLY_DRAW_BOXES`` and
            ``self.ONLY_DRAW_BOXES_WITH_SCORES`` select what is drawn.
        scores: One score per box.
        method: Forwarded to ``draw_a_rectangel_in_img``; the value ``3``
            additionally routes the image through ``draw_boxes_ellipse``.
        head: Optional per-box head values used by the CSL label drawer;
            defaults to ``-1`` for every box.
        is_csl: Use ``draw_label_with_scores_csl`` instead of
            ``draw_label_with_scores`` for regular labels.
        in_graph: Whether ``img_array`` is still mean/std normalised.

    Returns:
        A numpy array holding the annotated image, blended 70/30 with the
        un-annotated one.
    """
    if in_graph:
        pretrain_zoo = PretrainModelZoo()
        if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
            img_array = (img_array * np.array(self.cfgs.PIXEL_STD) + np.array(self.cfgs.PIXEL_MEAN_)) * 255
        else:
            img_array = img_array + np.array(self.cfgs.PIXEL_MEAN)

    if method == 3:
        img_array = self.draw_boxes_ellipse(img_array, boxes, labels)

    # ndarray.astype returns a new array; the original code threw that
    # result away. Integer images (e.g. uint8 with in_graph=False) then
    # overflowed in `img_array * 255` below. Float inputs are left
    # untouched so their results stay bit-identical.
    if not np.issubdtype(img_array.dtype, np.floating):
        img_array = img_array.astype(np.float32)
    boxes = boxes.astype(np.float32)
    labels = labels.astype(np.int32)

    # Stretch to 0..255. A non-positive peak (e.g. an all-black image) would
    # divide by zero, so emit a black canvas instead.
    peak = np.max(img_array)
    if peak > 0:
        img_array = np.array(img_array * 255 / peak, dtype=np.uint8)
    else:
        img_array = np.zeros(img_array.shape, dtype=np.uint8)

    img_obj = Image.fromarray(img_array)
    raw_img_obj = img_obj.copy()
    draw_obj = ImageDraw.Draw(img_obj)

    if head is None:
        head = np.ones_like(labels) * -1

    for box, a_label, a_score, a_head in zip(boxes, labels, scores, head):
        if a_label == self.NOT_DRAW_BOXES:
            continue
        self.draw_a_rectangel_in_img(draw_obj, box, color=self.STANDARD_COLORS[a_label], width=3, method=method)
        if a_label == self.ONLY_DRAW_BOXES:  # -1
            continue
        if a_label == self.ONLY_DRAW_BOXES_WITH_SCORES:  # -2
            self.only_draw_scores(draw_obj, box, a_score, color='White')
        elif is_csl:
            self.draw_label_with_scores_csl(draw_obj, box, a_label, a_score, method, a_head, color='White')
        else:
            self.draw_label_with_scores(draw_obj, box, a_label, a_score, color='White')

    out_img_obj = Image.blend(raw_img_obj, img_obj, alpha=0.7)
    return np.array(out_img_obj)
NET_NAME = 'resnet50_v1d' # 'MobilenetV2' # ---------------------------------------- System ROOT_PATH = os.path.abspath('../../') print(20 * "++--") print(ROOT_PATH) GPU_GROUP = "0,1,2" NUM_GPU = len(GPU_GROUP.strip().split(',')) SHOW_TRAIN_INFO_INTE = 20 SMRY_ITER = 200 SAVE_WEIGHTS_INTE = 27000 * 2 SUMMARY_PATH = os.path.join(ROOT_PATH, 'output/summary') TEST_SAVE_PATH = os.path.join(ROOT_PATH, 'tools/test_result') pretrain_zoo = PretrainModelZoo() PRETRAINED_CKPT = pretrain_zoo.pretrain_weight_path(NET_NAME, ROOT_PATH) TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') EVALUATE_R_DIR = os.path.join(ROOT_PATH, 'output/evaluate_result_pickle/') # ------------------------------------------ Train and test RESTORE_FROM_RPN = False FIXED_BLOCKS = 1 # allow 0~3 FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone USE_07_METRIC = True ADD_BOX_IN_TENSORBOARD = True MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip CLS_WEIGHT = 1.0
def main(self):
    """Build the multi-GPU training graph and hand it to ``self.log_printer``.

    One tower is built per comma-separated entry of ``cfgs.GPU_GROUP``; the
    gradients of each tower are collected in ``tower_grads``. Nothing is
    returned.

    NOTE(review): this method reads both the module-level ``cfgs`` and
    ``self.cfgs``; presumably they are the same object — confirm.
    """
    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = self.warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        r3det = build_whole_network.DetectionNetworkR3Det(cfgs=self.cfgs, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # Shuffle the configured short-side lengths and take the first.
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                self.reader.next_batch(dataset_name=cfgs.DATASET_NAME,
                                       batch_size=cfgs.BATCH_SIZE * num_gpu,
                                       shortside_len=shortside_len,
                                       is_training=True)

        # data processing
        # Only batch element i is used for tower i.
        # NOTE(review): this looks like it assumes BATCH_SIZE == 1 — confirm.
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)

            pretrain_zoo = PretrainModelZoo()
            if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
                # Backbones from the pth/mxnet zoos also get divided by PIXEL_STD.
                img = img / tf.constant([cfgs.PIXEL_STD])

            # Two views of the ground truth: 6 columns after backward_convert
            # and 5 columns after get_horizen_minAreaRectangle.
            gtboxes_and_label_r = tf.py_func(
                backward_convert,
                inp=[gtboxes_and_label_batch[i]],
                Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # slim variables are pinned to the CPU device while
                        # the tower's ops are built under /gpu:i.
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [
                                        slim.conv2d, slim.conv2d_in_plane,
                                        slim.conv2d_transpose,
                                        slim.separable_conv2d,
                                        slim.fully_connected
                                    ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    self.get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1],
                                        inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                # Last two entries are img_h and img_w.
                                img_shape = inputs_list[i][-2:]
                                # Crop from the top-left corner down to
                                # img_h x img_w.
                                # NOTE(review): presumably this strips batch
                                # padding — confirm.
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = r3det.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)

                                # Image summaries of the ground truth: last
                                # column is the label, the rest is the box.
                                gtboxes_in_img_h = self.drawer.draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = self.drawer.draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = self.drawer.draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                # The loss dictionary is the last element of
                                # the network outputs.
                                loss_dict = outputs[-1]
                                total_loss_dict, total_losses = self.loss_dict(
                                    loss_dict, num_gpu)

                                # Regularisation losses are added once, on
                                # the last tower only.
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        # Later towers reuse the variables created by the
                        # first one.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)
        self.log_printer(r3det, optimizer, global_step, tower_grads,
                         total_loss_dict, num_gpu, graph)
def eval_with_plac(self, img_dir, det_net, image_ext):
    """Run the detector over every file in ``img_dir`` and collect detections.

    Args:
        img_dir: Directory whose listing (``os.listdir``) is evaluated.
        det_net: Network object providing ``build_whole_detection_network``
            and ``get_restorer``.
        image_ext: Image file extension; it is split off each listed name
            and appended again to build the path that is read.

    Returns:
        A list with one entry per listed file: an empty array when nothing
        was detected, otherwise an array whose columns are category, score
        and the first five columns of ``backward_convert``'s output (named
        ``x_c, y_c, w, h, theta`` below).
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8,
                              shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    pretrain_zoo = PretrainModelZoo()
    if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
        img_batch = (img_batch / 255 - tf.constant(
            self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(image_ext)[0]

            # NOTE(review): cv2.imread's result is used without a None
            # check — confirm img_dir only ever holds readable images.
            raw_img = cv2.imread(
                os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            det_boxes_r_all, det_scores_r_all, det_category_r_all = [], [], []

            # Normalise IMG_SHORT_SIDE_LEN to a list. Unless multi-scale
            # testing is requested, only its first entry is used.
            img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(
                self.cfgs.IMG_SHORT_SIDE_LEN,
                list) else [self.cfgs.IMG_SHORT_SIDE_LEN]
            img_short_side_len_list = [
                img_short_side_len_list[0]
            ] if not self.args.multi_scale else img_short_side_len_list

            for short_size in img_short_side_len_list:
                # Resize so the shorter side equals short_size; the longer
                # side is capped at IMG_MAX_LENGTH.
                max_len = self.cfgs.IMG_MAX_LENGTH
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h, new_w = min(
                        int(short_size * float(raw_h) / raw_w),
                        max_len), short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                # The channel axis is reversed before feeding (the
                # placeholder is documented above as RGB).
                resized_img, detected_boxes, detected_scores, detected_categories = \
                    sess.run(
                        [img_batch, detection_boxes, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}
                    )

                if detected_boxes.shape[0] == 0:
                    continue
                resized_h, resized_w = resized_img.shape[
                    1], resized_img.shape[2]
                detected_boxes = forward_convert(detected_boxes, False)
                # Even columns are scaled by the width ratio and odd columns
                # by the height ratio, mapping boxes back to raw-image size.
                detected_boxes[:, 0::2] *= (raw_w / resized_w)
                detected_boxes[:, 1::2] *= (raw_h / resized_h)

                det_boxes_r_all.extend(detected_boxes)
                det_scores_r_all.extend(detected_scores)
                det_category_r_all.extend(detected_categories)
            det_boxes_r_all = np.array(det_boxes_r_all)
            det_scores_r_all = np.array(det_scores_r_all)
            det_category_r_all = np.array(det_category_r_all)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []

            if det_scores_r_all.shape[0] != 0:
                for sub_class in range(1, self.cfgs.CLASS_NUM + 1):
                    index = np.where(det_category_r_all == sub_class)[0]
                    if len(index) == 0:
                        continue
                    tmp_boxes_r = det_boxes_r_all[index]
                    tmp_label_r = det_category_r_all[index]
                    tmp_score_r = det_scores_r_all[index]

                    if self.args.multi_scale:
                        # Detections from several scales are merged with a
                        # per-class rotated NMS.
                        tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                        # A CPU alternative (nms_rotate.nms_rotate_cpu) was
                        # tried here previously and is disabled.
                        tmp_boxes_r_ = np.array(tmp_boxes_r_)
                        # rotate_gpu_nms input: box columns followed by the
                        # score in the last column.
                        tmp = np.zeros([
                            tmp_boxes_r_.shape[0],
                            tmp_boxes_r_.shape[1] + 1
                        ])
                        tmp[:, 0:-1] = tmp_boxes_r_
                        tmp[:, -1] = np.array(tmp_score_r)
                        # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                        # A small random jitter is therefore added to the
                        # first column.
                        jitter = np.zeros([
                            tmp_boxes_r_.shape[0],
                            tmp_boxes_r_.shape[1] + 1
                        ])
                        jitter[:, 0] += np.random.rand(
                            tmp_boxes_r_.shape[0], ) / 1000
                        inx = rotate_gpu_nms(
                            np.array(tmp, np.float32) +
                            np.array(jitter, np.float32),
                            float(self.cfgs.NMS_IOU_THRESHOLD), 0)
                    else:
                        # Single scale: keep every detection.
                        inx = np.arange(0, tmp_score_r.shape[0])

                    box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                    score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                    label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            if len(box_res_rotate_) == 0:
                # Keep one entry per image even when nothing was detected.
                all_boxes_r.append(np.array([]))
                continue

            det_boxes_r_ = np.array(box_res_rotate_)
            det_scores_r_ = np.array(score_res_rotate_)
            det_category_r_ = np.array(label_res_rotate_)

            if self.args.draw_imgs:
                # Only detections scoring at least VIS_SCORE are drawn.
                detected_indices = det_scores_r_ >= self.cfgs.VIS_SCORE
                detected_scores = det_scores_r_[detected_indices]
                detected_boxes = det_boxes_r_[detected_indices]
                detected_categories = det_category_r_[detected_indices]

                detected_boxes = backward_convert(detected_boxes, False)

                drawer = DrawBox(self.cfgs)

                # NOTE(review): in_graph=True is passed together with the
                # raw (un-normalised) image — confirm this is intended.
                det_detections_r = drawer.draw_boxes_with_label_and_scores(
                    raw_img[:, :, ::-1],
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=True)

                save_dir = os.path.join('test_hrsc', self.cfgs.VERSION,
                                        'hrsc2016_img_vis')
                tools.makedirs(save_dir)

                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            det_boxes_r_ = backward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            # One row per detection: category, score, then the five box values.
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1), boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)
        return all_boxes_r
def worker(self, gpu_id, images, det_net, result_queue):
    """Detect objects in ``images`` using overlapping crops and queue results.

    Each image is zero-padded up to at least ``args.h_len`` x ``args.w_len``
    and cut into windows of that size. Each window is run through the
    network, optionally also flipped, and the merged boxes go through a
    per-class rotated NMS.

    Args:
        gpu_id: Value written to ``CUDA_VISIBLE_DEVICES``.
        images: Iterable of image paths.
        det_net: Network object providing ``build_whole_detection_network``
            and ``get_restorer``.
        result_queue: Queue receiving one dict per image with the keys
            ``'boxes'``, ``'scores'``, ``'labels'`` and ``'image_id'``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    pretrain_zoo = PretrainModelZoo()
    if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
        img_batch = (img_batch / 255 - tf.constant(self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model %d ...' % gpu_id)

        for img_path in images:

            # if 'P0015' not in img_path:
            #     continue

            # NOTE(review): cv2.imread's result is used without a None check.
            img = cv2.imread(img_path)
            # img = np.load(img_path.replace('images', 'npy').replace('.png', '.npy'))

            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            # Normalise IMG_SHORT_SIDE_LEN to a list. Unless multi-scale
            # testing is requested, only its first entry is used.
            img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(self.cfgs.IMG_SHORT_SIDE_LEN, list) else [
                self.cfgs.IMG_SHORT_SIDE_LEN]
            img_short_side_len_list = [img_short_side_len_list[0]] if not self.args.multi_scale else img_short_side_len_list

            # Zero-pad images smaller than one window (bottom, then right).
            # The padded copy is float32.
            if imgH < self.args.h_len:
                temp = np.zeros([self.args.h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = self.args.h_len

            if imgW < self.args.w_len:
                temp = np.zeros([imgH, self.args.w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = self.args.w_len

            # Slide the window with stride (len - overlap). A window that
            # would run past the border is moved back so it ends at the edge.
            for hh in range(0, imgH, self.args.h_len - self.args.h_overlap):
                if imgH - hh - 1 < self.args.h_len:
                    hh_ = imgH - self.args.h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, self.args.w_len - self.args.w_overlap):
                    if imgW - ww - 1 < self.args.w_len:
                        ww_ = imgW - self.args.w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + self.args.h_len), ww_:(ww_ + self.args.w_len), :]

                    for short_size in img_short_side_len_list:
                        # Resize target is derived from the window size, with
                        # the longer side capped at IMG_MAX_LENGTH.
                        max_len = self.cfgs.IMG_MAX_LENGTH
                        if self.args.h_len < self.args.w_len:
                            new_h, new_w = short_size, min(int(short_size * float(self.args.w_len) / self.args.h_len), max_len)
                        else:
                            new_h, new_w = min(int(short_size * float(self.args.h_len) / self.args.w_len), max_len), short_size
                        img_resize = cv2.resize(src_img, (new_w, new_h))

                        resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                            sess.run(
                                [img_batch, detection_boxes, detection_scores, detection_category],
                                feed_dict={img_plac: img_resize[:, :, ::-1]}
                            )

                        resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                        src_h, src_w = src_img.shape[0], src_img.shape[1]

                        if len(det_boxes_r_) > 0:
                            det_boxes_r_ = forward_convert(det_boxes_r_, False)
                            # Scale back to window size (even columns by
                            # width, odd by height), then shift by the
                            # window origin into full-image coordinates.
                            det_boxes_r_[:, 0::2] *= (src_w / resized_w)
                            det_boxes_r_[:, 1::2] *= (src_h / resized_h)

                            for ii in range(len(det_boxes_r_)):
                                box_rotate = det_boxes_r_[ii]
                                box_rotate[0::2] = box_rotate[0::2] + ww_
                                box_rotate[1::2] = box_rotate[1::2] + hh_
                                box_res_rotate.append(box_rotate)
                                label_res_rotate.append(det_category_r_[ii])
                                score_res_rotate.append(det_scores_r_[ii])

                        if self.args.flip_img:
                            # Test-time flip with flipCode=1; even columns
                            # are mirrored back with src_w - x.
                            det_boxes_r_flip, det_scores_r_flip, det_category_r_flip = \
                                sess.run(
                                    [detection_boxes, detection_scores, detection_category],
                                    feed_dict={img_plac: cv2.flip(img_resize, flipCode=1)[:, :, ::-1]}
                                )
                            if len(det_boxes_r_flip) > 0:
                                det_boxes_r_flip = forward_convert(det_boxes_r_flip, False)
                                det_boxes_r_flip[:, 0::2] *= (src_w / resized_w)
                                det_boxes_r_flip[:, 1::2] *= (src_h / resized_h)

                                for ii in range(len(det_boxes_r_flip)):
                                    box_rotate = det_boxes_r_flip[ii]
                                    box_rotate[0::2] = (src_w - box_rotate[0::2]) + ww_
                                    box_rotate[1::2] = box_rotate[1::2] + hh_
                                    box_res_rotate.append(box_rotate)
                                    label_res_rotate.append(det_category_r_flip[ii])
                                    score_res_rotate.append(det_scores_r_flip[ii])

                            # Test-time flip with flipCode=0; odd columns
                            # are mirrored back with src_h - y.
                            det_boxes_r_flip, det_scores_r_flip, det_category_r_flip = \
                                sess.run(
                                    [detection_boxes, detection_scores, detection_category],
                                    feed_dict={img_plac: cv2.flip(img_resize, flipCode=0)[:, :, ::-1]}
                                )
                            if len(det_boxes_r_flip) > 0:
                                det_boxes_r_flip = forward_convert(det_boxes_r_flip, False)
                                det_boxes_r_flip[:, 0::2] *= (src_w / resized_w)
                                det_boxes_r_flip[:, 1::2] *= (src_h / resized_h)

                                for ii in range(len(det_boxes_r_flip)):
                                    box_rotate = det_boxes_r_flip[ii]
                                    box_rotate[0::2] = box_rotate[0::2] + ww_
                                    box_rotate[1::2] = (src_h - box_rotate[1::2]) + hh_
                                    box_res_rotate.append(box_rotate)
                                    label_res_rotate.append(det_category_r_flip[ii])
                                    score_res_rotate.append(det_scores_r_flip[ii])

            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []
            # Per-class threshold passed to rotate_gpu_nms, keyed by the
            # class name looked up in self.label_name_map.
            threshold = {'roundabout': 0.1, 'tennis-court': 0.3, 'swimming-pool': 0.1, 'storage-tank': 0.2,
                         'soccer-ball-field': 0.3, 'small-vehicle': 0.2, 'ship': 0.2, 'plane': 0.3,
                         'large-vehicle': 0.1, 'helicopter': 0.2, 'harbor': 0.0001, 'ground-track-field': 0.3,
                         'bridge': 0.0001, 'basketball-court': 0.3, 'baseball-diamond': 0.3,
                         'container-crane': 0.05, 'airport': 0.1, 'helipad': 0.1}

            for sub_class in range(1, self.cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                # A CPU alternative (nms_rotate.nms_rotate_cpu) was tried
                # here previously and is disabled.
                tmp_boxes_r_ = np.array(tmp_boxes_r_)
                # rotate_gpu_nms input: box columns followed by the score in
                # the last column.
                tmp = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_r_
                tmp[:, -1] = np.array(tmp_score_r)
                # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                # A small random jitter is therefore added to the first column.
                jitter = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                inx = rotate_gpu_nms(np.array(tmp, np.float32) + np.array(jitter, np.float32),
                                     float(threshold[self.label_name_map[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            result_dict = {'boxes': np.array(box_res_rotate_), 'scores': np.array(score_res_rotate_),
                           'labels': np.array(label_res_rotate_), 'image_id': img_path}
            result_queue.put_nowait(result_dict)
def worker(self, gpu_id, images, det_net, result_queue):
    """Detect objects in whole images on one GPU and queue one result each.

    Every image is resized per configured short-side length, run through the
    network and, when ``args.flip_img`` is set, run again flipped with
    ``flipCode=1`` and ``flipCode=0``. Detections scoring below
    ``cfgs.VIS_SCORE`` are dropped. With ``args.multi_scale`` the merged
    detections go through a per-class rotated NMS.

    The two flip passes are independent of each other: an empty result from
    the first flip no longer suppresses the second one.

    Args:
        gpu_id: Value written to ``CUDA_VISIBLE_DEVICES``.
        images: Iterable of image paths.
        det_net: Network object providing ``build_whole_detection_network``
            and ``get_restorer``.
        result_queue: Queue receiving one dict per image with the keys
            ``'scales'``, ``'boxes'``, ``'scores'``, ``'labels'`` and
            ``'image_id'``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8,
                              shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    pretrain_zoo = PretrainModelZoo()
    if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
        img_batch = (img_batch / 255 - tf.constant(
            self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None,
        gpu_id=0)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model %d ...' % gpu_id)

        for a_img in images:
            raw_img = cv2.imread(a_img)
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            det_boxes_r_all, det_scores_r_all, det_category_r_all = [], [], []

            # Normalise IMG_SHORT_SIDE_LEN to a list. Unless multi-scale
            # testing is requested, only its first entry is used.
            img_short_side_len_list = self.cfgs.IMG_SHORT_SIDE_LEN if isinstance(
                self.cfgs.IMG_SHORT_SIDE_LEN,
                list) else [self.cfgs.IMG_SHORT_SIDE_LEN]
            img_short_side_len_list = [
                img_short_side_len_list[0]
            ] if not self.args.multi_scale else img_short_side_len_list

            for short_size in img_short_side_len_list:
                # Resize so the shorter side equals short_size; the longer
                # side is capped at IMG_MAX_LENGTH.
                max_len = self.cfgs.IMG_MAX_LENGTH
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h, new_w = min(
                        int(short_size * float(raw_h) / raw_w),
                        max_len), short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                resized_img, detected_boxes, detected_scores, detected_categories = \
                    sess.run(
                        [img_batch, detection_boxes, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}
                    )

                detected_indices = detected_scores >= self.cfgs.VIS_SCORE
                detected_scores = detected_scores[detected_indices]
                detected_boxes = detected_boxes[detected_indices]
                detected_categories = detected_categories[detected_indices]

                # NOTE(review): an empty un-flipped pass still skips both
                # flip passes for this scale, as before — confirm that this
                # shortcut is intended.
                if detected_boxes.shape[0] == 0:
                    continue

                resized_h, resized_w = resized_img.shape[
                    1], resized_img.shape[2]
                detected_boxes = forward_convert(detected_boxes, False)
                # Even columns are scaled by the width ratio and odd columns
                # by the height ratio, mapping boxes back to raw-image size.
                detected_boxes[:, 0::2] *= (raw_w / resized_w)
                detected_boxes[:, 1::2] *= (raw_h / resized_h)

                det_boxes_r_all.extend(detected_boxes)
                det_scores_r_all.extend(detected_scores)
                det_category_r_all.extend(detected_categories)

                if self.args.flip_img:
                    # flipCode=1 is undone on the even columns (raw_w - x),
                    # flipCode=0 on the odd columns (raw_h - y).
                    for flip_code in (1, 0):
                        flip_boxes, flip_scores, flip_categories = \
                            sess.run(
                                [detection_boxes, detection_scores, detection_category],
                                feed_dict={img_plac: cv2.flip(img_resize, flipCode=flip_code)[:, :, ::-1]}
                            )
                        flip_keep = flip_scores >= self.cfgs.VIS_SCORE
                        flip_scores = flip_scores[flip_keep]
                        flip_boxes = flip_boxes[flip_keep]
                        flip_categories = flip_categories[flip_keep]

                        # Skip only this flip. Previously an empty first
                        # flip hit a `continue` on the scale loop and
                        # dropped the second flip as well.
                        if flip_boxes.shape[0] == 0:
                            continue

                        flip_boxes = forward_convert(flip_boxes, False)
                        flip_boxes[:, 0::2] *= (raw_w / resized_w)
                        flip_boxes[:, 1::2] *= (raw_h / resized_h)
                        if flip_code == 1:
                            flip_boxes[:, 0::2] = (raw_w - flip_boxes[:, 0::2])
                        else:
                            flip_boxes[:, 1::2] = (raw_h - flip_boxes[:, 1::2])

                        det_boxes_r_all.extend(sort_corners(flip_boxes))
                        det_scores_r_all.extend(flip_scores)
                        det_category_r_all.extend(flip_categories)

            det_boxes_r_all = np.array(det_boxes_r_all)
            det_scores_r_all = np.array(det_scores_r_all)
            det_category_r_all = np.array(det_category_r_all)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []

            if det_scores_r_all.shape[0] != 0:
                for sub_class in range(1, self.cfgs.CLASS_NUM + 1):
                    index = np.where(det_category_r_all == sub_class)[0]
                    if len(index) == 0:
                        continue
                    tmp_boxes_r = det_boxes_r_all[index]
                    tmp_label_r = det_category_r_all[index]
                    tmp_score_r = det_scores_r_all[index]

                    if self.args.multi_scale:
                        tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)
                        tmp_boxes_r_ = np.array(tmp_boxes_r_)
                        # rotate_gpu_nms input: box columns followed by the
                        # score in the last column.
                        tmp = np.zeros([
                            tmp_boxes_r_.shape[0],
                            tmp_boxes_r_.shape[1] + 1
                        ])
                        tmp[:, 0:-1] = tmp_boxes_r_
                        tmp[:, -1] = np.array(tmp_score_r)
                        # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                        # A small random jitter is therefore added to the
                        # first column.
                        jitter = np.zeros([
                            tmp_boxes_r_.shape[0],
                            tmp_boxes_r_.shape[1] + 1
                        ])
                        jitter[:, 0] += np.random.rand(
                            tmp_boxes_r_.shape[0], ) / 1000
                        inx = rotate_gpu_nms(
                            np.array(tmp, np.float32) +
                            np.array(jitter, np.float32),
                            float(self.cfgs.NMS_IOU_THRESHOLD), 0)
                    else:
                        # Single scale: keep every detection.
                        inx = np.arange(0, tmp_score_r.shape[0])

                    box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                    score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                    label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            box_res_rotate_ = np.array(box_res_rotate_)
            score_res_rotate_ = np.array(score_res_rotate_)
            label_res_rotate_ = np.array(label_res_rotate_)

            result_dict = {
                'scales': [1, 1],
                'boxes': box_res_rotate_,
                'scores': score_res_rotate_,
                'labels': label_res_rotate_,
                'image_id': a_img
            }
            result_queue.put_nowait(result_dict)
def __init__(self, cfgs, is_training):
    """Remember the configuration and resolve the FPN builder.

    Args:
        cfgs: Configuration object; ``NET_NAME`` and ``FPN_MODE`` are read.
        is_training: Flag stored on the instance.
    """
    self.cfgs, self.is_training = cfgs, is_training
    self.base_network_name = cfgs.NET_NAME

    # self.fpn_mode maps the configured mode to the value kept in fpn_func.
    fpn_mode_name = cfgs.FPN_MODE
    self.fpn_func = self.fpn_mode(fpn_mode_name)

    self.pretrain_zoo = PretrainModelZoo()
def eval_with_plac(self, img_dir, det_net, image_ext):
    """Run the network forward over every file listed in ``img_dir``.

    The fetched network output is not stored; the returned list is always
    empty.

    Args:
        img_dir: Directory whose listing (``os.listdir``) is processed.
        det_net: Network object providing ``build_whole_detection_network``
            and ``get_restorer``.
        image_ext: Image file extension; it is split off each listed name
            and appended again to build the path that is read.

    Returns:
        The (empty) ``all_boxes_r`` list.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    zoo = PretrainModelZoo()
    net_name_in_zoo = (self.cfgs.NET_NAME in zoo.pth_zoo
                       or self.cfgs.NET_NAME in zoo.mxnet_zoo)
    if net_name_in_zoo:
        img_batch = (img_batch / 255 - tf.constant(self.cfgs.PIXEL_MEAN_)) / tf.constant(self.cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(self.cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)

    output = det_net.build_whole_detection_network(input_img_batch=img_batch)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer(),
    )

    restorer, restore_ckpt = det_net.get_restorer()

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        progress = tqdm(os.listdir(img_dir))
        for listed_name in progress:
            stem = listed_name.split(image_ext)[0]
            raw_img = cv2.imread(os.path.join(img_dir, stem + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            # Normalise IMG_SHORT_SIDE_LEN to a list. Unless multi-scale
            # testing is requested, only its first entry is used.
            if isinstance(self.cfgs.IMG_SHORT_SIDE_LEN, list):
                short_sides = self.cfgs.IMG_SHORT_SIDE_LEN
            else:
                short_sides = [self.cfgs.IMG_SHORT_SIDE_LEN]
            if not self.args.multi_scale:
                short_sides = [short_sides[0]]

            for short_size in short_sides:
                # Shorter side becomes short_size; the longer side is capped
                # at IMG_MAX_LENGTH.
                max_len = self.cfgs.IMG_MAX_LENGTH
                if raw_h < raw_w:
                    new_h = short_size
                    new_w = min(int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h = min(int(short_size * float(raw_h) / raw_w), max_len)
                    new_w = short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                # Forward pass only; the fetched value is not kept.
                sess.run(
                    [output],
                    feed_dict={img_plac: img_resize[:, :, ::-1]}
                )

            progress.set_description("Eval image %s" % stem)

        return all_boxes_r