def detect_image(self, image):
    image = [np.array(image)]
    molded_images, image_metas, windows = mold_inputs(self.config, image)

    image_shape = molded_images[0].shape
    anchors = get_anchors(self.config, image_shape)
    anchors = np.broadcast_to(anchors, (1,) + anchors.shape)

    detections, _, _, mrcnn_mask, _, _, _ = \
        self.model.predict([molded_images, image_metas, anchors], verbose=0)

    final_rois, final_class_ids, final_scores, final_masks = \
        unmold_detections(detections[0], mrcnn_mask[0],
                          image[0].shape, molded_images[0].shape, windows[0])

    r = {
        "rois": final_rois,
        "class_ids": final_class_ids,
        "scores": final_scores,
        "masks": final_masks,
    }
    visualize.display_instances(image[0], r['rois'], r['masks'], r['class_ids'],
                                self.class_names, r['scores'])
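# A minimal usage sketch for the detect_image method above; the MASK_RCNN
# wrapper class name and the image path are assumptions for illustration,
# not part of the snippet itself.
from PIL import Image

if __name__ == "__main__":
    model = MASK_RCNN()                 # hypothetical wrapper holding self.model / self.config
    image = Image.open("street.jpg")    # any RGB image
    model.detect_image(image)           # runs the network and displays the instances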
def __init__(self, **kwargs):
    self.__dict__.update(self._defaults)
    for name, value in kwargs.items():
        setattr(self, name, value)
    #---------------------------------------------------#
    #   Compute the total number of classes
    #---------------------------------------------------#
    self.class_names, self.num_classes = get_classes(self.classes_path)
    self.anchors = torch.from_numpy(
        get_anchors(self.input_shape, self.anchors_size, self.backbone)).type(torch.FloatTensor)
    if self.cuda:
        self.anchors = self.anchors.cuda()
    self.num_classes = self.num_classes + 1

    #---------------------------------------------------#
    #   Assign a different color to each class for drawing boxes
    #---------------------------------------------------#
    hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
    self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    self.colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            self.colors))
    self.bbox_util = BBoxUtility(self.num_classes)
    self.generate()

    show_config(**self._defaults)
def forward(self, h, img_size, scale=1.):
    """ Forward Region Proposal Network. """
    n_pre_nms = 12000
    n_post_nms = 2000
    nms_thresh = 0.7

    # get predefined anchors
    n, _, hh, ww = h.shape
    anchors = get_anchors(self.anchor_base, self.feat_stride, hh, ww)

    # main forward
    hidd = F.relu(self.conv1(h))
    rpn_locs = self.loc(hidd)
    rpn_scores = self.score(hidd)

    # reshape rpn_locs, rpn_scores to (n, num_anchors, 4) and (n, num_anchors, 2)
    rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
    rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous().view(n, -1, 2)
    scores = rpn_scores[:, :, 1].data.cpu().numpy()[0]

    # get rois, roi_indices
    rois = get_rois_from_loc_anchors(anchors, rpn_locs[0].data.cpu().numpy())

    # clip rois to the image boundary
    rois[:, ::2] = np.clip(rois[:, ::2], 0, img_size[0])
    rois[:, 1::2] = np.clip(rois[:, 1::2], 0, img_size[1])

    # remove rois smaller than min_size
    min_size = 16
    min_size = min_size * scale
    hs = rois[:, 2] - rois[:, 0]
    ws = rois[:, 3] - rois[:, 1]
    if t.is_tensor(min_size):
        min_size = min_size.numpy()
    keep = np.where((hs >= min_size) & (ws >= min_size))[0]
    rois = rois[keep, :]
    scores = scores[keep]

    # Sort all (proposal, score) pairs by score from highest to lowest.
    # Take top pre_nms_topN (e.g. 12000).
    order = scores.ravel().argsort()[::-1]
    if n_pre_nms > 0:
        order = order[:n_pre_nms]
    rois = rois[order, :]

    # NMS
    keep = py_cpu_nms(rois, nms_thresh)
    keep = keep[:n_post_nms]
    rois = rois[keep]

    return rpn_locs, rpn_scores, rois, [0] * len(rois), anchors
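# A hedged sketch of the greedy NMS helper the RPN above relies on. This is
# the standard py_cpu_nms pattern; since the call above passes boxes that are
# already sorted by score, this variant assumes (y1, x1, y2, x2) rows in
# descending-score order — an assumption, not the repo's exact code.
import numpy as np

def py_cpu_nms(boxes, thresh):
    y1, x1, y2, x2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (y2 - y1) * (x2 - x1)
    order = np.arange(len(boxes))          # boxes are assumed pre-sorted by score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with the remaining boxes
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        inter = np.maximum(0.0, yy2 - yy1) * np.maximum(0.0, xx2 - xx1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes overlapping the kept one above the threshold
        order = order[1:][iou <= thresh]
    return np.array(keep)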
def generate(self):
    while True:
        shuffle(self.train_lines)
        lines = self.train_lines
        for annotation_line in lines:
            img, y = self.get_random_data(annotation_line)
            height, width, _ = np.shape(img)
            if len(y) == 0:
                continue
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / width
            boxes[:, 1] = boxes[:, 1] / height
            boxes[:, 2] = boxes[:, 2] / width
            boxes[:, 3] = boxes[:, 3] / height

            box_heights = boxes[:, 3] - boxes[:, 1]
            box_widths = boxes[:, 2] - boxes[:, 0]
            if (box_heights <= 0).any() or (box_widths <= 0).any():
                continue
            y[:, :4] = boxes[:, :4]

            anchors = get_anchors(get_img_output_length(width, height), width, height)

            # Match ground-truth boxes to anchors and compute the targets
            # each matched anchor should predict.
            assignment = self.bbox_util.assign_boxes(y, anchors)

            num_regions = 256
            classification = assignment[:, 4]
            regression = assignment[:, :]

            mask_pos = classification[:] > 0
            num_pos = len(classification[mask_pos])
            if num_pos > num_regions / 2:
                # Chained fancy indexing (classification[mask_pos][val_locs] = -1)
                # writes to a copy, so the masked values are copied out, edited,
                # and written back.
                val_locs = random.sample(range(num_pos), int(num_pos - num_regions / 2))
                temp_classification = classification[mask_pos]
                temp_regression = regression[mask_pos]
                temp_classification[val_locs] = -1
                temp_regression[val_locs, -1] = -1
                classification[mask_pos] = temp_classification
                regression[mask_pos] = temp_regression

            mask_neg = classification[:] == 0
            num_neg = len(classification[mask_neg])
            if len(classification[mask_neg]) + num_pos > num_regions:
                # Keep num_regions - num_pos negatives so positives and
                # negatives sum to num_regions.
                val_locs = random.sample(range(num_neg), int(num_neg + num_pos - num_regions))
                temp_classification = classification[mask_neg]
                temp_classification[val_locs] = -1
                classification[mask_neg] = temp_classification

            classification = np.reshape(classification, [-1, 1])
            regression = np.reshape(regression, [-1, 5])

            tmp_inp = np.array(img)
            tmp_targets = [np.expand_dims(np.array(classification, dtype=np.float32), 0),
                           np.expand_dims(np.array(regression, dtype=np.float32), 0)]
            yield preprocess_input(np.expand_dims(tmp_inp, 0)), tmp_targets, np.expand_dims(y, 0)
def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here so grayscale images do not
    #   error out during prediction. The code only supports RGB
    #   prediction, so every other image type is converted to RGB.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free resize.
    #   A plain resize would also work for recognition.
    #---------------------------------------------------------#
    image_data, image_metas, windows = resize_image([np.array(image)], self.config)
    #---------------------------------------------------------#
    #   Generate anchors that match the current input image size
    #---------------------------------------------------------#
    anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape), 0)

    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)
    #---------------------------------------------------#
    #   The predictions above are relative to the padded image,
    #   so map them back onto the original image.
    #---------------------------------------------------#
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])

    t1 = time.time()
    for _ in range(test_interval):
        #---------------------------------------------------------#
        #   Feed the image into the network for prediction!
        #---------------------------------------------------------#
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)
        #---------------------------------------------------#
        #   The predictions above are relative to the padded image,
        #   so map them back onto the original image.
        #---------------------------------------------------#
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_map_out(self, image):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here so grayscale images do not
    #   error out during prediction. The code only supports RGB
    #   prediction, so every other image type is converted to RGB.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free resize.
    #   A plain resize would also work for recognition.
    #---------------------------------------------------------#
    image_data, image_metas, windows = resize_image([np.array(image)], self.config)
    #---------------------------------------------------------#
    #   Generate anchors that match the current input image size
    #---------------------------------------------------------#
    anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape), 0)

    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)
    #---------------------------------------------------#
    #   The predictions above are relative to the padded image,
    #   so map them back onto the original image.
    #---------------------------------------------------#
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])

    outboxes = None
    if box_thre is not None:
        # Swap (y1, x1, y2, x2) into (x1, y1, x2, y2)
        outboxes = np.zeros_like(box_thre)
        outboxes[:, [0, 2]] = box_thre[:, [1, 3]]
        outboxes[:, [1, 3]] = box_thre[:, [0, 2]]
    return outboxes, class_thre, class_ids, masks_arg, masks_sigmoid
print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
      .format(mean_overlapping_bboxes, EPOCH_LENGTH))
if mean_overlapping_bboxes == 0:
    print('RPN is not producing bounding boxes that overlap the ground truth boxes. '
          'Check RPN settings or keep training.')

X, Y, boxes = batch[0], batch[1], batch[2]

loss_rpn = model_rpn.train_on_batch(X, Y)
write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn, train_step)
P_rpn = model_rpn.predict_on_batch(X)

height, width, _ = np.shape(X[0])
anchors = get_anchors(get_img_output_length(width, height), width, height)

# Decode the RPN predictions into proposals
results = bbox_util.detection_out(P_rpn, anchors, 1, confidence_threshold=0)
R = results[0][:, 2:]

X2, Y1, Y2, IouS = calc_iou(R, config, boxes[0], width, height, NUM_CLASSES)

if X2 is None:
    rpn_accuracy_rpn_monitor.append(0)
    rpn_accuracy_for_epoch.append(0)
def detect_image(self, image):
    image_shape = np.array(np.shape(image)[0:2])
    old_width, old_height = image_shape[1], image_shape[0]
    old_image = copy.deepcopy(image)

    #---------------------------------------------------------#
    #   Resize the original image so its short edge is 600
    #---------------------------------------------------------#
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)
    photo = np.array(image, dtype=np.float64)

    #-----------------------------------------------------------#
    #   Image preprocessing and normalization
    #-----------------------------------------------------------#
    photo = preprocess_input(np.expand_dims(photo, 0))
    rpn_pred = self.model_rpn.predict(photo)

    #-----------------------------------------------------------#
    #   Decode the proposal network's predictions
    #-----------------------------------------------------------#
    base_feature_width, base_feature_height = self.get_img_output_length(width, height)
    anchors = get_anchors([base_feature_width, base_feature_height], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    #-------------------------------------------------------------#
    #   Pass the proposals and the shared feature layer into the
    #   classifier for prediction
    #-------------------------------------------------------------#
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    temp_ROIs = np.zeros_like(proposal_box)
    temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])

    #-------------------------------------------------------------#
    #   Decode the proposals with the classifier predictions to get
    #   the final boxes
    #-------------------------------------------------------------#
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)

    if len(results[0]) == 0:
        return old_image

    results = np.array(results[0])
    boxes = results[:, :4]
    top_conf = results[:, 4]
    top_label_indices = results[:, 5]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max((np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2, 1)

    image = old_image
    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        left, top, right, bottom = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        # Draw the box
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[int(c)])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
import keras
import numpy as np

from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from nets import M2det  # assumed import path, mirroring nets.M2det_training
from nets.M2det_training import Generator
from nets.M2det_training import conf_loss, smooth_l1
from utils.utils import BBoxUtility
from utils.anchors import get_anchors

if __name__ == "__main__":
    NUM_CLASSES = 21
    input_shape = (320, 320, 3)
    annotation_path = '2007_train.txt'

    inputs = keras.layers.Input(shape=input_shape)
    model = M2det.m2det(NUM_CLASSES, inputs)
    priors = get_anchors((input_shape[0], input_shape[1]))
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    model.load_weights('model_data/M2det_weights.h5', by_name=True, skip_mismatch=True)
    model.summary()

    # 0.1 for validation, 0.9 for training
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val
def generate(self):
    while True:
        shuffle(self.train_lines)
        # Each line: [Raman spectrum data, x1, 1, x2, 1, 0, Raman spectrum data, ...]
        lines = self.train_lines
        for annotation_line in lines:
            raman, y = self.get_random_data(annotation_line)
            width, height, _ = np.shape(raman)  # width: 1044, height: 1, _: 1
            if len(y) == 0:
                continue
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / width
            boxes[:, 1] = boxes[:, 1] / height
            boxes[:, 2] = boxes[:, 2] / width
            boxes[:, 3] = boxes[:, 3] / height
            # The height/width sanity check is skipped for 1-D spectra:
            # box_heights = boxes[:, 3] - boxes[:, 1]
            # box_widths = boxes[:, 2] - boxes[:, 0]
            # if (box_heights <= 0).any() or (box_widths <= 0).any():
            #     continue
            y[:, :4] = boxes[:, :4]

            anchors = get_anchors((66, 1), 1044, 1)

            # Match ground-truth boxes to anchors and compute the targets
            # each matched anchor should predict.
            assignment = self.bbox_util.assign_boxes(y, anchors)

            num_regions = 16
            classification = assignment[:, 4]
            regression = assignment[:, :]

            mask_pos = classification[:] > 0
            num_pos = len(classification[mask_pos])
            if num_pos > num_regions / 2:
                val_locs = random.sample(range(num_pos), int(num_pos - num_regions / 2))
                temp_classification = classification[mask_pos]
                temp_regression = regression[mask_pos]
                temp_classification[val_locs] = -1
                temp_regression[val_locs, -1] = -1
                classification[mask_pos] = temp_classification
                regression[mask_pos] = temp_regression

            mask_neg = classification[:] == 0
            num_neg = len(classification[mask_neg])
            mask_pos = classification[:] > 0
            num_pos = len(classification[mask_pos])
            if len(classification[mask_neg]) + num_pos > num_regions:
                val_locs = random.sample(range(num_neg), int(num_neg + num_pos - num_regions))
                temp_classification = classification[mask_neg]
                temp_classification[val_locs] = -1
                classification[mask_neg] = temp_classification

            classification = np.reshape(classification, [-1, 1])
            regression = np.reshape(regression, [-1, 5])

            tmp_inp = np.array(raman)
            # Min-max normalize the spectrum to [0, 1]
            tmp_inp = (tmp_inp - np.min(tmp_inp)) / (np.max(tmp_inp) - np.min(tmp_inp))
            tmp_targets = [np.expand_dims(np.array(classification, dtype=np.float32), 0),
                           np.expand_dims(np.array(regression, dtype=np.float32), 0)]
            yield np.expand_dims(tmp_inp, 0), tmp_targets, np.expand_dims(y, 0)
train_annotation_path = '2007_train.txt'
val_annotation_path = '2007_val.txt'

#------------------------------------------------------#
#   Select which GPUs to use
#------------------------------------------------------#
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in train_gpu)
ngpus_per_node = len(train_gpu)
print('Number of devices: {}'.format(ngpus_per_node))

#----------------------------------------------------#
#   Get classes and anchors
#----------------------------------------------------#
class_names, num_classes = get_classes(classes_path)
num_classes += 1
anchors = get_anchors(input_shape, anchors_size)

K.clear_session()
model_body = m2det((input_shape[0], input_shape[1], 3), num_classes)
if model_path != '':
    #------------------------------------------------------#
    #   Load pretrained weights
    #------------------------------------------------------#
    print('Load weights {}.'.format(model_path))
    model_body.load_weights(model_path, by_name=True, skip_mismatch=True)

if ngpus_per_node > 1:
    model = multi_gpu_model(model_body, gpus=ngpus_per_node)
else:
    model = model_body
import numpy as np
import keras
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from nets import retinanet  # assumed import path, mirroring nets.retinanet_training
from nets.retinanet_training import Generator
from nets.retinanet_training import focal, smooth_l1
from utils.utils import BBoxUtility
from utils.anchors import get_anchors

if __name__ == "__main__":
    NUM_CLASSES = 20
    input_shape = (600, 600, 3)
    annotation_path = '2007_train.txt'

    inputs = keras.layers.Input(shape=input_shape)
    model = retinanet.resnet_retinanet(NUM_CLASSES, inputs)
    priors = get_anchors(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    #-------------------------------------------#
    #   See the README for downloading weights
    #-------------------------------------------#
    model.load_weights("model_data/resnet50_coco_best_v2.1.0.h5",
                       by_name=True, skip_mismatch=True)

    # 0.1 for validation, 0.9 for training
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
def get_map_txt(self, image_id, image, class_names, map_out_path):
    f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
    #---------------------------------------------------#
    #   Compute the height and width of the input image
    #---------------------------------------------------#
    image_shape = np.array(np.shape(image)[0:2])
    input_shape = get_new_img_size(image_shape[0], image_shape[1])
    #---------------------------------------------------------#
    #   Convert the image to RGB here so grayscale images do not
    #   error out during prediction. The code only supports RGB
    #   prediction, so every other image type is converted to RGB.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Resize the original image so its short edge is 600
    #---------------------------------------------------------#
    image_data = resize_image(image, [input_shape[1], input_shape[0]])
    #---------------------------------------------------------#
    #   Add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)

    #---------------------------------------------------------#
    #   Get the RPN predictions and the base_layer
    #---------------------------------------------------------#
    rpn_pred = self.model_rpn(image_data)
    rpn_pred = [x.numpy() for x in rpn_pred]
    #---------------------------------------------------------#
    #   Generate anchors and decode the predictions
    #---------------------------------------------------------#
    anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    #-------------------------------------------------------------#
    #   Get the classifier predictions from the proposals
    #-------------------------------------------------------------#
    classifier_pred = self.model_classifier(
        [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
    classifier_pred = [x.numpy() for x in classifier_pred]
    #-------------------------------------------------------------#
    #   Decode the proposals with the classifier predictions to get
    #   the final boxes
    #-------------------------------------------------------------#
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, rpn_results, image_shape, input_shape, self.confidence)

    #--------------------------------------#
    #   If nothing is detected, return without writing results
    #--------------------------------------#
    if len(results[0]) <= 0:
        return

    top_label = np.array(results[0][:, 5], dtype='int32')
    top_conf = results[0][:, 4]
    top_boxes = results[0][:, :4]

    for i, c in list(enumerate(top_label)):
        predicted_class = self.class_names[int(c)]
        box = top_boxes[i]
        score = str(top_conf[i])

        top, left, bottom, right = box
        if predicted_class not in class_names:
            continue

        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
import numpy as np
import pickle
import tensorflow as tf
import cv2
import keras
import os
import sys

# Assumed import paths for the helpers used below, mirroring the other
# training scripts in this collection.
from nets.rfb300 import rfb300
from utils.utils import BBoxUtility
from utils.anchors import get_anchors

if __name__ == "__main__":
    log_dir = "logs/"
    annotation_path = '2007_train.txt'

    NUM_CLASSES = 21
    input_shape = (300, 300, 3)
    priors = get_anchors()
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    # 0.1 for validation, 0.9 for training
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    model = rfb300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights("model_data/rfb_weights.h5", by_name=True, skip_mismatch=True)

    # Training parameter setup
def detect_image(self, image):
    image_shape = np.array(np.shape(image)[0:2])
    old_width = image_shape[1]
    old_height = image_shape[0]
    old_image = copy.deepcopy(image)

    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height])
    photo = np.array(image, dtype=np.float64)

    # Image preprocessing and normalization
    photo = preprocess_input(np.expand_dims(photo, 0))
    preds = self.model_rpn.predict(photo)

    # Decode the RPN predictions into proposals
    anchors = get_anchors(self.get_img_output_length(width, height), width, height)
    rpn_results = self.bbox_util.detection_out(preds, anchors, 1, confidence_threshold=0)
    R = rpn_results[0][:, 2:]

    # Map the proposals onto the feature-map grid as (x, y, w, h)
    R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride), dtype=np.int32)
    R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride), dtype=np.int32)
    R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride), dtype=np.int32)
    R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride), dtype=np.int32)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    base_layer = preds[2]

    # Drop degenerate proposals
    delete_line = []
    for i, r in enumerate(R):
        if r[2] < 1 or r[3] < 1:
            delete_line.append(i)
    R = np.delete(R, delete_line, axis=0)

    bboxes = []
    probs = []
    labels = []
    for jk in range(R.shape[0] // self.config.num_rois + 1):
        ROIs = np.expand_dims(R[self.config.num_rois * jk:self.config.num_rois * (jk + 1), :], axis=0)

        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // self.config.num_rois:
            # pad R so the last batch is full
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], self.config.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

        for ii in range(P_cls.shape[1]):
            if np.max(P_cls[0, ii, :-1]) < self.confidence:
                continue

            label = np.argmax(P_cls[0, ii, :-1])
            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = np.argmax(P_cls[0, ii, :-1])

            # Decode the class-specific regression into a box
            (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
            tx /= self.config.classifier_regr_std[0]
            ty /= self.config.classifier_regr_std[1]
            tw /= self.config.classifier_regr_std[2]
            th /= self.config.classifier_regr_std[3]

            cx = x + w / 2.
            cy = y + h / 2.
            cx1 = tx * w + cx
            cy1 = ty * h + cy
            w1 = math.exp(tw) * w
            h1 = math.exp(th) * h
            x1 = cx1 - w1 / 2.
            y1 = cy1 - h1 / 2.
            x2 = cx1 + w1 / 2
            y2 = cy1 + h1 / 2

            x1 = int(round(x1))
            y1 = int(round(y1))
            x2 = int(round(x2))
            y2 = int(round(y2))

            bboxes.append([x1, y1, x2, y2])
            probs.append(np.max(P_cls[0, ii, :-1]))
            labels.append(label)

    if len(bboxes) == 0:
        return old_image

    # Keep only boxes scoring above the confidence threshold
    labels = np.array(labels)
    probs = np.array(probs)
    boxes = np.array(bboxes, dtype=np.float32)
    boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
    boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
    boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
    boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height

    results = np.array(
        self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                   np.array(boxes), self.num_classes - 1, 0.4))

    top_label_indices = results[:, 0]
    top_conf = results[:, 1]
    boxes = results[:, 2:]
    boxes[:, 0] = boxes[:, 0] * old_width
    boxes[:, 1] = boxes[:, 1] * old_height
    boxes[:, 2] = boxes[:, 2] * old_width
    boxes[:, 3] = boxes[:, 3] * old_height

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2

    image = old_image
    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        left, top, right, bottom = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        # Draw the box
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[int(c)])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
def _get_prior(self):
    data = get_anchors(self.retinanet_model)
    return data
def detect_image(self, image_id, image):
    self.confidence = 0.05
    f = open("./input/detection-results/" + image_id + ".txt", "w")

    image_shape = np.array(np.shape(image)[0:2])
    old_width = image_shape[1]
    old_height = image_shape[0]
    old_image = copy.deepcopy(image)

    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height])
    photo = np.array(image, dtype=np.float64)

    # Image preprocessing and normalization
    photo = preprocess_input(np.expand_dims(photo, 0))
    preds = self.model_rpn.predict(photo)

    # Decode the RPN predictions into proposals
    anchors = get_anchors(self.get_img_output_length(width, height), width, height)
    rpn_results = self.bbox_util.detection_out(preds, anchors, 1, confidence_threshold=0)
    R = rpn_results[0][:, 2:]

    R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride), dtype=np.int32)
    R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride), dtype=np.int32)
    R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride), dtype=np.int32)
    R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride), dtype=np.int32)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    base_layer = preds[2]

    delete_line = []
    for i, r in enumerate(R):
        if r[2] < 1 or r[3] < 1:
            delete_line.append(i)
    R = np.delete(R, delete_line, axis=0)

    bboxes = []
    probs = []
    labels = []
    for jk in range(R.shape[0] // self.config.num_rois + 1):
        ROIs = np.expand_dims(R[self.config.num_rois * jk:self.config.num_rois * (jk + 1), :], axis=0)

        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // self.config.num_rois:
            # pad R so the last batch is full
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], self.config.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

        for ii in range(P_cls.shape[1]):
            # Skip low-confidence detections and the background class
            if np.max(P_cls[0, ii, :]) < self.confidence or \
                    np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            label = np.argmax(P_cls[0, ii, :])
            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = np.argmax(P_cls[0, ii, :])

            (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
            tx /= self.config.classifier_regr_std[0]
            ty /= self.config.classifier_regr_std[1]
            tw /= self.config.classifier_regr_std[2]
            th /= self.config.classifier_regr_std[3]

            cx = x + w / 2.
            cy = y + h / 2.
            cx1 = tx * w + cx
            cy1 = ty * h + cy
            w1 = math.exp(tw) * w
            h1 = math.exp(th) * h
            x1 = cx1 - w1 / 2.
            y1 = cy1 - h1 / 2.
            x2 = cx1 + w1 / 2
            y2 = cy1 + h1 / 2

            x1 = int(round(x1))
            y1 = int(round(y1))
            x2 = int(round(x2))
            y2 = int(round(y2))

            bboxes.append([x1, y1, x2, y2])
            probs.append(np.max(P_cls[0, ii, :]))
            labels.append(label)

    if len(bboxes) == 0:
        return old_image

    # Keep only boxes scoring above the confidence threshold
    labels = np.array(labels)
    probs = np.array(probs)
    boxes = np.array(bboxes, dtype=np.float32)
    boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
    boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
    boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
    boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height

    results = np.array(
        self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                   np.array(boxes), self.num_classes - 1, 0.4))

    top_label_indices = results[:, 0]
    top_conf = results[:, 1]
    boxes = results[:, 2:]
    boxes[:, 0] = boxes[:, 0] * old_width
    boxes[:, 1] = boxes[:, 1] * old_height
    boxes[:, 2] = boxes[:, 2] * old_width
    boxes[:, 3] = boxes[:, 3] * old_height

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = str(top_conf[i])

        left, top, right, bottom = boxes[i]
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
print('Epoch {}/{}'.format(i + 1, EPOCH))
for iteration, batch in enumerate(rpn_train):
    if len(rpn_accuracy_rpn_monitor) == EPOCH_LENGTH and config.verbose:
        mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
        rpn_accuracy_rpn_monitor = []
        print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
              .format(mean_overlapping_bboxes, EPOCH_LENGTH))
        if mean_overlapping_bboxes == 0:
            print('RPN is not producing bounding boxes that overlap the ground truth boxes. '
                  'Check RPN settings or keep training.')

    X, Y, boxes = batch[0], batch[1], batch[2]

    loss_rpn = model_rpn.train_on_batch(X, Y)
    write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn, train_step)
    P_rpn = model_rpn.predict_on_batch(X)

    width, height, _ = np.shape(X[0])
    anchors = get_anchors((66, 1), width, height)

    # Decode the RPN predictions into proposals.
    # The data is a 1-D spectrum, so the vertical regression channel is
    # pinned and the anchors' vertical start is zeroed.
    P_rpn[1][..., 3] = 1
    anchors[:, 1] = 0
    results = bbox_util.detection_out(P_rpn, anchors, 1, confidence_threshold=0)

    R = results[0][:, 2:]
    X2, Y1, Y2, IouS = calc_iou(R, config, boxes[0], width, height, NUM_CLASSES)

    if X2 is None:
        rpn_accuracy_rpn_monitor.append(0)
        rpn_accuracy_for_epoch.append(0)
        continue
def get_train_model(config):
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be divisible by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling. "
                        "For example, use 256, 320, 384, 448, 512, ... etc.")

    # The input image size must be divisible by 2**6
    input_image = Input(shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image")
    # meta carries some necessary image information
    input_image_meta = Input(shape=[config.IMAGE_META_SIZE], name="input_image_meta")

    # Ground-truth information for the RPN
    input_rpn_match = Input(shape=[None, 1], name="input_rpn_match", dtype=tf.int32)
    input_rpn_bbox = Input(shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32)

    # Class ids
    input_gt_class_ids = Input(shape=[None], name="input_gt_class_ids", dtype=tf.int32)
    # Box coordinates
    input_gt_boxes = Input(shape=[None, 4], name="input_gt_boxes", dtype=tf.float32)
    # Normalize coordinates to 0-1
    gt_boxes = Lambda(lambda x: norm_boxes_graph(x, K.shape(input_image)[1:3]))(input_gt_boxes)

    # Mask semantic information
    # [batch, height, width, MAX_GT_INSTANCES]
    if config.USE_MINI_MASK:
        input_gt_masks = Input(shape=[config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None],
                               name="input_gt_masks", dtype=bool)
    else:
        input_gt_masks = Input(shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
                               name="input_gt_masks", dtype=bool)

    # Get ResNet feature maps at different compression levels
    _, C2, C3, C4, C5 = get_resnet(input_image, stage5=True, train_bn=config.TRAIN_BN)

    # Build the feature pyramid
    # P5 is downsampled 5 times: Height/32, Width/32, 256
    P5 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5)
    # P4 is downsampled 4 times: Height/16, Width/16, 256
    P4 = Add(name="fpn_p4add")([
        UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)
    ])
    # P3 is downsampled 3 times: Height/8, Width/8, 256
    P3 = Add(name="fpn_p3add")([
        UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)
    ])
    # P2 is downsampled 2 times: Height/4, Width/4, 256
    P2 = Add(name="fpn_p2add")([
        UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)
    ])

    # One 3x3 convolution each so P2-P5 end up with the same channel count
    # Height/4, Width/4, 256
    P2 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2)
    # Height/8, Width/8, 256
    P3 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3)
    # Height/16, Width/16, 256
    P4 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4)
    # Height/32, Width/32, 256
    P5 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5)
    # The proposal network also uses an extra P6 level
    # Height/64, Width/64, 256
    P6 = MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    # P2, P3, P4, P5, P6 feed the proposal network
    rpn_feature_maps = [P2, P3, P4, P5, P6]
    # P2, P3, P4, P5 feed the mask branch
    mrcnn_feature_maps = [P2, P3, P4, P5]

    anchors = get_anchors(config, config.IMAGE_SHAPE)
    # Broadcast the anchors so the first dimension is batch_size
    anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape)
    # Turn the anchors into a tensor
    anchors = Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image)

    # Build the RPN model
    rpn = build_rpn_model(len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE)

    rpn_class_logits, rpn_class, rpn_bbox = [], [], []

    # Run the RPN on each pyramid level and stack the five levels' outputs
    for p in rpn_feature_maps:
        logits, classes, bbox = rpn([p])
        rpn_class_logits.append(logits)
        rpn_class.append(classes)
        rpn_bbox.append(bbox)

    rpn_class_logits = Concatenate(axis=1, name="rpn_class_logits")(rpn_class_logits)
    rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_class)
    rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_bbox)

    # The stacked outputs have shapes:
    # rpn_class_logits : Batch_size, num_anchors, 2
    # rpn_class        : Batch_size, num_anchors, 2
    # rpn_bbox         : Batch_size, num_anchors, 4
    proposal_count = config.POST_NMS_ROIS_TRAINING

    # Batch_size, proposal_count, 4
    rpn_rois = ProposalLayer(proposal_count=proposal_count,
                             nms_threshold=config.RPN_NMS_THRESHOLD,
                             name="ROI",
                             config=config)([rpn_class, rpn_bbox, anchors])

    active_class_ids = Lambda(
        lambda x: parse_image_meta_graph(x)["active_class_ids"])(input_image_meta)

    if not config.USE_RPN_ROIS:
        # Use externally supplied proposals
        input_rois = Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                           name="input_roi", dtype=np.int32)
        # Normalize coordinates
        target_rois = Lambda(lambda x: norm_boxes_graph(x, K.shape(input_image)[1:3]))(input_rois)
    else:
        # Use the predicted proposals for the next stage
        target_rois = rpn_rois

    """Find the ground truth for the proposals.

    Inputs:
    proposals:    [batch, N, (y1, x1, y2, x2)] proposal boxes
    gt_class_ids: [batch, MAX_GT_INSTANCES] class of each ground-truth box
    gt_boxes:     [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] ground-truth box coordinates
    gt_masks:     [batch, height, width, MAX_GT_INSTANCES] ground-truth segmentations

    Returns:
    rois:             [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] proposals that actually contain objects
    target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE] class of each proposal
    target_deltas:    [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))] refinement each proposal should predict
    target_mask:      [batch, TRAIN_ROIS_PER_IMAGE, height, width] segmentation of each proposal
    """
    rois, target_class_ids, target_bbox, target_mask = \
        DetectionTargetLayer(config, name="proposal_targets")([
            target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

    # Classifier predictions for the selected proposals
    mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
        fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
                             config.POOL_SIZE, config.NUM_CLASSES,
                             train_bn=config.TRAIN_BN,
                             fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)

    # Mask predictions for the selected proposals
    mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, input_image_meta,
                                      config.MASK_POOL_SIZE, config.NUM_CLASSES,
                                      train_bn=config.TRAIN_BN)

    output_rois = Lambda(lambda x: x * 1, name="output_rois")(rois)

    # Losses
    rpn_class_loss = Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")(
        [input_rpn_match, rpn_class_logits])
    rpn_bbox_loss = Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
        [input_rpn_bbox, input_rpn_match, rpn_bbox])
    class_loss = Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")(
        [target_class_ids, mrcnn_class_logits, active_class_ids])
    bbox_loss = Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")(
        [target_bbox, target_class_ids, mrcnn_bbox])
    mask_loss = Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")(
        [target_mask, target_class_ids, mrcnn_mask])

    # Model
    inputs = [input_image, input_image_meta,
              input_rpn_match, input_rpn_bbox,
              input_gt_class_ids, input_gt_boxes, input_gt_masks]

    if not config.USE_RPN_ROIS:
        inputs.append(input_rois)
    outputs = [rpn_class_logits, rpn_class, rpn_bbox,
               mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
               rpn_rois, output_rois,
               rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]

    model = Model(inputs, outputs, name='mask_rcnn')
    return model
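# A hedged sketch of how get_train_model might be compiled; the add_loss /
# compile wiring below follows the common Matterport-style Mask R-CNN pattern
# and is an assumption about this repo, not shown in the snippet above.
config = Config()                       # hypothetical config instance
model = get_train_model(config)
# the five loss tensors are the last five model outputs
for loss_output in model.outputs[-5:]:
    model.add_loss(tf.reduce_mean(loss_output, keepdims=True))
model.compile(optimizer='sgd', loss=[None] * len(model.outputs))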
def detect_image(self, image):
    #-------------------------------------#
    #   Convert to RGB so grayscale images can be predicted too
    #-------------------------------------#
    image = image.convert("RGB")

    image_shape = np.array(np.shape(image)[0:2])
    old_width, old_height = image_shape[1], image_shape[0]
    old_image = copy.deepcopy(image)

    #---------------------------------------------------------#
    #   Resize the original image so its short edge is 600
    #---------------------------------------------------------#
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)
    photo = np.array(image, dtype=np.float64)

    #-----------------------------------------------------------#
    #   Image preprocessing and normalization
    #-----------------------------------------------------------#
    photo = preprocess_input(np.expand_dims(photo, 0))
    rpn_pred = self.model_rpn_get_pred(photo)
    rpn_pred = [x.numpy() for x in rpn_pred]

    #-----------------------------------------------------------#
    #   Decode the proposal network's predictions
    #-----------------------------------------------------------#
    base_feature_width, base_feature_height = self.get_img_output_length(width, height)
    anchors = get_anchors([base_feature_width, base_feature_height], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    #-------------------------------------------------------------#
    #   Pass the proposals and the shared feature layer into the
    #   classifier for prediction
    #-------------------------------------------------------------#
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    temp_ROIs = np.zeros_like(proposal_box)
    temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = self.model_classifier_get_pred([base_layer, temp_ROIs])
    classifier_pred = [x.numpy() for x in classifier_pred]

    #-------------------------------------------------------------#
    #   Decode the proposals with the classifier predictions to get
    #   the final boxes
    #-------------------------------------------------------------#
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)

    if len(results[0]) == 0:
        return old_image

    results = np.array(results[0])
    boxes = results[:, :4]
    top_conf = results[:, 4]
    top_label_indices = results[:, 5]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max((np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2, 1)

    image = old_image
    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        left, top, right, bottom = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        # Draw the box
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[int(c)])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
def fit_one_epoch(model_rpn, model_all, epoch, epoch_size, epoch_size_val,
                  gen, genval, Epoch, callback):
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0
    val_total_loss = 0
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            X, Y, boxes = batch[0], batch[1], batch[2]

            P_rpn = model_rpn.predict_on_batch(X)

            height, width, _ = np.shape(X[0])
            base_feature_width, base_feature_height = get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)

            results = bbox_util.detection_out_rpn(P_rpn, anchors)

            roi_inputs = []
            out_classes = []
            out_regrs = []
            for i in range(len(X)):
                R = results[i][:, 1:]
                X2, Y1, Y2 = calc_iou(R, config, boxes[i], NUM_CLASSES)
                roi_inputs.append(X2)
                out_classes.append(Y1)
                out_regrs.append(Y2)

            loss_class = model_all.train_on_batch(
                [X, np.array(roi_inputs)],
                [Y[0], Y[1], np.array(out_classes), np.array(out_regrs)])

            write_log(callback,
                      ['total_loss', 'rpn_cls_loss', 'rpn_reg_loss',
                       'detection_cls_loss', 'detection_reg_loss'],
                      loss_class, iteration)

            rpn_cls_loss += loss_class[1]
            rpn_loc_loss += loss_class[2]
            roi_cls_loss += loss_class[3]
            roi_loc_loss += loss_class[4]
            total_loss = rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss

            pbar.set_postfix(**{
                'total': total_loss / (iteration + 1),
                'rpn_cls': rpn_cls_loss / (iteration + 1),
                'rpn_loc': rpn_loc_loss / (iteration + 1),
                'roi_cls': roi_cls_loss / (iteration + 1),
                'roi_loc': roi_loc_loss / (iteration + 1),
                'lr': K.get_value(model_rpn.optimizer.lr)
            })
            pbar.update(1)

    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            X, Y, boxes = batch[0], batch[1], batch[2]

            P_rpn = model_rpn.predict_on_batch(X)

            height, width, _ = np.shape(X[0])
            base_feature_width, base_feature_height = get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)

            results = bbox_util.detection_out_rpn(P_rpn, anchors)

            roi_inputs = []
            out_classes = []
            out_regrs = []
            for i in range(len(X)):
                R = results[i][:, 1:]
                X2, Y1, Y2 = calc_iou(R, config, boxes[i], NUM_CLASSES)
                roi_inputs.append(X2)
                out_classes.append(Y1)
                out_regrs.append(Y2)

            loss_class = model_all.test_on_batch(
                [X, np.array(roi_inputs)],
                [Y[0], Y[1], np.array(out_classes), np.array(out_regrs)])

            val_total_loss += loss_class[0]

            pbar.set_postfix(**{'total': val_total_loss / (iteration + 1)})
            pbar.update(1)

    loss_history.append_loss(total_loss / (epoch_size + 1), val_total_loss / (epoch_size_val + 1))
    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (
        total_loss / (epoch_size + 1), val_total_loss / (epoch_size_val + 1)))
    print('Saving state, iter:', str(epoch + 1))
    model_all.save_weights('logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.h5' % (
        (epoch + 1), total_loss / (epoch_size + 1), val_total_loss / (epoch_size_val + 1)))
    return
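# A hedged sketch of how fit_one_epoch might be driven; Init_Epoch, the epoch
# bounds, and the generator names are assumptions consistent with the snippet
# above, not the repo's actual training script.
for epoch in range(Init_Epoch, Epoch):
    fit_one_epoch(model_rpn, model_all, epoch, epoch_size, epoch_size_val,
                  gen, genval, Epoch, callback)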
def generate(self):
    while True:
        shuffle(self.train_lines)
        lines = self.train_lines
        inputs = []
        target0 = []
        target1 = []
        target2 = []
        for annotation_line in lines:
            img, y = self.get_random_data(annotation_line)
            height, width, _ = np.shape(img)

            if len(y) > 0:
                boxes = np.array(y[:, :4], dtype=np.float32)
                boxes[:, 0] = boxes[:, 0] / width
                boxes[:, 1] = boxes[:, 1] / height
                boxes[:, 2] = boxes[:, 2] / width
                boxes[:, 3] = boxes[:, 3] / height
                y[:, :4] = boxes[:, :4]

            anchors = get_anchors(get_img_output_length(width, height), width, height)

            #---------------------------------------------------#
            #   assignment has two parts; its shape is (:, 5)
            #   :, :4  holds the regression targets the network should predict
            #   :, 4   flags whether the anchor contains an object (default: background)
            #---------------------------------------------------#
            assignment = self.bbox_util.assign_boxes(y, anchors)

            classification = assignment[:, 4]
            regression = assignment[:, :]

            #---------------------------------------------------#
            #   Sample positives and negatives; the training total is 256
            #---------------------------------------------------#
            mask_pos = classification[:] > 0
            num_pos = len(classification[mask_pos])
            if num_pos > self.num_regions / 2:
                val_locs = random.sample(range(num_pos),
                                         int(num_pos - self.num_regions / 2))
                temp_classification = classification[mask_pos]
                temp_regression = regression[mask_pos]
                temp_classification[val_locs] = -1
                temp_regression[val_locs, -1] = -1
                classification[mask_pos] = temp_classification
                regression[mask_pos] = temp_regression

            mask_neg = classification[:] == 0
            num_neg = len(classification[mask_neg])
            mask_pos = classification[:] > 0
            num_pos = len(classification[mask_pos])
            if len(classification[mask_neg]) + num_pos > self.num_regions:
                val_locs = random.sample(range(num_neg),
                                         int(num_neg + num_pos - self.num_regions))
                temp_classification = classification[mask_neg]
                temp_classification[val_locs] = -1
                classification[mask_neg] = temp_classification

            inputs.append(np.array(img))
            target0.append(np.reshape(classification, [-1, 1]))
            target1.append(np.reshape(regression, [-1, 5]))
            target2.append(y)

            if len(inputs) == self.Batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = [np.array(target0, np.float32),
                               np.array(target1, np.float32)]
                tmp_y = target2
                yield preprocess_input(tmp_inp), tmp_targets, tmp_y
                inputs = []
                target0 = []
                target1 = []
                target2 = []
def _get_prior(self):
    data = get_anchors(image_sizes[self.phi])
    return data
#-------------------------------------------#
#   Before training, set phi and model_path;
#   both must use the same Efficientdet version
#-------------------------------------------#
phi = 0
annotation_path = '2007_train.txt'
classes_path = 'model_data/new_classes.txt'

class_names = get_classes(classes_path)
NUM_CLASSES = len(class_names)

#-------------------------------------------#
#   See the README for downloading weights
#-------------------------------------------#
model_path = "model_data/efficientdet-d0-voc.h5"

model = Efficientdet(phi, num_classes=NUM_CLASSES)
priors = get_anchors(image_sizes[phi])
bbox_util = BBoxUtility(NUM_CLASSES, priors)
model.load_weights(model_path, by_name=True, skip_mismatch=True)

# 0.1 for validation, 0.9 for training
val_split = 0.1
with open(annotation_path) as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
num_val = int(len(lines) * val_split)
num_train = len(lines) - num_val

# Training parameter setup
def detect_image(self, image):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here so grayscale images do not
    #   error out during prediction. The code only supports RGB
    #   prediction, so every other image type is converted to RGB.
    #---------------------------------------------------------#
    image = cvtColor(image)
    image_origin = np.array(image, np.uint8)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free resize.
    #   A plain resize would also work for recognition.
    #---------------------------------------------------------#
    image_data, image_metas, windows = resize_image([np.array(image)], self.config)
    #---------------------------------------------------------#
    #   Generate anchors that match the current input image size
    #---------------------------------------------------------#
    anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape), 0)

    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)
    #---------------------------------------------------#
    #   The predictions above are relative to the padded image,
    #   so map them back onto the original image.
    #---------------------------------------------------#
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])

    if box_thre is None:
        return image

    #----------------------------------------------------------------------#
    #   masks_class [image_shape[0], image_shape[1]]
    #   Decide each pixel's class from the instance it belongs to and
    #   whether it passes the threshold.
    #----------------------------------------------------------------------#
    masks_class = masks_sigmoid * (class_ids[None, None, :] + 1)
    masks_class = np.reshape(masks_class, [-1, np.shape(masks_sigmoid)[-1]])
    masks_class = np.reshape(
        masks_class[np.arange(np.shape(masks_class)[0]), np.reshape(masks_arg, [-1])],
        [image_shape[0], image_shape[1]])

    #---------------------------------------------------------#
    #   Set the font and the box line thickness
    #---------------------------------------------------------#
    scale = 0.6
    thickness = int(max((image.size[0] + image.size[1]) // self.IMAGE_MAX_DIM, 1))
    font = cv2.FONT_HERSHEY_DUPLEX

    color_masks = self.colors[masks_class].astype('uint8')
    image_fused = cv2.addWeighted(color_masks, 0.4, image_origin, 0.6, gamma=0)

    for i in range(np.shape(class_ids)[0]):
        top, left, bottom, right = np.array(box_thre[i, :], np.int32)

        #---------------------------------------------------------#
        #   Pick the color and draw the predicted box
        #---------------------------------------------------------#
        color = self.colors[class_ids[i] + 1].tolist()
        cv2.rectangle(image_fused, (left, top), (right, bottom), color, thickness)

        #---------------------------------------------------------#
        #   Get the box's class and write it on the image
        #---------------------------------------------------------#
        class_name = self.class_names[class_ids[i]]
        print(class_name, top, left, bottom, right)
        text_str = f'{class_name}: {class_thre[i]:.2f}'
        text_w, text_h = cv2.getTextSize(text_str, font, scale, 1)[0]
        cv2.rectangle(image_fused, (left, top), (left + text_w, top + text_h + 5), color, -1)
        cv2.putText(image_fused, text_str, (left, top + 15), font, scale,
                    (255, 255, 255), 1, cv2.LINE_AA)

    image = Image.fromarray(np.uint8(image_fused))
    return image
def get_FPS(self, image, test_interval):
    #-------------------------------------#
    #   Convert to RGB so grayscale images can be predicted too
    #-------------------------------------#
    image = image.convert("RGB")

    image_shape = np.array(np.shape(image)[0:2])
    old_width, old_height = image_shape[1], image_shape[0]

    #---------------------------------------------------------#
    #   Resize the original image so its short edge is 600
    #---------------------------------------------------------#
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)
    photo = np.array(image, dtype=np.float64)

    #-----------------------------------------------------------#
    #   Image preprocessing and normalization
    #-----------------------------------------------------------#
    photo = preprocess_input(np.expand_dims(photo, 0))
    rpn_pred = self.model_rpn.predict(photo)

    #-----------------------------------------------------------#
    #   Decode the proposal network's predictions
    #-----------------------------------------------------------#
    base_feature_width, base_feature_height = self.get_img_output_length(width, height)
    anchors = get_anchors([base_feature_width, base_feature_height], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    #-------------------------------------------------------------#
    #   Pass the proposals and the shared feature layer into the
    #   classifier for prediction
    #-------------------------------------------------------------#
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    temp_ROIs = np.zeros_like(proposal_box)
    temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])

    #-------------------------------------------------------------#
    #   Decode the proposals with the classifier predictions to get
    #   the final boxes
    #-------------------------------------------------------------#
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)

    if len(results[0]) > 0:
        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    t1 = time.time()
    for _ in range(test_interval):
        rpn_pred = self.model_rpn.predict(photo)
        #-----------------------------------------------------------#
        #   Decode the proposal network's predictions
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   Pass the proposals and the shared feature layer into the
        #   classifier for prediction
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])

        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier predictions to get
        #   the final boxes
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) > 0:
            results = np.array(results[0])
            boxes = results[:, :4]
            top_conf = results[:, 4]
            top_label_indices = results[:, 5]
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def _get_prior(self):
    data = get_anchors()
    return data
local_rank = 0

if pretrained:
    if distributed:
        if local_rank == 0:
            download_weights(backbone)
        dist.barrier()
    else:
        download_weights(backbone)

#----------------------------------------------------#
#   Get classes and anchors
#----------------------------------------------------#
class_names, num_classes = get_classes(classes_path)
num_classes += 1
anchors = get_anchors(input_shape, anchors_size, backbone)

model = SSD300(num_classes, backbone, pretrained)
if not pretrained:
    weights_init(model)
if model_path != '':
    #------------------------------------------------------#
    #   See the README for the weight file (Baidu Netdisk download)
    #------------------------------------------------------#
    if local_rank == 0:
        print('Load weights {}.'.format(model_path))

    #------------------------------------------------------#
    #   Load weights by matching the pretrained keys with the model keys
    #------------------------------------------------------#
    model_dict = model.state_dict()
def detect_image(self, image_id, image):
    self.confidence = 0.01
    f = open("./input/detection-results/" + image_id + ".txt", "w")

    image_shape = np.array(np.shape(image)[0:2])
    old_width, old_height = image_shape[1], image_shape[0]
    old_image = copy.deepcopy(image)

    #---------------------------------------------------------#
    #   Resize the original image so its short edge is 600
    #---------------------------------------------------------#
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)
    photo = np.array(image, dtype=np.float64)

    #-----------------------------------------------------------#
    #   Image preprocessing and normalization
    #-----------------------------------------------------------#
    photo = preprocess_input(np.expand_dims(photo, 0))
    rpn_pred = self.model_rpn.predict(photo)

    #-----------------------------------------------------------#
    #   Decode the proposal network's predictions
    #-----------------------------------------------------------#
    base_feature_width, base_feature_height = self.get_img_output_length(width, height)
    anchors = get_anchors([base_feature_width, base_feature_height], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    #-------------------------------------------------------------#
    #   Pass the proposals and the shared feature layer into the
    #   classifier for prediction
    #-------------------------------------------------------------#
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    temp_ROIs = np.zeros_like(proposal_box)
    temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])

    #-------------------------------------------------------------#
    #   Decode the proposals with the classifier predictions to get
    #   the final boxes
    #-------------------------------------------------------------#
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)

    if len(results[0]) == 0:
        return old_image

    results = np.array(results[0])
    boxes = results[:, :4]
    top_conf = results[:, 4]
    top_label_indices = results[:, 5]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = str(top_conf[i])

        left, top, right, bottom = boxes[i]
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
#----------------------------------------------------#
#   Note: this SSD512 is not the original SSD512.
#   The original SSD512 has one more prediction layer than SSD300;
#   that change is fiddly, so only the input size was modified here.
#   It still lets you train on larger images, which helps with small objects.
#----------------------------------------------------#
input_shape = [300, 300, 3]

#----------------------------------------------------#
#   anchors_size sets the prior box sizes. The default values were
#   tuned on the VOC dataset and are general enough for most cases.
#   To detect small objects, shrink the shallow-layer anchors, since
#   the shallow layers are responsible for small objects!
#   For example: anchors_size = [21, 45, 99, 153, 207, 261, 315]
#----------------------------------------------------#
anchors_size = [30, 60, 111, 162, 213, 264, 315]
priors = get_anchors((input_shape[0], input_shape[1]), anchors_size)
bbox_util = BBoxUtility(NUM_CLASSES, priors)

model = SSD300(input_shape, NUM_CLASSES, anchors_size)

#------------------------------------------------------#
#   See the README for the weight file (Baidu Netdisk download).
#   Dimension-mismatch warnings are normal when training on your own
#   dataset: the prediction heads change, so the shapes differ.
#------------------------------------------------------#
model_path = 'model_data/mobilenet_ssd_weights.h5'
model.load_weights(model_path, by_name=True, skip_mismatch=True)

#-------------------------------------------------------------------------------#
#   Training parameter setup:
#   logging is the TensorBoard save path;
#   checkpoint controls how weights are saved; period sets how many
#   epochs pass between saves
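# A toy sketch of how anchors_size can turn into per-layer SSD prior boxes;
# the feature-map sizes, strides, and aspect ratios below are illustrative
# assumptions, not this repo's actual utils.anchors.get_anchors.
import numpy as np

def toy_ssd_anchors(input_size=300, feature_sizes=(38, 19, 10, 5, 3, 1),
                    anchors_size=(30, 60, 111, 162, 213, 264, 315),
                    ratios=(1.0, 2.0, 0.5)):
    boxes = []
    for k, fsize in enumerate(feature_sizes):
        min_size, max_size = anchors_size[k], anchors_size[k + 1]
        step = input_size / fsize
        for i in range(fsize):
            for j in range(fsize):
                cx, cy = (j + 0.5) * step, (i + 0.5) * step
                # small square box, plus one at the geometric mean of min/max
                sizes = [(min_size, min_size),
                         (np.sqrt(min_size * max_size),) * 2]
                # rectangular boxes for the non-unit aspect ratios
                for r in ratios:
                    if r == 1.0:
                        continue
                    sizes.append((min_size * np.sqrt(r), min_size / np.sqrt(r)))
                for w, h in sizes:
                    boxes.append([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])
    # normalize to [0, 1] and clip, as SSD priors usually are
    return np.clip(np.array(boxes) / input_size, 0.0, 1.0)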