def num_detection(in_img):
    model_path = 'num_ssd.h5'
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    K.clear_session()  # Clear previous models from memory.
    model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                                   'compute_loss': ssd_loss.compute_loss})
    img_height = 192
    img_width = 192
    test_img, test_single_channel = ssd2_resize(in_img, img_height, img_width)
    input_images = np.array([test_img])
    y_pred = model.predict(input_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.1,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded[0])

    new_list = []
    t_img = test_img.copy()
    for box in y_pred_decoded[0]:
        if float(box[1]) > 0.7:
            # Class ID 11 is rendered as '-'; all other IDs are shifted down by one.
            i_class = int(box[0]) - 1 if int(box[0]) != 11 else '-'
            xmin = int(box[2])
            ymin = int(box[3])
            xmax = int(box[4])
            ymax = int(box[5])
            cv2.rectangle(t_img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
            new_list.append([i_class, box[1], xmin, ymin, xmax, ymax])
    input_images[0] = cv2.cvtColor(input_images[0], cv2.COLOR_RGB2BGR)
    sort_arr = parse_box(new_list)
    imwrite(dir_path + '/0_ssd_detect.jpg', t_img)
    return sort_arr, test_img, test_single_channel
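# Usage sketch for num_detection() (the image path below is hypothetical;
# ssd2_resize, parse_box, dir_path and imwrite come from the surrounding module):
#
#   img = cv2.imread('meter_crop.jpg')
#   sorted_boxes, resized_img, single_channel = num_detection(img)
#   print(sorted_boxes)  # digit boxes ordered by parse_box()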
def draw_test(index, dataset, model):
    fig, ax = plt.subplots(1)
    image_index = dataset.dataset_indices[index]
    ax.imshow(dataset.images[image_index])
    # Ground truth boxes in red.
    for label, xmin, ymin, xmax, ymax in dataset.labels[index]:
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
    if model is not None:
        # Predict on a single-image batch.
        y_pred = model.predict(np.expand_dims(dataset.images[image_index], axis=0))
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.1,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        # Predicted boxes in blue.
        for label, confid, xmin, ymin, xmax, ymax in y_pred_decoded[0]:
            rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                 linewidth=1, edgecolor='b', facecolor='none')
            ax.add_patch(rect)
def inference_single_image():
    im2 = cv2.imread('./1478899159823020309.jpg')
    # Resize and add a batch dimension.
    resized = cv2.resize(im2, (480, 300))
    frame2 = np.array(np.expand_dims(resized, axis=0), dtype=np.float32)
    # Run the session; returns a list of outputs.
    detections = sess.run([label_name], {input_name: frame2})
    # Convert the list to a numpy array and drop the outer dimension.
    arr = np.asarray(detections)
    y = np.squeeze(arr, axis=0)
    y_pred_decoded = decode_detections(y,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
    for box in y_pred_decoded[0]:
        xmin = box[-4]
        ymin = box[-3]
        xmax = box[-2]
        ymax = box[-1]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        cv2.rectangle(resized, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                      color=(0, 255, 0), thickness=2)
        cv2.putText(resized, label, (int(xmin), int(ymin)), font, fontScale,
                    color, thickness)
    cv2.imshow('detected', resized)
    cv2.waitKey(0)
def get_classification(self, img):
    """Determines the color of the traffic light in the image

    Args:
        img (cv::Mat): image containing the traffic light

    Returns:
        int: ID of traffic light color (specified in styx_msgs/TrafficLight)
    """
    input_images = []
    # OpenCV delivers BGR; the model expects RGB.
    b, g, r = cv2.split(img)
    rgb_img = cv2.merge([r, g, b])
    resized = cv2.resize(rgb_img, (self.img_width, self.img_height))
    input_images.append(resized)
    # np.array() adds the batch dimension: (1, img_height, img_width, img_channels).
    # (The original also called resized.reshape(...) and discarded the result.)
    input_images = np.array(input_images)
    with graph.as_default():
        y_pred = self.model.predict(input_images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.2,
                                       iou_threshold=0.1,
                                       top_k=200,
                                       normalize_coords=self.normalize_coords,
                                       img_height=self.img_height,
                                       img_width=self.img_width)
    if len(y_pred_decoded[0]) == 0:
        return TrafficLight.UNKNOWN
    if len(y_pred_decoded[0][0]) == 0:
        return TrafficLight.UNKNOWN
    # Average the confidences of the three highest-scoring boxes per class
    # (1 = green, 2 = red, 3 = yellow).
    top3_green_avg = np.average(
        np.sort(y_pred_decoded[0][list(y_pred_decoded[0][:, 0] == 1), 1])[-3:])
    top3_red_avg = np.average(
        np.sort(y_pred_decoded[0][list(y_pred_decoded[0][:, 0] == 2), 1])[-3:])
    top3_yellow_avg = np.average(
        np.sort(y_pred_decoded[0][list(y_pred_decoded[0][:, 0] == 3), 1])[-3:])
    top3s = np.nan_to_num([top3_green_avg, top3_red_avg, top3_yellow_avg])
    best_avg = np.argmax(top3s) + 1
    if best_avg == 1:
        return TrafficLight.GREEN
    elif best_avg == 2:
        return TrafficLight.RED
    elif best_avg == 3:
        return TrafficLight.YELLOW
    return TrafficLight.UNKNOWN
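# Usage sketch for get_classification() (hedged: the owning class and wiring
# below are assumptions, not part of the original snippet; in a Udacity
# Capstone-style setup this method is typically called once per camera frame):
#
#   classifier = TLClassifier()                     # hypothetical owning class
#   state = classifier.get_classification(bgr_frame)
#   if state == TrafficLight.RED:
#       plan_stop_at_stop_line()                    # hypothetical helper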
def predict(self):
    # Scale factors from model input size back to the original image.
    # Note: the original derived these with swapped axis names and a swapped
    # cv2.resize/reshape order, which only worked for square inputs;
    # image.shape is (height, width, channels).
    scale_x = self.image.shape[1] / img_width
    scale_y = self.image.shape[0] / img_height
    img = np.array(self.image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    img = cv2.resize(img, (img_width, img_height))
    img = img.reshape(1, img_height, img_width, 3)
    y_pred = self.model.predict(img)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.6,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
    n = 0
    for box in y_pred_decoded[0]:
        print(box)
        # Rescale to original image coordinates and clamp at 0.
        xmin = max(0, int(box[-4] * scale_x))
        ymin = max(0, int(box[-3] * scale_y))
        xmax = max(0, int(box[-2] * scale_x))
        ymax = max(0, int(box[-1] * scale_y))
        image = self.image[ymin:ymax, xmin:xmax, :]
        self.object_detected.append(box)
        cv2.imwrite('images/person' + str(n) + '.jpg', image)
        n += 1
def text_detect(orig_images, input_images, model):
    y_pred = model.predict(input_images)

    # 3: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.17,
                                       iou_threshold=0.01,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 4: Convert the predictions to the original image size and apply a manual
    #    offset (move the top-left corner out by 3 px, the bottom-right by 28 px).
    y_pred_decoded_inv = y_pred_decoded.copy()
    y_pred_decoded_inv[0][:, 2] = y_pred_decoded[0][:, 2] * orig_images[0].shape[1] / img_width - 3
    y_pred_decoded_inv[0][:, 3] = y_pred_decoded[0][:, 3] * orig_images[0].shape[0] / img_height - 3
    y_pred_decoded_inv[0][:, 4] = y_pred_decoded[0][:, 4] * orig_images[0].shape[1] / img_width + 28
    y_pred_decoded_inv[0][:, 5] = y_pred_decoded[0][:, 5] * orig_images[0].shape[0] / img_height + 28

    text_box = []
    for y_pred_decode in y_pred_decoded_inv:
        text_box.append(y_pred_decode[:, 2:].astype(np.int32))
    return text_box
def inference_video():
    cap = cv2.VideoCapture('/home/mohan/git/backups/drive_1_min_more_cars.mp4')
    prev_frame_time = 0
    new_frame_time = 0
    while cap.isOpened():
        new_frame_time = time.time()
        ret, frame = cap.read()
        if not ret:  # End of stream or read error.
            break
        resized = cv2.resize(frame, (480, 300))
        frame2 = np.array(np.expand_dims(resized, axis=0), dtype=np.float32)
        # Run the session; returns a list of outputs.
        detections = sess.run([label_name], {input_name: frame2})
        # Convert the list to a numpy array and drop the outer dimension.
        arr = np.asarray(detections)
        y = np.squeeze(arr, axis=0)

        # 4: Decode the raw prediction `y_pred`.
        y_pred_decoded = decode_detections(y,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.45,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)

        # Compute the FPS and convert it to a string so it can be drawn with putText.
        fps = 1 / (new_frame_time - prev_frame_time)
        prev_frame_time = new_frame_time
        fps = str(int(fps))

        # Draw a bounding box around each prediction.
        for box in y_pred_decoded[0]:
            xmin = box[-4]
            ymin = box[-3]
            xmax = box[-2]
            ymax = box[-1]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            cv2.rectangle(resized, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                          color=(0, 255, 0), thickness=2)
            cv2.putText(resized, label, (int(xmin), int(ymin)), font, fontScale,
                        color, thickness)
        cv2.putText(resized, fps, (7, 70), font, 3, (100, 255, 0), 3, cv2.LINE_AA)
        print(fps)
        cv2.imshow('im', resized)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
    cap.release()
def handle_client(client_req):
    """Handle a client request."""
    # Get the client request data as a buffered reader.
    req_buff = client_req.makefile('rb')
    # Parse the request message.
    method, route, params, http_version = parse_first_line(req_buff)
    headers = parse_headers(req_buff)
    data = parse_body(req_buff, headers)
    body_content = json.loads(data)
    images_numpy = []
    response_body = ''
    if 'path' in body_content:
        # The request names an image on the local filesystem.
        image_path = body_content['path']
        if not os.path.isfile(image_path):
            print(f"'{image_path}' is not a file.")
            response_body += f"'{image_path}' is not a file."
            response_start_line = "HTTP/1.1 404 Not Found\r\n"
            send_response(client_req, response_start_line, response_body)
            return
        print("load file from local: " + image_path)
        try:
            with Image.open(image_path) as image:
                images_numpy.append(np.array(image, dtype=np.uint8))
        except OSError:
            print(f"Input '{image_path}' can not be opened as an image.")
            response_body += f"Input '{image_path}' can not be opened as an image."
            response_start_line = "HTTP/1.1 400 Bad Request\r\n"
            send_response(client_req, response_start_line, response_body)
            return
    elif 'image' in body_content:
        # The request carries the image itself, base64-encoded.
        image_content = body_content['image']
        print("load file from request body.")
        image_asc = image_content.encode('ascii')
        image_decode = base64.b64decode(image_asc)
        images_numpy.append(np.frombuffer(image_decode, dtype=np.uint8))
    images_numpy = np.array(images_numpy)
    y_pred = model.predict(images_numpy)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.005,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)
    pred_labels = set(y_pred_decoded[0][..., 0].astype(np.int8).astype(str).tolist())
    # Build the response data: a comma-separated list of predicted class IDs.
    response_body = ','.join(pred_labels)
    response_start_line = "HTTP/1.1 200 OK\r\n"
    send_response(client_req, response_start_line, response_body)
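# Example request bodies the handler above accepts (a sketch; the path is
# hypothetical):
#
#   {"path": "/data/images/000001.jpg"}
#   {"image": "<base64-encoded raw uint8 image bytes>"}
#
# Note that the 'image' branch decodes a flat byte buffer; the client must send
# bytes that np.frombuffer() yields in the shape the model expects.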
def inference_single_image():
    # Read a test image and add a batch dimension.
    im2 = cv2.imread('./1478899365487445082.jpg')
    im3 = np.expand_dims(im2, axis=0)

    # Make a prediction.
    y_pred = model.predict(im3)

    # 4: Decode the raw prediction `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')

    # Draw a bounding box around each prediction.
    for box in y_pred_decoded[0]:
        xmin = box[-4]
        ymin = box[-3]
        xmax = box[-2]
        ymax = box[-1]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        cv2.rectangle(im2, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                      color=(0, 255, 0), thickness=2)
        cv2.putText(im2, label, (int(xmin), int(ymin)), font, fontScale,
                    color, thickness)

    cv2.imshow('frame', im2)
    cv2.waitKey(0)
def get_ypred_decoded(self, r_img):
    y_pred = self.model.predict(r_img)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.9,
                                       iou_threshold=0.001,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    return y_pred_decoded
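# A note on the decode_detections() knobs that vary across these snippets
# (behavior per the ssd_keras reference implementation): boxes are first
# filtered per class by `confidence_thresh`, survivors go through greedy
# per-class non-maximum suppression with `iou_threshold`, and finally only
# the `top_k` highest-scoring boxes per image are kept. A very high confidence
# threshold combined with a tiny IoU threshold (0.9 / 0.001 above) keeps only
# strong, nearly non-overlapping detections.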
def training_plot(self, epoch, logs):
    # Plot results on epoch end.
    if self.val_generator:
        imgs, gt = next(self.val_generator)
        y_pred = self.model.predict(np.expand_dims(imgs[0], 0))
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.45,
                                           top_k=200,
                                           input_coords='centroids',
                                           normalize_coords=True,
                                           img_height=self.config.img_height,
                                           img_width=self.config.img_width)
        plt.figure(figsize=(6, 6))
        plt.imshow(imgs[0])
        current_axis = plt.gca()
        for box in y_pred_decoded[0]:
            class_id = int(box[0])  # Cast to int so it can index id2digit.
            confidence = box[1]
            xmin, ymin, xmax, ymax = box[2], box[3], box[4], box[5]
            label = '{}: {:.2f}'.format(self.id2digit[class_id], confidence)
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color='blue', fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': 'blue', 'alpha': 1.0})
        plt.show()
    else:
        print('no val generator defined')
def postProccessing(self, y_pred):
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=self.confidence_thresh,  # 0.25
                                       iou_threshold=self.iou_threshold,  # 0.15, 0.45
                                       top_k=self.top_k,  # 200
                                       normalize_coords=self.normalize_coords,
                                       img_height=self.realHeight,
                                       img_width=self.realWidth)
    y_pred_decoded = y_pred_decoded[0]
    if y_pred_decoded.shape != (0,):
        # Express confidences as integer percentages before the int cast.
        y_pred_decoded[:, 1] *= 100
        y_pred_decoded = y_pred_decoded.astype(int)
        y_pred_fixed = self._fix_decoded(y_pred_decoded)
    else:
        y_pred_fixed = y_pred_decoded
    return y_pred_fixed
def predict(self, image):
    with self.sess.as_default():
        with self.graph.as_default():
            softmax_tensor = self.sess.graph.get_tensor_by_name('import/predictions/concat:0')
            pred = self.sess.run(softmax_tensor, {'import/input_1:0': np.array(image)})
            y_pred_decoded = decode_detections(pred,
                                               confidence_thresh=0.5,
                                               iou_threshold=0.2,
                                               top_k=200,
                                               normalize_coords=True,
                                               img_height=180,
                                               img_width=240)
    return y_pred_decoded
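# The tensor names above ('import/input_1:0', 'import/predictions/concat:0')
# suggest a frozen graph loaded roughly like this (a sketch, assuming TF 1.x
# and a hypothetical 'frozen_model.pb'; not taken from the original source):
#
#   self.graph = tf.Graph()
#   with self.graph.as_default():
#       graph_def = tf.GraphDef()
#       with tf.gfile.GFile('frozen_model.pb', 'rb') as f:
#           graph_def.ParseFromString(f.read())
#       tf.import_graph_def(graph_def, name='import')
#   self.sess = tf.Session(graph=self.graph)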
def onlion_rec(self, img1):
    # Input should be a 3-channel 540x540 BGR image.
    self.objtuple = []
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    # Interpolation must be passed as a keyword; the original passed it
    # positionally, where cv2.resize expects the `dst` argument.
    dst = cv2.resize(img1, (270, 270), interpolation=cv2.INTER_LINEAR)
    img1 = dst.reshape(1, 270, 270, 3)
    y_pred = self.model.predict(img1)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.92,
                                       iou_threshold=0.1,
                                       top_k=200,
                                       normalize_coords=self.normalize_coords,
                                       img_height=self.img_height,
                                       img_width=self.img_width)
    self.output = y_pred_decoded[0]
def get_ypred_decoded(self, r_img):
    '''
    Perform prediction on one image.

    Arguments:
        r_img: an RGB image reshaped to (1, h, w, c).

    Returns:
        The decoded detections for the image: an array of
        (class, conf, xmin, ymin, xmax, ymax) rows.
    '''
    y_pred = self.ssd_model.predict(r_img)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.61,
                                       iou_threshold=0.1,
                                       top_k=200,
                                       normalize_coords=self.normalize_coords,
                                       img_height=240,
                                       img_width=320)
    return y_pred_decoded[0]
def get_img_with_bbox(model, frame):
    # Note: only the coordinates of the *last* decoded box survive the loop
    # below and get returned.
    xmin, ymin, xmax, ymax = [0 for i in range(4)]
    orig_images = []
    input_images = []
    print('\nFrame shape before feeding: ', frame.shape)
    orig_images.append(frame)
    img = Image.fromarray(frame)
    # PIL's resize takes (width, height); the original passed (height, width),
    # which only works for square model inputs.
    img = img.resize((img_width, img_height))
    img = image.img_to_array(img)
    input_images.append(img)
    input_images = np.array(input_images)
    y_pred = model.predict(input_images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded[0])
    for box in y_pred_decoded[0]:
        # Transform the predicted bounding boxes to the original image size.
        xmin = box[2] * video_width / img_width
        ymin = box[3] * video_height / img_height
        xmax = box[4] * video_width / img_width
        ymax = box[5] * video_height / img_height
        print(xmin, ymin, xmax, ymax)
    return xmin, ymin, xmax, ymax
def predict_all(dataset, model):
    predict_generator = dataset.generate(batch_size=1,
                                         shuffle=False,
                                         transformations=[],
                                         label_encoder=None,
                                         returns={'processed_images',
                                                  'processed_labels'},
                                         keep_images_without_gt=False)
    images = []
    labels = []
    for i in range(len(dataset.images)):
        x, l = next(predict_generator)
        images.append(x)
        labels.append(l[0])
        print(x.shape)
    images = np.concatenate(images, axis=0)
    y_pred = model.predict(images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.2,
                                       iou_threshold=0.0,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=images.shape[1],
                                       img_width=images.shape[2])
    missed_ar = []
    best_ious = []
    for true_labels, pred_labels in zip(labels, y_pred_decoded):
        for tl in true_labels:
            # Find the prediction with the highest IoU against this ground
            # truth box. (The original reused the stale outer loop index here;
            # enumerate is the evident intent.)
            scored = [(iou(tl, pl), j) for j, pl in enumerate(pred_labels)]
            if len(scored) != 0:
                scored.sort()
                best_iou, best_match = scored[-1]
            else:
                best_iou = 0
                best_match = None
            best_ious.append(best_iou)
            missed_ar.append(aratio(tl))
            print(best_iou)
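# `iou` and `aratio` are not defined in this snippet. A minimal sketch of what
# they presumably compute, assuming ground truth rows in
# (class, xmin, ymin, xmax, ymax) format and prediction rows in
# (class, conf, xmin, ymin, xmax, ymax) format:
def iou(gt_box, pred_box):
    # Intersection-over-union of the coordinate parts of the two boxes.
    gxmin, gymin, gxmax, gymax = gt_box[1:5]
    pxmin, pymin, pxmax, pymax = pred_box[2:6]
    iw = max(0.0, min(gxmax, pxmax) - max(gxmin, pxmin))
    ih = max(0.0, min(gymax, pymax) - max(gymin, pymin))
    inter = iw * ih
    union = ((gxmax - gxmin) * (gymax - gymin)
             + (pxmax - pxmin) * (pymax - pymin) - inter)
    return inter / union if union > 0 else 0.0

def aratio(gt_box):
    # Aspect ratio (width / height) of a ground-truth box.
    return (gt_box[3] - gt_box[1]) / (gt_box[4] - gt_box[2])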
for i in range(test_size):
    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.
    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=float(params["confidence_thresh"]),
                                       iou_threshold=float(params["iou_thresh"]),
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 5: Convert the predictions for the original image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # 6: Draw the predicted boxes onto the image.
def play():
    start_time_video = time.time()
    cap = cv2.VideoCapture(
        "/home/kara9147/ML/caltech-pedestrian-dataset-converter/data/plots/set00_V000.avi")

    # Time to read all frames, predict, draw bounding boxes, and show them.
    i = 0
    total_time = 0.0

    # Capture frame by frame.
    ret = True
    while ret:
        ret, origimg = cap.read()
        i = i + 1
        if ret:
            img = cv2.resize(origimg, (img_width, img_height))
            # OpenCV uses the BGR color format; the model expects RGB.
            frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            batch_img = np.expand_dims(frame, axis=0)

            current = time.time()
            # ------------------------------ PREDICTION ------------------------------
            y_pred = model.predict(batch_img)
            end = time.time()
            diff = end - current
            total_time = total_time + diff
            print("Time spent for predicting: {0}".format(diff))

            # 4: Decode the raw prediction `y_pred`.
            y_pred_decoded = decode_detections(y_pred,
                                               confidence_thresh=0.3,
                                               iou_threshold=0.45,
                                               top_k=200,
                                               normalize_coords=normalize_coords,
                                               img_height=img_height,
                                               img_width=img_width)
            np.set_printoptions(precision=2, suppress=True, linewidth=90)

            if y_pred_decoded and len(y_pred_decoded[0]):
                # Set the colors for the bounding boxes.
                colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
                # Class names to print onto the image instead of IDs.
                classes = ['background', 'person', 'people']

                # Draw the predicted boxes in blue.
                for box in y_pred_decoded[0]:
                    xmin = int(box[-4])
                    ymin = int(box[-3])
                    xmax = int(box[-2])
                    ymax = int(box[-1])
                    color = colors[int(box[0])]
                    label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
                    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 1)

            # Display the resulting frame.
            cv2.imshow('frame', img)
            # waitKey(0) would wait indefinitely.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    end_time_video = time.time()
    print("No of frames: {}".format(i))
    print("Total Time: {}".format(total_time))
    print("fps: {}".format(i / total_time))

    # When everything is done, release the capture.
    cap.release()
    cv2.destroyAllWindows()
cap = cv2.VideoCapture(0)
# cap.set(cv2.CAP_PROP_FPS, 10)  # Uncomment to set the camera capture FPS if required.
while True:
    ret, frame = cap.read()
    frame = cv2.resize(frame, (512, 512))
    frame2 = frame
    frame = frame[..., ::-1]  # Convert from BGR (OpenCV) to RGB.
    frame = np.expand_dims(frame, axis=0)
    y_pred = model.predict(frame)
    y_pred_thresh = decode_detections(y_pred,
                                      confidence_thresh=0.3,
                                      iou_threshold=0.5,
                                      top_k=200,
                                      normalize_coords=normalize_coords,
                                      img_height=img_height,
                                      img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    # classes = ['background', 'face']
    current_axis = plt.gca()
    for box in y_pred_thresh[0]:
        # The loop body is truncated in the source; drawing each box onto
        # `frame2`, as the other snippets do, is the evident intent.
        xmin, ymin, xmax, ymax = box[-4], box[-3], box[-2], box[-1]
        cv2.rectangle(frame2, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                      (0, 255, 0), 2)
def test_config(config):
    '''
    Test the given configuration; the configuration should already have been
    used for training purposes, or this will return an error (see ssd_train.py).

    Arguments:
        config : the configuration of the model to use; should already be loaded.
    '''
    local_dir = config.ROOT_FOLDER
    data_dir = config.DATA_DIR
    img_shape = config.IMG_SHAPE
    img_height = img_shape[0]  # Height of the model input images
    img_width = img_shape[1]  # Width of the model input images
    img_channels = img_shape[2]  # Number of color channels of the model input images
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.
    print("[INFO] loading model...")
    model_path = os.path.join(local_dir, 'models', config.MODEL_NAME)
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
    model = load_model(model_path,
                       custom_objects={'AnchorBoxes': AnchorBoxes,
                                       'L2Normalization': L2Normalization,
                                       'DecodeDetections': DecodeDetections,
                                       'compute_loss': ssd_loss.compute_loss})

    classes = config.CLASSES
    dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    dataset_images_dir = os.path.join(data_dir, 'Images')
    dataset_annotations_dir = os.path.join(data_dir, 'Annotations/')
    # Use os.path.join throughout instead of a hard-coded Windows separator.
    dataset_test_image_set_filename = os.path.join(data_dir, 'ImageSets', 'test.txt')
    dataset.parse_xml(images_dirs=[dataset_images_dir],
                      image_set_filenames=[dataset_test_image_set_filename],
                      annotations_dirs=[dataset_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)
    dataset.create_hdf5_dataset(file_path=config.MODEL_NAME,
                                resize=False,
                                variable_image_size=True,
                                verbose=True)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    dataset_size = dataset.get_dataset_size()
    print("Number of images in the dataset:\t{:>6}".format(dataset_size))

    predict_generator = dataset.generate(batch_size=config.PREDICT_BATCH_SIZE,
                                         shuffle=True,
                                         transformations=[convert_to_3_channels, resize],
                                         label_encoder=None,
                                         returns={'processed_images',
                                                  'filenames',
                                                  'inverse_transform',
                                                  'original_images',
                                                  'original_labels'},
                                         keep_images_without_gt=False)

    count = 0
    while count < dataset_size:
        batch_images, batch_filenames, batch_inverse_transforms, \
            batch_original_images, batch_original_labels = next(predict_generator)
        i = 0
        print("Image:", batch_filenames[i])
        print()
        print("Ground truth boxes:\n")
        print(np.array(batch_original_labels[i]))

        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.4,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded,
                                                      batch_inverse_transforms)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])

        colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
        plt.figure(figsize=(15, 8))
        plt.imshow(batch_original_images[i])
        current_axis = plt.gca()

        # Draw the ground truth boxes in green.
        len_orig = 0
        for box in batch_original_labels[i]:
            len_orig += 1
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color='green', fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': 'green', 'alpha': 1.0})

        # Draw the predicted boxes in their class colors.
        len_found = 0
        for box in y_pred_decoded_inv[i]:
            len_found += 1
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color=color, fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': color, 'alpha': 1.0})

        print('Number of original boxes : {}'.format(len_orig))
        print('Number of found boxes : {}'.format(len_found))
        plt.show()
        count += 1
def main():
    img_height = 300
    img_width = 480
    n_classes = 5

    ### Load model
    LOAD_MODEL = True
    if LOAD_MODEL:
        # TODO: Set the path to the `.h5` file of the model to be loaded.
        model_path = '../ConeData/SavedModels/training3/(ssd7_epoch-10_loss-0.3291_val_loss-0.2664.h5'
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={'AnchorBoxes': AnchorBoxes,
                                           'DecodeDetections': DecodeDetections,
                                           'compute_loss': ssd_loss.compute_loss})

    ### Read video
    cap = cv2.VideoCapture('test_videos/20180619_175221224.mp4')
    width = int(cap.get(3))
    height = int(cap.get(4))
    property_id = int(cv2.CAP_PROP_FRAME_COUNT)
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cv2.VideoCapture.get(cap, property_id))

    count = 0
    detect = True
    while count < total_frames:
        # Capture frame by frame.
        ret, frame = cap.read()
        if not ret:  # Guard against read failures near the end of the file.
            break
        if detect:
            frame = frame[..., ::-1]  # BGR -> RGB
            frame_resized = cv2.resize(frame, (480, 300))
            frame_tensor = np.expand_dims(frame_resized, axis=0)

            ### Make predictions
            y_pred = model.predict(frame_tensor)
            y_pred_decoded = decode_detections(y_pred,
                                               confidence_thresh=0.75,
                                               iou_threshold=0.45,
                                               top_k=200,
                                               normalize_coords=True,
                                               img_height=img_height,
                                               img_width=img_width)

            ### Plot predictions
            # Set the colors for the bounding boxes.
            colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
            # Class names to print onto the image instead of IDs.
            classes = ['background', 'cone']

            for box in y_pred_decoded[0]:
                xmin = int(box[-4])
                ymin = int(box[-3])
                xmax = int(box[-2])
                ymax = int(box[-1])
                color = colors[int(box[0])]
                cv2.rectangle(frame_resized, (xmin, ymin), (xmax, ymax),
                              (0, 255, 0), 5)
                label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
                cv2.putText(frame_resized, label, (int(xmin), int(ymin) - 10),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

            cv2.imshow('ssd7_inference', frame_resized)
            cv2.waitKey(10)
        count = count + 1

    cap.release()
def main():
    create_new_model = True if args.model_name == 'default' else False

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = ssd_512(image_size=(Config.img_height, Config.img_width, Config.img_channels),
                        n_classes=Config.n_classes,
                        mode='training',
                        l2_regularization=Config.l2_regularization,
                        scales=Config.scales,
                        aspect_ratios_per_layer=Config.aspect_ratios,
                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                        steps=Config.steps,
                        offsets=Config.offsets,
                        clip_boxes=Config.clip_boxes,
                        variances=Config.variances,
                        normalize_coords=Config.normalize_coords,
                        subtract_mean=Config.mean_color,
                        swap_channels=Config.swap_channels)
        adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:
        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={'AnchorBoxes': AnchorBoxes,
                                           'L2Normalization': L2Normalization,
                                           'compute_loss': ssd_loss.compute_loss})

    # Load the data.
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    batch_size = args.batch_size

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height,
                                                img_width=Config.img_width,
                                                background=Config.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=Config.img_height, width=Config.img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format
    #    needed by the SSD loss function. The encoder constructor needs the spatial
    #    dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv10_2_mbox_conf').output_shape[1:3]]

    ssd_input_encoder = SSDInputEncoder(img_height=Config.img_height,
                                        img_width=Config.img_width,
                                        n_classes=Config.n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=Config.scales,
                                        aspect_ratios_per_layer=Config.aspect_ratios,
                                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                                        steps=Config.steps,
                                        offsets=Config.offsets,
                                        clip_boxes=Config.clip_boxes,
                                        variances=Config.variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=Config.normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[ssd_data_augmentation],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images', 'encoded_labels'},
                                             keep_images_without_gt=False)
    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels, resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images', 'encoded_labels'},
                                         keep_images_without_gt=False)

    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() + '/weights/ssd512_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=30)
    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)
    terminate_on_nan = TerminateOnNaN()
    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)
    callbacks = [model_checkpoint, csv_logger, learning_rate_scheduler,
                 terminate_on_nan, tf_log]

    # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 500

    # Train/fit the model.
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size / batch_size),
                                      initial_epoch=initial_epoch)

    # Prediction output.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=True,
                                             transformations=[convert_to_3_channels, resize],
                                             label_encoder=None,
                                             returns={'processed_images',
                                                      'filenames',
                                                      'inverse_transform',
                                                      'original_images',
                                                      'original_labels'},
                                             keep_images_without_gt=False)

    i = 0
    for val in range(val_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, \
            batch_original_images, batch_original_labels = next(predict_generator)
        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.4,
                                           top_k=200,
                                           normalize_coords=Config.normalize_coords,
                                           img_height=Config.img_height,
                                           img_width=Config.img_width)

        # 5: Convert the predictions for the original image.
        y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded,
                                                      batch_inverse_transforms)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])
        current_axis = plt.gca()
        # Set the colors for the bounding boxes.
        colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
        # Class names to print onto the image instead of IDs.
        classes = ['background', 'polyps']

        # Draw the ground truth boxes in green.
        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color='green', fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': 'green', 'alpha': 1.0})

        # Draw the predicted boxes in their class colors.
        for box in y_pred_decoded_inv[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color=color, fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': color, 'alpha': 1.0})

        image = plt.gcf()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd512val_" + str(val) + ".png", dpi=100)
def predict_num_area(src):
    '''
    :param src: image read with OpenCV
    :return: mask: an all-white image over the digit region. (The original
        docstring also promised `scale`, the resize factor, and `all_blocks`,
        the rectangle coordinates, but the code producing them is commented
        out, so only `mask` is returned.)
    '''
    model_path = '../ssd7_pascal_07_epoch-17_loss-0.8387_val_loss-0.8608.h5'
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    K.clear_session()  # Clear previous models from memory.
    model = load_model(model_path,
                       custom_objects={'AnchorBoxes': AnchorBoxes,
                                       'compute_loss': ssd_loss.compute_loss})
    img_height = 341
    img_width = 256
    scale_y = src.shape[0] / img_height
    scale_x = src.shape[1] / img_width
    normalize_coords = True

    orig_images = []  # Store the images here.
    input_images = []  # Store resized versions of the images here.

    # Letterbox the source image onto a black canvas so the aspect ratio is preserved.
    if scale_x > scale_y:
        scale = scale_x
        real_w = img_width
        real_h = int(src.shape[0] / scale)
        black_img = np.zeros((img_height, img_width, 3), np.uint8)
        img = cv2.resize(src, (real_w, real_h))
        t_ = int((img_height - real_h) / 2)
        black_img[t_:t_ + real_h, :] = img
    else:
        scale = scale_y
        real_w = int(src.shape[1] / scale)
        real_h = img_height
        black_img = np.zeros((img_height, img_width, 3), np.uint8)
        img = cv2.resize(src, (real_w, real_h))
        t_ = int((img_width - real_w) / 2)
        black_img[:, t_:t_ + real_w] = img

    orig_images.append(black_img)
    img = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)
    input_images.append(img)
    input_images = np.array(input_images)

    y_pred = model.predict(input_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)

    mask = np.zeros((img_height, img_width), np.uint8)
    # Keep only the highest-confidence box.
    max_arr = sorted(y_pred_decoded[0], key=lambda xx: xx[1], reverse=True)
    for box in max_arr[:1]:
        xmin = int(box[2] - 2) if int(box[2] - 2) >= 0 else 0
        ymin = int(box[3])
        xmax = int(box[4] + 2)
        ymax = int(box[5] + 1)
        cv2.rectangle(input_images[0], (xmin, ymin), (xmax, ymax), 255, 2)
        # Map the box back through the letterbox offset and scale.
        write_xml(img_name, [int(xmin * scale / 4.21),
                             int((ymin - t_) * scale / 4.21),
                             int(xmax * scale / 4.21),
                             int((ymax - t_) * scale / 4.21)])
    input_images[0] = cv2.cvtColor(input_images[0], cv2.COLOR_RGB2BGR)
    imwrite(dir_path + img_name, input_images[0])
    # The block-cutting step (cutBlocks() on the mask, then rescaling each
    # found rectangle back through `scale` and `t_`) is commented out in the
    # source, which is why only `mask` is returned.
    K.clear_session()
    return mask
title = random.choice(imageset_content)
print(title)
# title = "B4_02_02_22_hor"
img = Image.open(os.path.join(TARGET_IMAGES, f"{title}.jpg"))
np_img = numpy.array(img)
xml_path = os.path.join(TARGET_ANNOTATIONS, f"{title}.xml")
boxes = read_content(xml_path)

model = get_model()
y_pred = model.predict(numpy.array([np_img]))

# 4: Decode the raw predictions in `y_pred`.
y_pred_decoded = decode_detections(y_pred,
                                   confidence_thresh=0.5,
                                   iou_threshold=0.4,
                                   top_k=200,
                                   normalize_coords=True,
                                   img_height=300,
                                   img_width=300)
y_pred_decoded = y_pred_decoded[0]

# Show the ground truth and predicted boxes side by side.
display_image(np_img, boxes)
display_image(np_img, y_pred_decoded)
print('test')
def predict_num_area(src):
    '''
    :param src: image read with OpenCV
    :return: mask: an all-white image over the digit region,
             all_blocks: rectangle coordinates (in source-image scale)
    '''
    model_path = 'ssd7_pascal_07_epoch-17_loss-0.8387_val_loss-0.8608.h5'
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    K.clear_session()  # Clear previous models from memory.
    model = load_model(model_path,
                       custom_objects={'AnchorBoxes': AnchorBoxes,
                                       'compute_loss': ssd_loss.compute_loss})
    img_height = 341
    img_width = 256
    scale_y = src.shape[0] / img_height
    scale_x = src.shape[1] / img_width
    normalize_coords = True

    orig_images = []  # Store the images here.
    input_images = []  # Store resized versions of the images here.

    img = cv2.resize(src, (img_width, img_height))
    orig_images.append(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    input_images.append(img)
    input_images = np.array(input_images)

    y_pred = model.predict(input_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)

    mask = np.zeros((img_height, img_width), np.uint8)
    for box in y_pred_decoded[0]:
        xmin = int(box[2])
        ymin = int(box[3])
        xmax = int(box[4] - 2)
        ymax = int(box[5] + 5)
        cv2.rectangle(mask, (xmin, ymin), (xmax, ymax), 255, -1)
        cv2.rectangle(input_images[0], (xmin, ymin), (xmax, ymax), 255, 2)
    input_images[0] = cv2.cvtColor(input_images[0], cv2.COLOR_RGB2BGR)
    mask = cv2.dilate(mask, kernel4)
    cv2.imwrite('./pre_model/4_pre_mask0.jpg', mask)
    cv2.imwrite('./pre_model/4_pre_res0.jpg', input_images[0])

    all_blocks = cutImage(mask, input_images[0], -1)
    # Rescale the blocks back to the source image coordinates.
    tmp = []
    for abox in all_blocks:
        aa = [int(abox[0] * scale_x), int(abox[1] * scale_y),
              int(abox[2] * scale_x), int(abox[3] * scale_y)]
        tmp.append(aa)
    all_blocks = tmp
    K.clear_session()
    return mask, all_blocks
def main():
    create_new_model = True if args.model_name == 'default' else False

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = build_model(image_size=(Config.img_height, Config.img_width, Config.img_channels),
                            n_classes=Config.n_classes,
                            mode='training',
                            l2_regularization=Config.l2_regularization,
                            scales=Config.scales,
                            aspect_ratios_global=Config.aspect_ratios,
                            aspect_ratios_per_layer=None,
                            two_boxes_for_ar1=Config.two_boxes_for_ar1,
                            steps=Config.steps,
                            offsets=Config.offsets,
                            clip_boxes=Config.clip_boxes,
                            variances=Config.variances,
                            normalize_coords=Config.normalize_coords,
                            subtract_mean=Config.intensity_mean,
                            divide_by_stddev=Config.intensity_range)
        # model.load_weights("./weights/" + args.model_name + ".h5", by_name=True)
        adam = Adam(lr=args.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:
        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={'AnchorBoxes': AnchorBoxes,
                                           'compute_loss': ssd_loss.compute_loss})

    # Load the data.
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    batch_size = args.batch_size

    # 4: Define the image processing chain.
    data_augmentation_chain = DataAugmentationConstantInputSize(
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        random_scale=(0.5, 2.0, 0.5),
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0))

    # 5: Instantiate an encoder that can encode ground truth labels into the format
    #    needed by the SSD loss function. The encoder constructor needs the spatial
    #    dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [model.get_layer('classes4').output_shape[1:3],
                       model.get_layer('classes5').output_shape[1:3],
                       model.get_layer('classes6').output_shape[1:3],
                       model.get_layer('classes7').output_shape[1:3]]

    ssd_input_encoder = SSDInputEncoder(img_height=Config.img_height,
                                        img_width=Config.img_width,
                                        n_classes=Config.n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=Config.scales,
                                        aspect_ratios_global=Config.aspect_ratios,
                                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                                        steps=Config.steps,
                                        offsets=Config.offsets,
                                        clip_boxes=Config.clip_boxes,
                                        variances=Config.variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.3,
                                        normalize_coords=Config.normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[data_augmentation_chain],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images', 'encoded_labels'},
                                             keep_images_without_gt=False)
    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images', 'encoded_labels'},
                                         keep_images_without_gt=False)

    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() + '/weights/ssd7_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
    csv_logger = CSVLogger(filename='ssd7_training_log.csv',
                           separator=',',
                           append=True)
    # Note: early_stopping is defined but not included in the callbacks list below.
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.0,
                                   patience=10,
                                   verbose=1)
    reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss',
                                             factor=0.2,
                                             patience=8,
                                             verbose=1,
                                             epsilon=0.001,
                                             cooldown=0,
                                             min_lr=0.00001)
    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)
    callbacks = [model_checkpoint, csv_logger, reduce_learning_rate, tf_log]

    # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 1000

    # Train/fit the model.
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size / batch_size),
                                      initial_epoch=initial_epoch)

    # Prediction output.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=False,
                                             transformations=[],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images',
                                                      'processed_labels',
                                                      'filenames'},
                                             keep_images_without_gt=False)

    i = 0
    for val in range(val_dataset_size):
        batch_images, batch_labels, batch_filenames = next(predict_generator)
        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.5,
                                           top_k=200,
                                           normalize_coords=Config.normalize_coords,
                                           img_height=Config.img_height,
                                           img_width=Config.img_width)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])
        current_axis = plt.gca()
        # Set the colors for the bounding boxes.
        colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
        # Class names to print onto the image instead of IDs.
        classes = ['background', 'polyps']

        # Draw the ground truth boxes in green (omit the label for more clarity).
        for box in batch_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color='green', fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': 'green', 'alpha': 1.0})

        # Draw the predicted boxes in blue.
        for box in y_pred_decoded[i]:
            xmin = box[-4]
            ymin = box[-3]
            xmax = box[-2]
            ymax = box[-1]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              color=color, fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white',
                              bbox={'facecolor': color, 'alpha': 1.0})

        image = plt.gcf()
        # plt.show()
        plt.draw()
        image.savefig(os.getcwd() + "/val_predictions/val_" + str(val) + ".png", dpi=100)
def predict_all_to_json(out_file,
                        model,
                        img_height,
                        img_width,
                        classes_to_cats,
                        data_generator,
                        batch_size,
                        data_generator_mode='resize',
                        model_mode='training',
                        confidence_thresh=0.01,
                        iou_threshold=0.45,
                        top_k=200,
                        pred_coords='centroids',
                        normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them
    in a JSON file in the MS COCO detection results format.

    Arguments:
        out_file (str): The file name (full path) under which to save the results JSON file.
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model
            to the non-consecutive original MS COCO category IDs.
        data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect
            ratios of the images. If 'pad', the input images will be first padded so that they have the
            aspect ratio defined by `img_height` and `img_width` and then resized to `(img_height, img_width)`.
            This mode preserves the aspect ratios of the images.
        model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference'
            or 'inference_fast'. This is needed in order to know whether the model output is already decoded
            or still needs to be decoded. Refer to the model documentation for the meaning of the individual modes.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a
            specific positive class in order to be considered for the non-maximum suppression stage for the
            respective class. A lower value will result in a larger part of the selection process being done
            by the non-maximum suppression stage, while a larger value will result in a larger part of the
            selection process happening in the confidence thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater
            than `iou_threshold` with a locally maximal box will be removed from the set of predictions for
            a given class, where 'maximal' refers to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item
            after the non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either
            'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
            'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format
            `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates
            (i.e. coordinates in [0,1]) and you wish to transform these relative coordinates back to
            absolute coordinates. If the model outputs relative coordinates, but you do not want to convert
            them back to absolute coordinates, set this to `False`. Do not set this to `True` if the model
            already outputs absolute coordinates, as that would result in incorrect coordinates. Requires
            `img_height` and `img_width` if set to `True`.

    Returns:
        None.
    '''
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    if data_generator_mode == 'resize':
        transformations = [convert_to_3_channels, resize]
    elif data_generator_mode == 'pad':
        random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width / img_height, clip_boxes=False)
        transformations = [convert_to_3_channels, random_pad, resize]
    else:
        raise ValueError("Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(data_generator_mode))

    # Set the generator parameters.
    generator = data_generator.generate(batch_size=batch_size,
                                        shuffle=False,
                                        transformations=transformations,
                                        label_encoder=None,
                                        returns={'processed_images',
                                                 'image_ids',
                                                 'inverse_transform'},
                                        keep_images_without_gt=True)

    # Put the results in this list.
    results = []

    # Compute the number of batches to iterate over the entire dataset.
    n_images = data_generator.get_dataset_size()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))

    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results file')
    for i in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # If the model was created in 'training' mode, the raw predictions need to
        # be decoded and filtered, otherwise that's already taken care of.
        if model_mode == 'training':
            # Decode.
            y_pred = decode_detections(y_pred,
                                       confidence_thresh=confidence_thresh,
                                       iou_threshold=iou_threshold,
                                       top_k=top_k,
                                       input_coords=pred_coords,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
        else:
            # Filter out the all-zeros dummy elements of `y_pred`. (A separate
            # index is used so the outer loop variable is not shadowed.)
            y_pred_filtered = []
            for j in range(len(y_pred)):
                y_pred_filtered.append(y_pred[j][y_pred[j, :, 0] != 0])
            y_pred = y_pred_filtered
        # Convert the predicted box coordinates for the original images.
        y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred):
            for box in batch_item:
                class_id = box[0]
                # Transform the consecutive class IDs back to the original COCO category IDs.
                cat_id = classes_to_cats[class_id]
                # Round the box coordinates to reduce the JSON file size.
                xmin = float(round(box[2], 1))
                ymin = float(round(box[3], 1))
                xmax = float(round(box[4], 1))
                ymax = float(round(box[5], 1))
                width = xmax - xmin
                height = ymax - ymin
                bbox = [xmin, ymin, width, height]
                result = {}
                result['image_id'] = batch_image_ids[k]
                result['category_id'] = cat_id
                result['score'] = float(round(box[1], 3))
                result['bbox'] = bbox
                results.append(result)

    with open(out_file, 'w') as f:
        json.dump(results, f)
    print("Prediction results saved in '{}'".format(out_file))
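# Example invocation of predict_all_to_json() (a sketch; the model, category
# mapping and dataset objects are assumed to exist as in the surrounding
# evaluation scripts, and the output file name is hypothetical):
#
#   predict_all_to_json(out_file='detections_val_ssd300_results.json',
#                       model=model,
#                       img_height=300,
#                       img_width=300,
#                       classes_to_cats=classes_to_cats,
#                       data_generator=val_dataset,
#                       batch_size=8,
#                       data_generator_mode='resize',
#                       model_mode='training')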
def _main_(args): print('Hello World! This is {:s}'.format(args.desc)) # config_path = args.conf # with open(config_path) as config_buffer: # config = json.loads(config_buffer.read()) ############################################################# # Set model parameters ############################################################# img_height = 300 # Height of the model input images img_width = 300 # Width of the model input images img_channels = 3 # Number of color channels of the model input images mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights. swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images. n_classes = 20 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets scales = scales_pascal aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer. offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True ############################################################# # Create the model ############################################################# # 1: Build the Keras model. model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) # 2: Load some weights into the model. # 3: Instantiate an optimizer and the SSD loss function and compile the model. adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) ############################################################# # Prepare the data ############################################################# # 1: Instantiate two `DataGenerator` objects: One for training, one for validation. train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. 
    VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages'
    VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations'
    VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt'
    # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'
    # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt'

    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                            image_set_filenames=[VOC_2007_train_image_set_filename],
                            annotations_dirs=[VOC_2007_annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)
    val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                          image_set_filenames=[VOC_2007_val_image_set_filename],
                          annotations_dirs=[VOC_2007_annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)

    # Note: only the VOC2007 train/val splits are parsed above, even though the
    # HDF5 file names below suggest the 07+12 trainval and 07 test sets.
    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)
    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    # 3: Set the batch size.
    batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

    # 4: Set the image transformations for pre-processing and data augmentation options.
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[ssd_data_augmentation],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images', 'encoded_labels'},
                                             keep_images_without_gt=False)
    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels, resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images', 'encoded_labels'},
                                         keep_images_without_gt=False)

    # Get the number of samples in the training and validation datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    #############################################################
    # Kick off the training
    #############################################################
    # Define model callbacks.
    model_checkpoint = ModelCheckpoint(
        filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
    csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan]

    # Train
    initial_epoch = 0
    final_epoch = 120
    steps_per_epoch = 1000
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    #############################################################
    # Run the evaluation
    #############################################################
    # 1: Set the generator for the predictions.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=True,
                                             transformations=[convert_to_3_channels, resize],
                                             label_encoder=None,
                                             returns={'processed_images', 'filenames', 'inverse_transform',
                                                      'original_images', 'original_labels'},
                                             keep_images_without_gt=False)

    # 2: Generate samples.
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)
    i = 0  # Which batch item to look at
    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.
    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 5: Convert the predictions for the original image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # 6: Draw the predicted boxes onto the image.

    # Set the colors for the bounding boxes.
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])
    current_axis = plt.gca()

    # Draw the ground truth boxes in green.
    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white',
                          bbox={'facecolor': 'green', 'alpha': 1.0})

    # Draw the predicted boxes in their per-class colors.
    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white',
                          bbox={'facecolor': color, 'alpha': 1.0})
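# The training callbacks above reference an `lr_schedule` function that isn't
# defined in this snippet. A minimal sketch of a step-decay schedule; the epoch
# boundaries and rates here are assumptions modeled on common SSD300 Pascal VOC
# training setups, not taken from this code:
def lr_schedule(epoch):
    # Keep the base learning rate for most of training, then decay it twice.
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001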
def on_epoch_end(self, epoch, logs=None):
    # Compute the F1 score by applying NMS to the model's predictions.

    # Make predictions. Start from a zeros array so batches can be appended,
    # and drop the dummy first item afterwards.
    predictions = np.zeros(shape=self.output_shape)
    if self.label_csv is None:
        for batch in hp.get_batch(32, self.data):
            pred = self.model.predict(batch)
            predictions = np.append(predictions, pred, axis=0)
    else:
        file_label = pd.read_csv(self.label_csv)
        # Get all the image names.
        file_column = file_label.columns
        img_val = file_label[file_column[0]].unique()
        normalized_label = []
        # Iterate over the images in batches of 32.
        for start_i in range(0, len(img_val), 32):
            end_i = start_i + 32
            input_ = []
            for img_name in img_val[start_i:end_i]:
                img = imread(self.path_img + '/' + img_name)
                height = img.shape[0]
                width = img.shape[1]
                # Get the labels for this image.
                original_label = file_label[file_label[file_column[0]] == img_name].values[:, 1:-1]
                # Change the format from (xmin, xmax, ymin, ymax) to (x, y, width, height).
                new_label = []
                for o_label in original_label:
                    new_label.append([o_label[0], o_label[2], o_label[1] - o_label[0], o_label[3] - o_label[2]])
                # Normalize the coordinates to [0, 1].
                new_label = hp.normilize_boxes(new_label, width, height)
                normalized_label.append(new_label)
                # Resize the image. Note: `scipy.misc.imresize` was removed in
                # SciPy 1.3, so this requires an older SciPy (with Pillow).
                resized_img = misc.imresize(img, size=(300, 300))
                input_.append(resized_img)
            input_ = np.array(input_)
            input_ = input_.reshape(-1, 300, 300, 3)
            pred = self.model.predict(input_)
            predictions = np.append(predictions, pred, axis=0)
    predictions = predictions[1:]  # Delete the empty first item.

    # Decode the predictions.
    pred_decod = decode_detections(predictions,
                                   confidence_thresh=self.confidence,
                                   iou_threshold=self.iou,
                                   top_k=self.top_k,
                                   normalize_coords=self.normalize_coords,
                                   img_height=self.height,
                                   img_width=self.width)
    pred_decod = np.array(pred_decod)

    # Remove the class and confidence from the predictions.
    pred_decod = hp.clean_predictions(pred_decod, id_class=1)
    pred_decod = hp.adjust_predictions(pred_decod)
    pred_decod = hp.get_coordinates(pred_decod)
    aux_decod = []
    for item in pred_decod:
        aux_decod.append(hp.normilize_boxes(item, self.width, self.height))
    pred_decod = aux_decod

    # Calculate the performance.
    if self.label_csv is None:
        precision, recall, f1_score = hp.cal_performance(self.label, pred_decod,
                                                         verborse=self.verborse, iou=self.iou_f1)
    else:
        precision, recall, f1_score = hp.cal_performance(normalized_label, pred_decod,
                                                         verborse=self.verborse, iou=self.iou_f1)
    print('F1 score:', f1_score)
    self.history.append([epoch, precision, recall, f1_score])

    # Save the history to file.
    history_f1 = pd.DataFrame(self.history, columns=['epoch', 'precision', 'recall', 'f1 score'])
    history_f1.to_csv(self.path_csv, index=False)

    if f1_score > self.best_f1:
        # Save the model whenever the F1 score improves.
        print('Improved F1 score from', self.best_f1, 'to', f1_score)
        self.best_f1 = f1_score
        self.model.save(self.path_save)
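# `on_epoch_end` is a method on a Keras callback class whose constructor isn't
# shown here. Wiring it into training might look roughly like the following;
# the class name `F1ScoreCallback` and every constructor argument below are
# hypothetical stand-ins -- match them to the actual class definition:
f1_callback = F1ScoreCallback(label_csv='val_labels.csv',
                              path_img='val_images',
                              path_csv='f1_history.csv',
                              path_save='best_f1_model.h5',
                              confidence=0.5, iou=0.45, top_k=200,
                              normalize_coords=True, height=300, width=300)

model.fit_generator(generator=train_generator,
                    steps_per_epoch=1000,
                    epochs=120,
                    callbacks=[f1_callback])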