def parse_annotations(self, annotation, id):
    """Load one training image and its boxes from a COCO-style annotation dict.

    Only annotations whose ``category_id`` is 1 or 3 are kept; they are
    remapped to class indices 0 and 1 respectively. Every other category is
    skipped.

    Args:
        annotation: Mapping with an ``'annotations'`` list. Each entry is read
            for ``'image_id'``, ``'bbox'`` (used as x, y, width, height) and
            ``'category_id'``.
        id: Image id to select; also the file stem of the ``.jpg`` loaded from
            ``./train_car_person/``.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.
        Before preprocessing, each row is
        ``[x_top_left, y_top_left, x_bottom_right, y_bottom_right, class]``.

    Raises:
        FileNotFoundError: If the image file cannot be read.
    """
    image_path = './train_car_person/' + str(id) + '.jpg'
    raw_image = cv2.imread(image_path)
    if raw_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: %s" % image_path)
    image = np.array(raw_image)

    # COCO category id -> class index used for training.
    category_to_class = {1: 0, 3: 1}

    bboxes = []
    for ann in annotation['annotations']:
        if ann['image_id'] != id:
            continue
        class_index = category_to_class.get(ann['category_id'])
        if class_index is None:
            continue
        x_top_left, y_top_left, w, h = ann['bbox'][:4]
        # The corner is computed before truncation so x + w is rounded once.
        bboxes.append([
            int(x_top_left),
            int(y_top_left),
            int(x_top_left + w),
            int(y_top_left + h),
            int(class_index),
        ])

    # Keep the array 2-D (N, 5) even when N == 0; np.array([]) alone is 1-D.
    bboxes = np.array(bboxes).reshape(-1, 5)
    image, bboxes = utils.image_preprocess(
        image, [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def parse_annotation(self, annotation):
    """Parse one annotation line into a preprocessed image and its boxes.

    Args:
        annotation: Whitespace-separated string. The first token is the image
            path; each following token is one comma-separated box.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist (kept for compatibility
            with existing callers).
        ValueError: If ``self.dataset_type`` is neither ``"converted_coco"``
            nor ``"yolo"``.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)
    image = cv2.imread(image_path)

    box_fields = [box.split(",") for box in line[1:]]
    if self.dataset_type == "converted_coco":
        bboxes = np.array([list(map(int, fields)) for fields in box_fields])
    elif self.dataset_type == "yolo":
        bboxes = np.array([list(map(float, fields)) for fields in box_fields])
    else:
        # Previously this fell through and failed later with an unbound name.
        raise ValueError(
            "unsupported dataset_type: %r" % (self.dataset_type,))

    if bboxes.size == 0:
        # A line without boxes yields shape (0,); keep it 2-D so the scaling
        # below and the downstream column indexing still work.
        bboxes = bboxes.reshape(0, 5)

    if self.dataset_type == "yolo":
        # "yolo" boxes are scaled by the image size: the first four columns
        # by width/height, the fifth column left as is.
        height, width, _ = image.shape
        bboxes = bboxes * np.array([width, height, width, height, 1])
        bboxes = bboxes.astype(np.int64)

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(
            np.copy(image), np.copy(bboxes))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image, bboxes = utils.image_preprocess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes),
    )
    return image, bboxes
def parse_annotaion(self, annotation, mAP='False'):
    """Turn one annotation record into an image and its bounding boxes.

    Args:
        annotation: Indexable record. ``annotation[0]`` is the image path,
            ``annotation[1]`` an iterable of comma-separated integer boxes,
            and ``annotation[2]`` the already-loaded image (read only when
            ``TRAIN_LOAD_IMAGES_TO_RAM`` is truthy).
        mAP: When equal to ``True`` the image and boxes are returned before
            ``image_preprocess`` is applied. The default is the string
            ``'False'``, which does not compare equal to ``True``.

    Returns:
        ``(image, bboxes)``; preprocessed unless ``mAP == True``.
    """
    img_path = annotation[0]
    if TRAIN_LOAD_IMAGES_TO_RAM:
        image = annotation[2]
    else:
        image = cv2.imread(img_path)

    parsed_boxes = [[int(value) for value in box.split(',')]
                    for box in annotation[1]]
    bboxes = np.array(parsed_boxes)

    if self.data_aug:
        image, bboxes = self.random_horzontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(
            np.copy(image), np.copy(bboxes))

    # Equality (not truthiness) is deliberate: the string default 'False'
    # must not trigger the early return.
    if mAP == True:
        return image, bboxes

    target_size = [self.input_sizes, self.input_sizes]
    image, bboxes = image_preprocess(
        np.copy(image), target_size, np.copy(bboxes))
    return image, bboxes
def predict(self, image_path='./414162.jpg', output_path='./test.jpg'):
    """Run detection on one image and save the annotated result.

    Args:
        image_path: Image to read. Defaults to the path that was previously
            hard-coded.
        output_path: Where the annotated image is written. Defaults to the
            path that was previously hard-coded.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read.

    Side effects:
        Sets NumPy's global print threshold to infinity, prints intermediate
        shapes and boxes, and writes ``output_path``.
    """
    np.set_printoptions(threshold=np.inf)

    raw_image = cv2.imread(image_path)
    if raw_image is None:
        # cv2.imread returns None instead of raising on failure.
        raise FileNotFoundError("cannot read image: %s" % image_path)
    image = np.array(raw_image)
    # NOTE(review): the full shape tuple is forwarded to postprocess_boxes,
    # as before - confirm the helper does not expect only (height, width).
    image_shape = image.shape
    print("image_shape: ", image_shape)

    image = np.copy(image)
    image_data = utils.image_preprocess(
        image, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]

    pred_bbox = self.sess.run([self.pred_bbox],
                              feed_dict={
                                  self.input: image_data,
                                  self.training: False
                              })
    pred_bbox = np.array(pred_bbox[0])

    # Use the same size the image was preprocessed with; the former literal
    # 416 disagreed with it whenever self.input_size differed.
    pred_bbox = utils.postprocess_boxes(
        pred_bbox, image_shape, self.input_size, 0.5)
    print("pred_bbox shape: ", pred_bbox.shape)
    pred_bbox = utils.nms(pred_bbox, 0.45)
    print("pred_bbox after: ", pred_bbox)

    image = utils.draw_bbox(image, pred_bbox, show_label=True)
    cv2.imwrite(output_path, image)
def detect_image(self,
                 image_path=None,
                 output_path=None,
                 input_size=416,
                 show=False,
                 score_threshold=0.3,
                 iou_threshold=0.45,
                 rectangle_colors=''):
    """Detect objects in one image file and return the annotated image.

    Args:
        image_path: Path of the image to read. Required despite the ``None``
            default.
        output_path: If not ``None``, the annotated image is written here.
        input_size: Side length passed to ``image_preprocess`` and
            ``postprocess_boxes``.
        show: If true, display the result and block until a key is pressed.
        score_threshold: Forwarded to ``postprocess_boxes``.
        iou_threshold: Forwarded to ``nms``.
        rectangle_colors: Forwarded to ``draw_bbox``.

    Returns:
        The image returned by ``draw_bbox``.

    Raises:
        ValueError: If ``image_path`` is ``None``.
        FileNotFoundError: If the image cannot be read.
    """
    if image_path is None:
        # Previously this surfaced as an UnboundLocalError further down.
        raise ValueError("image_path must be provided")

    original_image = cv2.imread(image_path)
    if original_image is None:
        raise FileNotFoundError("cannot read image: %s" % image_path)

    # NOTE(review): BGR2RGB is applied twice, so the channel order ends up
    # as loaded. Kept as is because the model input and cv2.imwrite below
    # both depend on it - confirm before dropping either call.
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    image_data = image_preprocess(np.copy(original_image),
                                  [input_size, input_size])
    image_data = tf.expand_dims(image_data, 0)

    # The model returns one prediction tensor per output scale.
    pred_bbox = self.tiny_YoloV3.predict(image_data)
    print(pred_bbox[0].shape)
    print(pred_bbox[1].shape)

    # Flatten each scale to (-1, last_dim) and stack them.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                               score_threshold)
    print(bboxes.shape)
    bboxes = nms(bboxes, iou_threshold, method='nms')
    if len(bboxes) > 0:
        # Guarded: with no detections bboxes[0] raised IndexError.
        print(bboxes[0].shape)
    print(len(bboxes))

    image = draw_bbox(original_image,
                      bboxes,
                      CLASSES=self.CLASSES,
                      rectangle_colors=rectangle_colors)

    if output_path is not None:
        cv2.imwrite(output_path, image)

    if show:
        cv2.imshow("predicted image", image)
        # Block until any key is pressed, then close the window.
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return image
def parse_annotation(self, annotation):
    """Parse one annotation line into a preprocessed image and its boxes.

    Args:
        annotation: Whitespace-separated string. The first token is the image
            path; each following token is one comma-separated box whose
            values are parsed as floats and truncated to ints.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        # The original "s% does not exist" parsed as a "% d" conversion and
        # raised TypeError for a string path instead of this KeyError.
        raise KeyError("%s does not exist" % image_path)

    image = np.array(cv2.imread(image_path))
    bboxes = np.array([
        list(map(lambda x: int(float(x)), box.split(',')))
        for box in line[1:]
    ])

    if self.data_aug:
        image, bboxes = self.random_horizotal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(
            np.copy(image), np.copy(bboxes))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image, bboxes = utils.image_preprocess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def telemetry(sid, data):
    """Handle one telemetry message and reply with a control command.

    Args:
        sid: Session identifier supplied by the caller; not used here.
        data: Mapping with ``"steering_angle"``, ``"throttle"``, ``"speed"``
            and ``"image"`` (a base64-encoded image) entries.

    The reported steering angle, throttle and speed are read but do not
    influence the command: the steering angle comes from ``model.predict``
    on the decoded frame and the throttle is a constant.
    """
    # Current state reported by the car (read but not used below).
    reported_angle, reported_throttle, reported_speed = (
        data[key] for key in ("steering_angle", "throttle", "speed"))

    # Decode the centre-camera frame into an array.
    encoded_frame = data["image"]
    frame = np.asarray(Image.open(BytesIO(base64.b64decode(encoded_frame))))

    # Preprocess, then add a leading batch axis for the model.
    batch = utils.image_preprocess(frame, 64, 64)[None, :, :, :]

    # The model takes only the image as its feature.
    steering_angle = float(model.predict(batch, batch_size=1))
    # Constant throttle; the model does not predict it.
    throttle = 0.05

    print(steering_angle, throttle)
    send_control(steering_angle, throttle)
# Load the frozen graph and fetch the tensors named in return_elements.
return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
with tf.Session(graph=graph) as sess:
    vid = cv2.VideoCapture(video_path)
    # Read the first frame up front; `success` drives the loop below.
    success, frame = vid.read()
    # Output frame size as (width, height), taken from the capture.
    size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # VideoWriter_fourcc selects the video codec; 20 is the playback frame rate.
    # fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out = cv2.VideoWriter('output_3.mp4', fourcc, 20.0, size)
    num_frame = 0
    # NOTE(review): the rest of this loop (reading the next frame, writing
    # output) is outside this excerpt.
    while success:
        # First two entries of the frame shape.
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
        # Add a leading batch axis.
        image_data = image_data[np.newaxis, ...]
        # return_tensors[0] is fed; [1], [2], [3] are the three prediction outputs.
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})
        # Flatten each output to rows of (5 + num_classes) values and stack them.
        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + num_classes)),
            np.reshape(pred_mbbox, (-1, 5 + num_classes)),
            np.reshape(pred_lbbox, (-1, 5 + num_classes))
        ], axis=0)
        # 0.4 is presumably the score threshold - confirm against utils.
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.4)
import utils as utils
import tensorflow as tf
from PIL import Image

# Tensor names fetched from the frozen graph: the input placeholder first,
# then the three prediction outputs.
return_elements = ["input/input_data:0", "yolo_v3_model/pred_sbbox/concat_2:0", "yolo_v3_model/pred_mbbox/concat_2:0", "yolo_v3_model/pred_lbbox/concat_2:0"]
pb_file = "./yolov3_coco.pb"
image_path = "./576527.jpg"
num_classes = 2
input_size = 416
graph = tf.Graph()

# Read the image and convert OpenCV's BGR channel order to RGB.
original_image = cv2.imread(image_path)
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
# First two entries of the image shape.
original_image_size = original_image.shape[:2]
image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
# Add a leading batch axis.
image_data = image_data[np.newaxis, ...]

return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

with tf.Session(graph=graph) as sess:
    # return_tensors[0] is fed; [1], [2], [3] are the three prediction outputs.
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={ return_tensors[0]: image_data})

# Debug output for the first prediction tensor only.
print("pred_bbox: ", pred_sbbox)
print("pred_bbox shape: ", np.array(pred_sbbox).shape)

# Flatten each output to rows of (5 + num_classes) values and stack them.
pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)), np.reshape(pred_mbbox, (-1, 5 + num_classes)), np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)
def detect_video(self,
                 video_path,
                 output_path=None,
                 input_size=416,
                 show=False,
                 score_threshold=0.3,
                 iou_threshold=0.45,
                 rectangle_colors=''):
    """Run detection on every frame of a video.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        output_path: If not ``None``, annotated frames are written here with
            the XVID codec. The original comment says the path must be
            ``.mp4``.
        input_size: Side length passed to ``image_preprocess`` and
            ``postprocess_boxes``.
        show: If true, display each frame; pressing ``q`` stops early.
        score_threshold: Forwarded to ``postprocess_boxes``.
        iou_threshold: Forwarded to ``nms``.
        rectangle_colors: Forwarded to ``draw_bbox``.

    Side effects:
        Prints the timing of each frame, averaged over the last 20
        predictions. The capture, the writer and any windows are released
        on every exit path.
    """
    times = []
    vid = cv2.VideoCapture(video_path)
    out = None
    try:
        # VideoCapture.get returns floats, hence the int() casts.
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        source_fps = int(vid.get(cv2.CAP_PROP_FPS))

        if output_path is not None:
            # Only build the writer when there is somewhere to write to.
            codec = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_path, codec, source_fps,
                                  (width, height))

        while True:
            grabbed, img = vid.read()
            if not grabbed or img is None:
                # End of stream or read failure; replaces the bare
                # `except:` that used to detect this.
                break

            # NOTE(review): BGR2RGB is applied twice, so the channel order
            # ends up as read. Kept as is because the model input and
            # out.write below both depend on it.
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image,
                                          cv2.COLOR_BGR2RGB)

            image_data = image_preprocess(np.copy(original_image),
                                          [input_size, input_size])
            image_data = tf.expand_dims(image_data, 0)

            t1 = time.time()
            pred_bbox = self.tiny_YoloV3.predict(image_data)
            t2 = time.time()

            # Flatten each output to (-1, last_dim) and stack them.
            pred_bbox = [
                tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox
            ]
            pred_bbox = tf.concat(pred_bbox, axis=0)

            bboxes = postprocess_boxes(pred_bbox, original_image,
                                       input_size, score_threshold)
            bboxes = nms(bboxes, iou_threshold, method='nms')

            # Rolling average over the last 20 prediction times.
            times.append(t2 - t1)
            times = times[-20:]
            ms = sum(times) / len(times) * 1000
            measured_fps = 1000 / ms
            print("Time: {:.2f}ms, {:.1f} FPS".format(ms, measured_fps))

            image = draw_bbox(original_image,
                              bboxes,
                              CLASSES=self.CLASSES,
                              rectangle_colors=rectangle_colors)
            image = cv2.putText(image,
                                "Time: {:.1f}FPS".format(measured_fps),
                                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (0, 0, 255), 2)

            if out is not None:
                out.write(image)
            if show:
                cv2.imshow('output', image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    break
    finally:
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def load_data(self, path, resize_height, resize_width, normalization):
    """Return ``image_preprocess`` applied to ``path``.

    All four arguments are forwarded positionally and unchanged; no instance
    state is read.
    """
    return image_preprocess(path, resize_height, resize_width, normalization)
def _main_():
    """Run TensorRT inference over the input image(s) and report throughput.

    Reads the input path from the module-level ``args``. The engine is loaded
    from a saved plan if one exists, otherwise built from the UFF file and
    saved. Each image is preprocessed, run through the engine, and the
    output mask and input image are displayed. Pressing Esc stops early.

    Raises:
        SystemExit: With code 1 when neither the engine plan nor the UFF
            file exists.
    """
    input_path = args.input
    uff_fpath = 'TensorRT/uff/{}.uff'.format(config.NET_BASENAME)
    engine_fpath = 'TensorRT/engines/{}.trt'.format(config.NET_BASENAME)

    if os.path.exists(engine_fpath):
        engine = tensorNet.createTrtFromPlan(engine_fpath)
    elif os.path.exists(uff_fpath):
        engine = tensorNet.createTrtFromUFF(uff_fpath,
                                            config.INPUT_TENSOR_NAMES[0],
                                            'activation_1/Sigmoid')
        tensorNet.saveEngine(engine, engine_fpath)
    else:
        print('No .uff file!')
        raise SystemExit(1)

    if os.path.isdir(input_path):
        image_paths = [
            os.path.join(input_path, inp_file)
            for inp_file in os.listdir(input_path)
        ]
    else:
        image_paths = [input_path]
    # Same filter as before: compare the last four characters of the path.
    image_paths = [
        inp_file for inp_file in image_paths
        if inp_file[-4:] in ('.jpg', '.png', 'JPEG', '.ppm')
    ]

    processing_count = 0
    sum_time = 0
    network_input_shp = (config.NETWORK_INPUT_W, config.NETWORK_INPUT_H,
                         config.NETWORK_INPUT_C)
    render_mode = True

    for image_path in tqdm(image_paths):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None on failure; skip instead of crashing.
            print('Skipping unreadable image: {}'.format(image_path))
            continue

        start_time = time.time()
        input_img = image_preprocess(image, network_input_shp)

        # Channels-last -> channels-first, contiguous float32 for the engine.
        image_chw = np.ascontiguousarray(np.moveaxis(input_img, -1, 0),
                                         dtype=np.float32)
        tensorNet.inference(engine, image_chw)

        # Output buffer that getOutput fills in place.
        mask_result = np.zeros((160, 320, 1), dtype=np.float32)
        print(mask_result.shape)
        tensorNet.getOutput(engine, 0, mask_result)

        sum_time += time.time() - start_time
        processing_count += 1

        if render_mode:
            cv2.imshow('result', np.uint8(mask_result))
            cv2.imshow('input', image)
            if cv2.waitKey(0) == 27:
                break  # esc to quit

    if processing_count == 0 or sum_time <= 0:
        # Nothing was timed; the rate below would divide by zero.
        print('No images were processed.')
        return

    fps = processing_count / sum_time
    print('Result: {}'.format(fps))
def get_mAP(Yolo, dataset, score_threshold=0.25, iou_threshold=0.50, TEST_INPUT_SIZE=TEST_INPUT_SIZE):
    """Evaluate a detector on ``dataset`` and return its mAP as a percentage.

    The function works in three passes:

    1. Write one ``<index>_ground_truth.json`` per sample under
       ``mAP/ground-truth`` and count ground-truth objects per class.
    2. Run the model on every sample and write one
       ``<class>_predictions.json`` per ground-truth class, sorted by
       descending confidence.
    3. Match predictions to ground truth per class, compute AP through
       ``voc_ap`` and write ``mAP/results.txt``.

    Args:
        Yolo: Model. ``Yolo.predict(batch)`` is used when ``YOLO_FRAMEWORK``
            is ``"tf"``; ``Yolo(batch)`` returning a mapping of tensors is
            used when it is ``"trt"``.
        dataset: Object providing ``num_samples``, ``annotations`` and
            ``parse_annotation(annotation, True)``.
        score_threshold: Forwarded to ``postprocess_boxes``.
        iou_threshold: Forwarded to ``nms`` and shown in the progress
            message. Matching against ground truth uses the fixed
            ``MINOVERLAP`` of 0.5 instead.
        TEST_INPUT_SIZE: Side length passed to ``image_preprocess`` and
            ``postprocess_boxes``.

    Returns:
        ``mAP * 100``. It is 0.0 when there are no ground-truth classes.

    Raises:
        ValueError: If ``YOLO_FRAMEWORK`` is neither ``"tf"`` nor ``"trt"``.

    Side effects:
        Deletes and recreates ``mAP/ground-truth``, writes
        ``mAP/results.txt`` and prints progress and results.
    """
    MINOVERLAP = 0.5  # default value (defined in the PASCAL VOC2012 challenge)
    NUM_CLASS = read_class_names(TRAIN_CLASSES)

    ground_truth_dir_path = 'mAP/ground-truth'
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    # Creates the parent 'mAP' directory as well when it is missing.
    os.makedirs(ground_truth_dir_path, exist_ok=True)

    print(f'\ncalculating mAP{int(iou_threshold*100)}...\n')

    # Pass 1: dump ground truth per sample and count objects per class.
    gt_counter_per_class = {}
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]
        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]

        bounding_boxes = []
        for box_gt, class_gt in zip(bboxes_gt, classes_gt):
            class_name = NUM_CLASS[class_gt]
            xmin, ymin, xmax, ymax = list(map(str, box_gt))
            bounding_boxes.append({
                "class_name": class_name,
                "bbox": " ".join([xmin, ymin, xmax, ymax]),
                "used": False
            })
            gt_counter_per_class[class_name] = (
                gt_counter_per_class.get(class_name, 0) + 1)

        with open(f'{ground_truth_dir_path}/{str(index)}_ground_truth.json',
                  'w') as outfile:
            json.dump(bounding_boxes, outfile)

    # Classes are processed in alphabetical order.
    gt_classes = sorted(gt_counter_per_class)
    n_classes = len(gt_classes)

    # Pass 2: run the model and collect predictions per ground-truth class.
    # NOTE(review): parse_annotation is called a second time per sample. If
    # it applies random augmentation, these images differ from the ground
    # truth written above - confirm augmentation is off for evaluation.
    times = []
    predictions_per_class = {class_name: [] for class_name in gt_classes}
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]
        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        image = image_preprocess(np.copy(original_image),
                                 [TEST_INPUT_SIZE, TEST_INPUT_SIZE])
        image_data = image[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = [value.numpy() for value in result.values()]
        else:
            # Previously this fell through and failed on an unbound name.
            raise ValueError(
                "unsupported YOLO_FRAMEWORK: %r" % (YOLO_FRAMEWORK,))
        t2 = time.time()
        times.append(t2 - t1)

        # Flatten each output to (-1, last_dim) and stack them.
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image,
                                   TEST_INPUT_SIZE, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        for bbox in bboxes:
            coor = np.array(bbox[:4], dtype=np.int32)
            score = '%.4f' % bbox[4]
            class_name = NUM_CLASS[int(bbox[5])]
            if class_name not in predictions_per_class:
                # AP is only computed for classes present in the ground
                # truth; gt_classes.index() used to raise ValueError here.
                continue
            xmin, ymin, xmax, ymax = list(map(str, coor))
            predictions_per_class[class_name].append({
                "confidence": str(score),
                "file_id": str(index),
                "bbox": " ".join([xmin, ymin, xmax, ymax])
            })

    # Average prediction time; guarded so an empty dataset does not divide
    # by zero.
    if times:
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms if ms > 0 else 0.0
    else:
        fps = 0.0

    for class_name in gt_classes:
        predictions_per_class[class_name].sort(
            key=lambda x: float(x['confidence']), reverse=True)
        with open(f'{ground_truth_dir_path}/{class_name}_predictions.json',
                  'w') as outfile:
            json.dump(predictions_per_class[class_name], outfile)

    # Pass 3: compute the AP for each class.
    sum_AP = 0.0
    with open("mAP/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        for class_name in gt_classes:
            predictions_file = f'{ground_truth_dir_path}/{class_name}_predictions.json'
            with open(predictions_file) as infile:
                predictions_data = json.load(infile)

            # One true-positive / false-positive flag per prediction.
            nd = len(predictions_data)
            tp = [0] * nd
            fp = [0] * nd
            for idx, prediction in enumerate(predictions_data):
                file_id = prediction["file_id"]
                # Re-read the ground truth each time: "used" flags set by
                # earlier predictions are persisted through this file.
                gt_file = f'{ground_truth_dir_path}/{str(file_id)}_ground_truth.json'
                with open(gt_file) as infile:
                    ground_truth_data = json.load(infile)

                ovmax = -1
                gt_match = -1
                bb = [float(x) for x in prediction["bbox"].split()]
                for obj in ground_truth_data:
                    if obj["class_name"] != class_name:
                        continue
                    bbgt = [float(x) for x in obj["bbox"].split()]
                    bi = [
                        max(bb[0], bbgt[0]),
                        max(bb[1], bbgt[1]),
                        min(bb[2], bbgt[2]),
                        min(bb[3], bbgt[3])
                    ]
                    iw = bi[2] - bi[0] + 1
                    ih = bi[3] - bi[1] + 1
                    if iw > 0 and ih > 0:
                        # IoU = area of intersection / area of union
                        ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (
                            bbgt[2] - bbgt[0] + 1) * (
                                bbgt[3] - bbgt[1] + 1) - iw * ih
                        ov = iw * ih / ua
                        if ov > ovmax:
                            ovmax = ov
                            gt_match = obj

                # True positive only for the first sufficiently overlapping
                # match of a ground-truth box; everything else is a false
                # positive (low overlap or duplicate detection).
                if ovmax >= MINOVERLAP and not bool(gt_match["used"]):
                    tp[idx] = 1
                    gt_match["used"] = True
                    with open(gt_file, 'w') as f:
                        f.write(json.dumps(ground_truth_data))
                else:
                    fp[idx] = 1

            # Running totals over the confidence-sorted predictions.
            fp = np.cumsum(fp).tolist()
            tp = np.cumsum(tp).tolist()

            # Plain lists are passed to voc_ap, as before.
            rec = [
                float(hits) / gt_counter_per_class[class_name] for hits in tp
            ]
            prec = [
                float(hits) / (misses + hits)
                for hits, misses in zip(tp, fp)
            ]

            ap, _, _ = voc_ap(rec, prec)
            sum_AP += ap

            text = "{0:.3f}%".format(ap * 100) + " = " + class_name + " AP "
            rounded_prec = ['%.3f' % elem for elem in prec]
            rounded_rec = ['%.3f' % elem for elem in rec]
            results_file.write(text + "\n Precision: " + str(rounded_prec) +
                               "\n Recall :" + str(rounded_rec) + "\n\n")
            print(text)

        results_file.write("\n# mAP of all classes\n")
        # Guarded so a dataset without ground-truth objects yields 0.
        mAP = sum_AP / n_classes if n_classes else 0.0
        text = "mAP = {:.3f}%, {:.2f} FPS".format(mAP * 100, fps)
        results_file.write(text + "\n")
        print(text)

    return mAP * 100