def main(argv=None):
    """Detect objects in ``FLAGS.input_img`` and save the annotated image.

    The detector comes either from a frozen graph (``FLAGS.frozen_model``)
    or from a checkpoint (``FLAGS.ckpt_file``).  In checkpoint mode the
    ``FLAGS.tiny`` / ``FLAGS.spp`` switches choose the network builder.
    The result is written to ``FLAGS.output_img``.

    :param argv: accepted but not used by this function.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction),
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    # Letter-box to the network input size; 128 is the fill value passed on.
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)
    batch = [img_resized]

    if FLAGS.frozen_model:
        load_started = time.time()
        graph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - load_started))

        boxes, inputs = get_boxes_and_inputs_pb(graph)

        with tf.Session(graph=graph, config=session_config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: batch})
    else:
        if FLAGS.tiny:
            builder = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            builder = yolo_v3.yolo_v3_spp
        else:
            builder = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(
            builder, len(classes), FLAGS.size, FLAGS.data_format)
        saver = tf.train.Saver(
            var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=session_config) as sess:
            restore_started = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(
                time.time() - restore_started))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: batch})

    # t0 was set just before the forward pass, so the printed time covers
    # inference plus non-max suppression.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)
def __init__(self,
             tiny=False,
             cls_path='coco.names',
             img_size=(416, 416),
             data_format='NHWC',
             frozen_model='',
             ckpt_path='saved_model/model.ckpt',
             conf_threshold=0.5,
             iou_threshold=0.4,
             gpu_memory_fraction=0.2,
             is_training=False):
    """
    Wrapper class for the YOLO v3 detector.

    Builds the detection graph, opens a session and restores the
    checkpoint weights; restore time is printed.

    :param tiny: build the tiny YOLO v3 network instead of the full one
    :param cls_path: file storing detection classes
    :param img_size: image size, stored as ``self.size`` and passed to
        ``get_boxes_and_inputs``
    :param data_format: Data format: NCHW (gpu only) / NHWC
    :param frozen_model: stored on the instance only; the graph is always
        restored from ``ckpt_path`` here
    :param ckpt_path: path to model checkpoint file
    :param conf_threshold: stored as ``self.conf_threshold``
    :param iou_threshold: stored as ``self.iou_threshold``
    :param gpu_memory_fraction: per-process GPU memory fraction for the
        session
    :param is_training: stored as ``self.is_training``
    """
    # Session configuration.
    self.gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    self.config = tf.ConfigProto(
        gpu_options=self.gpu_options, log_device_placement=True)

    # Plain settings kept on the instance.
    self.is_training = is_training
    self.frozen_model = frozen_model
    self.gpu_memory_fraction = gpu_memory_fraction
    self.tiny = tiny
    self.size = img_size
    self.data_format = data_format
    self.ckpt_file = ckpt_path
    self.conf_threshold = conf_threshold
    self.iou_threshold = iou_threshold

    # Network builder and graph endpoints.
    self.model = (yolo_v3_tiny.yolo_v3_tiny
                  if self.tiny else yolo_v3.yolo_v3)
    self.classes = load_coco_names(cls_path)
    self.boxes, self.inputs = get_boxes_and_inputs(
        self.model, len(self.classes), self.size, self.data_format)

    # Restore the 'detector' scope variables into a fresh session.
    detector_vars = tf.global_variables(scope='detector')
    self.saver = tf.train.Saver(var_list=detector_vars)
    self.sess = tf.Session(config=self.config)
    restore_started = time.time()
    self.saver.restore(self.sess, self.ckpt_file)
    print('Model restored in {:.2f}s'.format(time.time() - restore_started))
def __init__(self):
    """Build the full YOLO v3 detector with fixed settings and load it.

    Uses 'coco.names' for the classes, a 416 input size, NHWC layout and
    the checkpoint at './saved_model/model.ckpt'.  The session is allowed
    to grow GPU memory, with the per-process fraction set to 0.5.
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    self.classes = load_coco_names('coco.names')
    self.model = yolo_v3.yolo_v3
    self.boxes, self.inputs = get_boxes_and_inputs(
        self.model, len(self.classes), 416, 'NHWC')

    detector_vars = tf.global_variables(scope='detector')
    self.saver = tf.train.Saver(var_list=detector_vars)
    self.sess = tf.Session(config=session_config)

    restore_started = time.time()
    self.saver.restore(self.sess, './saved_model/model.ckpt')
    print('Model restored in {:.3f}s'.format(time.time() - restore_started))
def main(argv=None):
    """Run the detector on 'out/images/19.png' and save 'out_check.png'.

    Settings (``size``, ``frozen_model``, ``tiny``, ``data_format``,
    ``ckpt_file``, ``conf_threshold``, ``iou_threshold``) are read from
    names defined outside this function.

    :param argv: accepted but not used by this function.
    """
    img = Image.open('out/images/19.png')
    img_resized = letter_box_image(img, size, size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names('coco.names')
    batch = [img_resized]

    if frozen_model:
        load_started = time.time()
        graph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - load_started))

        boxes, inputs = get_boxes_and_inputs_pb(graph)

        with tf.Session(graph=graph) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: batch})
    else:
        builder = yolo_v3_tiny.yolo_v3_tiny if tiny else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(
            builder, len(classes), size, data_format)
        saver = tf.train.Saver(
            var_list=tf.global_variables(scope='detector'))

        with tf.Session() as sess:
            restore_started = time.time()
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(
                time.time() - restore_started))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: batch})

    # The printed time spans the forward pass and non-max suppression.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=conf_threshold,
        iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (size, size), True)
    img.save('out_check.png')
def main(argv=None):
    """Run the camera demo with a frozen graph or a restored checkpoint.

    With ``FLAGS.frozen_model`` set, the frozen graph is loaded; otherwise
    the tiny or full YOLO v3 network is built and its weights are restored
    from ``FLAGS.ckpt_file``.  In both cases the live session and the
    ``boxes`` / ``inputs`` tensors are handed to ``show_camera``.

    Changes from the previous version:
    * the checkpoint branch used to restore the weights and then leave the
      session without running anything; it now calls ``show_camera`` like
      the frozen-graph branch does;
    * the unused ``Image.open(FLAGS.input_img)`` was dropped, so the camera
      demo no longer needs a still image on disk.

    :param argv: accepted but not used by this function.
    """
    # Let TensorFlow grow GPU memory on demand instead of reserving a fixed
    # fraction up front.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            show_camera(sess, boxes, inputs)
    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(
            model, len(classes), FLAGS.size, FLAGS.data_format)
        saver = tf.train.Saver(
            var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            # Same entry point as the frozen-graph branch.
            show_camera(sess, boxes, inputs)
def get_score_from_image(img_fp, gpu_options, config, model):
    """Detect objects in one image and return person scores plus timing.

    :param img_fp: image path (or anything ``Image.open`` accepts)
    :param gpu_options: accepted but not used by this function
    :param config: session configuration passed to ``tf.Session``
    :param model: the graph to run when ``FLAGS.frozen_model`` is set;
        ignored in checkpoint mode, where the builder is chosen from
        ``FLAGS.tiny`` / ``FLAGS.spp``
    :return: ``(get_person_scores(...), elapsed_ms)`` where ``elapsed_ms``
        is the time from graph setup to the end of the forward pass, in
        milliseconds rounded to 3 decimals (session creation and, in
        checkpoint mode, weight restoring are included)
    """
    image = Image.open(img_fp)
    net_input = letter_box_image(image, FLAGS.size, FLAGS.size, 128)
    net_input = net_input.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    inference_start_time = time.time()
    if FLAGS.frozen_model:
        boxes, inputs = get_boxes_and_inputs_pb(model)
        with tf.Session(graph=model, config=config) as sess:
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    else:
        if FLAGS.tiny:
            builder = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            builder = yolo_v3.yolo_v3_spp
        else:
            builder = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(
            builder, len(classes), FLAGS.size, FLAGS.data_format)
        saver = tf.train.Saver(
            var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            saver.restore(sess, FLAGS.ckpt_file)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    elapsed_seconds = time.time() - inference_start_time

    # Non-max suppression is deliberately outside the timed section.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    person_scores = get_person_scores(filtered_boxes, classes)
    return person_scores, round(elapsed_seconds * 1000, 3)
def main(argv=None):
    """Evaluate YOLO v3 followed by the specific-object recogniser.

    For every entry of ``cfg.TEST_FILE_PATH`` that has an annotation and a
    matching file in the sibling ``<dir>_cropped`` directory:

    1. YOLO v3 detects boxes in the image.
    2. If the target class (id 0) was detected, each of its boxes is
       cropped, resized to the recogniser's input size and classified;
       the boxes are then regrouped by the recognised class.
    3. IoU and precision against the ground-truth box are accumulated,
       the boxes and ground truth are drawn and the image is saved in
       ``cfg.OUTPUT_IMAGE_DIR``, and one row is appended to the CSV log
       at ``cfg.OUTPUT_LOG_PATH``.

    Nothing is done when ``cfg.FROZEN_MODEL`` is set (frozen-graph
    evaluation is not implemented).

    Fixes relative to the previous version:
    * the class list and CSV log are opened with ``with`` so they are
      closed even when an image fails;
    * the "classname only" class-file format is now reachable (the old
      ``if len(s_classes[0])`` test was always true) and parsed correctly;
    * ``recog_time``, ``pred_label`` and ``is_label_correct`` get
      per-image defaults, so an image without a target detection no
      longer raises ``NameError`` or logs the previous image's values;
    * the class names used for drawing are chosen per image instead of
      permanently overwriting the COCO names after the first recognition.

    :param argv: accepted but not used by this function.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(cfg.CLASS_NAME)

    if cfg.FROZEN_MODEL:
        # Frozen-graph evaluation is not implemented; nothing to run.
        return

    if cfg.TINY:
        model = yolo_v3_tiny.yolo_v3_tiny
    else:
        model = yolo_v3.yolo_v3

    # boxes: coordinates of top-left and bottom-right points.
    boxes, inputs = get_boxes_and_inputs(
        model, len(classes), cfg.IMAGE_SIZE, cfg.DATA_FORMAT)
    saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

    #
    # Specific object recognition
    #
    vgg16_image_size = vgg_16.default_image_size
    with open(cfg.S_CLASS_PATH, "r") as class_file:
        s_classes = [line.split(" ") for line in class_file]
    if len(s_classes[0]) > 1:
        # Class file format: "id classname"
        s_labels = {
            int(row[0]): row[1].replace("\n", "") for row in s_classes
        }
    else:
        # Class file format: "classname" only; the line number is the id.
        s_labels = {
            i: row[0].replace("\n", "") for i, row in enumerate(s_classes)
        }
    num_classes_s = len(s_labels)
    s_model = cfg.S_CKPT_FILE

    specific_pred, [cropped_images_placeholder, keep_prob,
                    is_training] = specific_object_recognition(
                        vgg16_image_size, num_classes_s)

    variables_to_restore = slim.get_variables_to_restore(include=["vgg_16"])
    restorer = tf.train.Saver(variables_to_restore)

    def is_cropped_file_Exist(orig_filepath):
        """Return True if ``<dir>_cropped/<file>`` exists for the path."""
        d, file = os.path.split(orig_filepath)
        return os.path.exists(os.path.join(d + "_cropped", file))

    net_size = np.array((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))

    with tf.Session(config=config) as sess:
        t0 = time.time()
        saver.restore(sess, cfg.CKPT_FILE)
        print('YOLO v3 Model restored in {:.2f}s'.format(time.time() - t0),
              "from:", cfg.CKPT_FILE)

        t0 = time.time()
        restorer.restore(sess, s_model)
        print(
            'Specific object recognition Model restored in {:.2f}s'.format(
                time.time() - t0), "from:", s_model)

        # Prepare the test set.
        with open(cfg.TEST_FILE_PATH, 'r') as f:
            entries = [line.rstrip().split() for line in f]

        # data: [[(path_str, label),
        #         [frame, center_x, center_y, size_x, size_y]], ...]
        data = [
            [entry, get_annotation(entry[0], txtname=cfg.GT_INFO_FILE_NAME)]
            for entry in entries
        ]
        # Skip images whose annotation could not be obtained.
        data = [item for item in data if item[1] is not None]
        # Skip images that have no cropped counterpart.
        data = [item for item in data if is_cropped_file_Exist(item[0][0])]

        iou_list = []  # per-image mean IoU
        ap_list = []  # per-image average precision

        with open(cfg.OUTPUT_LOG_PATH, 'w') as log_file:
            writer = csv.writer(log_file, lineterminator='\n')
            writer.writerow([
                'image path', 'movie_name', 'IoU', 'Average Precision',
                'Recall', 'is RoI detected?', 'is label correct?',
                'gt label', 'pred label', 'detect time', 'recog time'
            ])

            for count, gt in enumerate(data):
                # gt: [(path_str, label),
                #      [frame, center_x, center_y, size_x, size_y]]
                image_path = gt[0][0]

                # Ground truth: centre/size -> corner coordinates.
                cx, cy, w, h = [float(v) for v in gt[1][1:]]
                gt_box = [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]
                gt_label = int(gt[0][1])

                ious = []
                precisions = []
                print(count, ":", image_path)

                img = Image.open(image_path)
                img_resized = letter_box_image(img, cfg.IMAGE_SIZE,
                                               cfg.IMAGE_SIZE, 128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                detected_boxes = sess.run(
                    boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=cfg.CONF_THRESHOLD,
                    iou_threshold=cfg.IOU_THRESHOLD)
                detect_time = time.time() - t0
                print("detected boxes in :{:.2f}s ".format(detect_time),
                      filtered_boxes)

                # Per-image defaults; they stay in effect when the target
                # class is not among the detections.
                is_detected = len(filtered_boxes) != 0
                is_label_correct = "None"
                pred_label = ["None"]
                recog_time = 0.0
                draw_classes = classes  # COCO names until re-labelled

                np_img = np.array(img) / 255
                # Class id of the detections that are re-classified;
                # change this to match the dataset's class numbering.
                target_label = 0

                if target_label in filtered_boxes:
                    print("target class detected!")
                    bboxs = filtered_boxes[target_label]
                    # Keep an untouched copy: per the original author's
                    # note, convert_to_original_size() modifies the box it
                    # is given.
                    bboxs_ = copy.deepcopy(bboxs)

                    cropped_images = []
                    for box, _score in bboxs:
                        orig_size_box = convert_to_original_size(
                            box, net_size, np.array(img.size), True)
                        crop = np_img[
                            int(orig_size_box[1]):int(orig_size_box[3]),
                            int(orig_size_box[0]):int(orig_size_box[2])]
                        cropped_images.append(
                            cv2.resize(padding(crop),
                                       (vgg16_image_size, vgg16_image_size)))
                    input_cropped = np.asarray(cropped_images)

                    t0 = time.time()
                    pred = sess.run(specific_pred,
                                    feed_dict={
                                        cropped_images_placeholder:
                                        input_cropped,
                                        keep_prob: 1.0,
                                        is_training: False
                                    })
                    recog_time = time.time() - t0
                    print("Predictions found in {:.2f}s".format(recog_time))

                    pred_ids = pred.tolist()
                    pred_label = [s_labels[i] for i in pred_ids]
                    draw_classes = [
                        s_labels[i] for i in range(num_classes_s)
                    ]

                    # Regroup the detections by recognised class.
                    filtered_boxes = {}
                    for i, class_id in enumerate(pred_ids):
                        filtered_boxes.setdefault(class_id,
                                                  []).append(bboxs_[i])

                    # IoU / precision of every re-labelled box.
                    for key, key_boxes in filtered_boxes.items():
                        for pred_box in key_boxes:
                            p_box = copy.deepcopy(pred_box[0])
                            orig_scale_p_box = convert_to_original_size(
                                p_box, net_size, np.array(img.size), True)
                            if key == gt_label:
                                # Predicted class equals the ground truth.
                                iou = _iou(orig_scale_p_box, gt_box)
                                precision = calc_precision(
                                    orig_scale_p_box, gt_box)
                                is_label_correct = True
                            else:
                                iou = 0.0
                                precision = 0.0
                                is_label_correct = False
                            ious.append(iou)
                            print("Precision:", precision)
                            precisions.append(precision)

                # The small epsilon avoids division by zero when no box
                # was scored.
                average_iou = sum(ious) / (len(ious) + 1e-05)
                print("average IoU:", average_iou)
                iou_list.append(average_iou)
                print("mean average IoU:",
                      sum(iou_list) / (len(iou_list) + 1e-05))

                ap = sum(precisions) / (len(precisions) + 1e-05)
                ap_list.append(ap)
                print("Average Precision:", ap)
                print("mean Average Precision:",
                      sum(ap_list) / (len(ap_list) + 1e-05))

                draw_boxes(filtered_boxes, img, draw_classes,
                           (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), True)

                # Draw the ground truth.
                draw = ImageDraw.Draw(img)
                color = (0, 0, 0)
                draw.rectangle(gt_box, outline=color)
                draw.text(gt_box[:2], 'GT_' + s_labels[gt_label], fill=color)

                img.save(
                    os.path.join(
                        cfg.OUTPUT_IMAGE_DIR,
                        '{0:04d}_'.format(count) +
                        os.path.basename(image_path)))

                writer.writerow([
                    image_path,
                    os.path.basename(os.path.dirname(image_path)),
                    average_iou, ap, 'Recall', is_detected,
                    is_label_correct, s_labels[gt_label], pred_label[0],
                    detect_time, recog_time
                ])

        print("proc finished.")
def _detect_on_video_frames(sess, boxes, inputs, classes, cap):
    """Run the detector on every frame of ``cap`` until reading fails.

    For each frame the detected regions returned by ``draw_boxes`` are
    saved under './extracted_regions/' and the annotated frame is shown
    with matplotlib.
    """
    frame_index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read error): there is no frame to convert.
            break
        frame_index += 1

        # OpenCV delivers BGR; the network pipeline works on RGB images.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_resized = letter_box_image(img, size, size, 128)
        img_resized = img_resized.astype(np.float32)

        t0 = time.time()
        detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=conf_threshold,
            iou_threshold=iou_threshold)
        print("Predictions found in {:.2f}s".format(time.time() - t0))

        # draw_boxes also returns the cropped regions of the detections.
        rois = draw_boxes(filtered_boxes, img, classes, (size, size), True)
        for i, roi in enumerate(rois):
            roi.save('./extracted_regions/frame' + str(frame_index) +
                     '_ExtObj_' + str(i) + '.jpg')

        plt.imshow(np.array(img))
        plt.pause(0.02)
        plt.show()


def main():
    """Detect objects in every frame of 'video.avi'.

    The detector is loaded from the frozen graph when ``frozen_model`` is
    set, otherwise it is built (tiny or full YOLO v3) and restored from
    ``ckpt_file``.  All settings are read from names defined outside this
    function.

    Fixes relative to the previous version:
    * checkpoint mode created the ``Saver`` before the model graph existed
      and called ``saver.restore`` before the session was opened; the
      graph is now built first and the weights are restored inside the
      session;
    * the frame loop stops as soon as a read fails instead of passing a
      missing frame to ``cv2.cvtColor``;
    * the first frame is processed too (it used to be read and thrown
      away), so ``frame<N>`` file names now count from the first frame;
    * the capture is released when processing ends or fails.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(class_names)
    cap = cv2.VideoCapture('video.avi')
    plt.ion()

    try:
        if frozen_model:
            # The protobuf file contains the graph definition as well as
            # the weights of the model.
            t0 = time.time()
            frozenGraph = load_graph(frozen_model)
            print("Loaded graph in {:.2f}s".format(time.time() - t0))

            boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

            with tf.device("/GPU:0"):
                with tf.Session(graph=frozenGraph, config=config) as sess:
                    _detect_on_video_frames(sess, boxes, inputs, classes,
                                            cap)
        else:
            # Build the graph first: the Saver needs the 'detector'
            # variables to exist when it is created.
            if tiny:
                model = yolo_v3_tiny.yolo_v3_tiny
            else:
                model = yolo_v3.yolo_v3

            boxes, inputs = get_boxes_and_inputs(model, len(classes), size,
                                                 data_format)
            saver = tf.train.Saver(
                var_list=tf.global_variables(scope='detector'))

            with tf.Session(config=config) as sess:
                t0 = time.time()
                saver.restore(sess, ckpt_file)
                print('Model restored in {:.2f}s'.format(time.time() - t0))

                _detect_on_video_frames(sess, boxes, inputs, classes, cap)
    finally:
        cap.release()
def main(argv=None):
    """Run the full YOLO v3 detector over the VOC2007 test list.

    Image ids are read from 'VOC2007/ImageSets/Main/test.txt' (text before
    the first comma on each line).  Each image
    'VOC2007/JPEGImages/<id>.jpg' is processed with the checkpoint at
    ``FLAGS.ckpt_file`` and the annotated result is saved as
    'output/<id>.jpg'.

    Fixes relative to the previous version:
    * the list file is closed after reading, and blank lines are skipped;
    * an image that cannot be read is reported and skipped instead of
      crashing on ``image.shape``;
    * a ``ValueError`` while processing an image is still tolerated, but
      it is now reported instead of being silently dropped;
    * the timer is reset for every image, so the printed time is per
      image rather than cumulative since the model was restored.

    :param argv: accepted but not used by this function.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    with open("VOC2007/ImageSets/Main/test.txt", "r") as list_file:
        results = [
            line.strip('\n').split(',')[0] for line in list_file
            if line.strip()
        ]

    model = yolo_v3.yolo_v3
    classes = load_coco_names(FLAGS.class_names)
    boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                         FLAGS.data_format)
    saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

    with tf.Session(config=config) as sess:
        t0 = time.time()
        saver.restore(sess, FLAGS.ckpt_file)
        print('Model restored in {:.2f}s'.format(time.time() - t0))

        for file in results:
            image_path = 'VOC2007/JPEGImages/' + str(file) + '.jpg'
            print(image_path)

            # cv2.imread returns None (no exception) for a missing or
            # unreadable file.
            image = cv2.imread(image_path)
            if image is None:
                print('Skipping unreadable image: ' + image_path)
                continue
            print(image.shape)

            try:
                t0 = time.time()
                img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size,
                                               128)
                img_resized = img_resized.astype(np.float32)

                detected_boxes = sess.run(
                    boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(
                    time.time() - t0))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)
                img.save('output/' + file + '.jpg')
            except ValueError as err:
                # Best effort: one bad image must not stop the batch.
                print('Skipping {}: {}'.format(image_path, err))
def main(argv=None):
    """Perturb 'city.png' by following gradients towards a fake detection.

    A hand-written box (class 2) is turned into ground truth by
    ``generate_ground_truth`` and drawn into 'out_fakeboxes.jpg'.  With a
    frozen graph, the masked loss between that ground truth and the
    network output is differentiated with respect to the input; for
    ``num_iterations`` steps the smoothed, scaled gradient is subtracted
    from the image.  Each step writes 'out/grads/<i>.png' and
    'out/images/<i>.png'.  Without a frozen graph the checkpoint model is
    only restored and run once; its output is not used further.

    Settings (``size``, ``frozen_model``, ``num_iterations``,
    ``step_size``, ``tiny``, ``data_format``, ``ckpt_file``) are read from
    names defined outside this function.

    :param argv: accepted but not used by this function.
    """
    img = Image.open('city.png')
    img_resized = letter_box_image(img, size, size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names('coco.names')
    num_classes = len(classes)

    fake_boxes = {2: [(np.array([300, 200, 370, 250]), 1.)]}
    generated_boxes, g_indices = generate_ground_truth(fake_boxes, size, 0.4)

    # draw_boxes receives copies so the originals stay available below.
    draw_boxes(copy.deepcopy(generated_boxes), img, classes, (size, size),
               True)
    draw_boxes(copy.deepcopy(fake_boxes), img, classes, (size, size), True)
    img.save('out_fakeboxes.jpg')

    # 1 for every output row that belongs to a generated box, else 0.
    mask = np.zeros([1, 10647])
    for indices in g_indices.values():
        mask[0, indices] = 1

    # Ground-truth rows: 4 box values, objectness 1.0, one-hot class.
    gt_tensor = np.zeros([1, 10647, 4 + 1 + num_classes])
    for cls, cls_boxes in generated_boxes.items():
        for i, box in enumerate(cls_boxes):
            one_hot = np.zeros([num_classes])
            one_hot[cls] = 1
            gt_tensor[0, g_indices[cls][i]] = [
                *np.asarray(box[0]), 1., *one_hot
            ]

    if frozen_model:
        load_started = time.time()
        graph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - load_started))

        boxes, inputs = get_boxes_and_inputs_pb(graph)

        # The loss and gradient ops must live in the frozen graph.
        with graph.as_default():
            fake_gt = tf.constant(gt_tensor, dtype=tf.float32)
            mask_tensor = tf.constant(mask, dtype=tf.float32)
            fake_loss = mse(fake_gt, boxes) * mask_tensor
            fake_loss = tf.reduce_mean(fake_loss, axis=-1)
            grad_op = tf.gradients(fake_loss, inputs)

        with tf.Session(graph=graph) as sess:
            t0 = time.time()
            for step in range(num_iterations):
                grads = sess.run(grad_op, feed_dict={inputs: [img_resized]})
                raw_grad = grads[0][0]

                # The blur width grows with the iteration number; three
                # blur scales are summed.
                sigma = (step * 4.0) / num_iterations + 0.5
                grad = (gaussian_filter(raw_grad, sigma=sigma) +
                        gaussian_filter(raw_grad, sigma=sigma * 2) +
                        gaussian_filter(raw_grad, sigma=sigma * 0.5))

                step_size_scaled = step_size / (np.std(grad) + 1e-8)

                # Update the image by following the gradient.
                mod = grad * step_size_scaled

                Image.fromarray(np.uint8(mod + 128)).save(
                    'out/grads/{}.png'.format(step))

                img_resized = np.clip(img_resized - mod, 0, 255)
                Image.fromarray(np.uint8(img_resized)).save(
                    'out/images/{}.png'.format(step))
    else:
        builder = yolo_v3_tiny.yolo_v3_tiny if tiny else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(builder, num_classes, size,
                                             data_format)
        saver = tf.train.Saver(
            var_list=tf.global_variables(scope='detector'))

        with tf.Session() as sess:
            t0 = time.time()
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))

            t0 = time.time()
            detected_boxes = sess.run(boxes,
                                      feed_dict={inputs: [img_resized]})
def main(argv=None):
    """Evaluate the YOLO v3 detector on an annotated test list.

    For every entry of ``cfg.TEST_FILE_PATH`` that has an annotation and a
    matching file in the sibling ``<dir>_cropped`` directory, the image is
    run through the detector, scored against its single ground-truth box
    with ``evaluate`` (threshold 0.5) and logged as one CSV row in
    ``cfg.OUTPUT_LOG_PATH``.  Running means and the total TP / FP / FN
    counts are printed.

    Nothing is done when ``cfg.FROZEN_MODEL`` is set (frozen-graph
    evaluation is not implemented).

    Fixes relative to the previous version:
    * the CSV log is opened with ``with`` so it is closed (and flushed)
      even if an image fails;
    * the 'class/movie_name' column is now written as
      ``<parent dir>/<movie dir>``, matching its header; the two parts
      used to be joined in the opposite order;
    * per-image initialisations that were always overwritten are gone.

    :param argv: accepted but not used by this function.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(cfg.CLASS_NAME)

    if cfg.FROZEN_MODEL:
        # Frozen-graph evaluation is not implemented; nothing to run.
        return

    if cfg.TINY:
        model = yolo_v3_tiny.yolo_v3_tiny
    else:
        model = yolo_v3.yolo_v3

    # boxes: coordinates of top-left and bottom-right points.
    boxes, inputs = get_boxes_and_inputs(model, len(classes),
                                         cfg.IMAGE_SIZE, cfg.DATA_FORMAT)
    saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

    def is_cropped_file_Exist(orig_filepath):
        """Return True if ``<dir>_cropped/<file>`` exists for the path."""
        d, file = os.path.split(orig_filepath)
        return os.path.exists(os.path.join(d + "_cropped", file))

    with tf.Session(config=config) as sess:
        t0 = time.time()
        saver.restore(sess, cfg.CKPT_FILE)
        print('YOLO v3 Model restored in {:.2f}s'.format(time.time() - t0),
              "from:", cfg.CKPT_FILE)

        # Prepare the test set.
        with open(cfg.TEST_FILE_PATH, 'r') as f:
            entries = [line.rstrip().split() for line in f]

        # data: [[(path_str, label),
        #         [frame, center_x, center_y, size_x, size_y]], ...]
        data = [
            [entry, get_annotation(entry[0], txtname=cfg.GT_INFO_FILE_NAME)]
            for entry in entries
        ]
        # Skip images whose annotation could not be obtained.
        data = [item for item in data if item[1] is not None]
        # Skip images that have no cropped counterpart.
        data = [item for item in data if is_cropped_file_Exist(item[0][0])]

        total_iou = []  # per-image IoU
        total_tp = 0  # TP: positives with IoU > 0.5 and GT == predicted class
        total_fp = 0  # FP: positives that do not meet the TP condition
        total_fn = 0  # FN: ground truths that were not detected
        total_ap = []  # per-image precision (TP / total positives)

        with open(cfg.OUTPUT_LOG_PATH, 'w') as log_file:
            writer = csv.writer(log_file, lineterminator='\n')
            writer.writerow([
                'image path', 'class/movie_name', 'IoU', 'TP', 'FP', 'FN',
                'Average Precision', 'gt label', ' highest_conf_label',
                'detect time'
            ])

            for count, gt in enumerate(data):
                # gt: [(path_str, label),
                #      [frame, center_x, center_y, size_x, size_y]]
                image_path = gt[0][0]

                # Ground truth: centre/size -> corner coordinates.
                cx, cy, w, h = [float(v) for v in gt[1][1:]]
                gt_box = [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]
                gt_label = int(gt[0][1])  # ground-truth class
                gt_anno = {gt_label: gt_box}

                print(count, ":", image_path)
                img = Image.open(image_path)
                img_resized = letter_box_image(img, cfg.IMAGE_SIZE,
                                               cfg.IMAGE_SIZE, 128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                detected_boxes = sess.run(
                    boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=cfg.CONF_THRESHOLD,
                    iou_threshold=cfg.IOU_THRESHOLD)
                detect_time = time.time() - t0
                print("detected boxes in :{:.2f}s ".format(detect_time),
                      filtered_boxes)
                print(filtered_boxes)

                if len(filtered_boxes) != 0:
                    # Something was detected: score this image.
                    [tp, fp, fn], iou, precision, highest_conf_label = \
                        evaluate(filtered_boxes, gt_anno, img, thresh=0.5)
                else:
                    # Nothing detected: every ground truth is a miss.
                    iou = 0.0
                    precision = 0.0
                    tp = 0
                    fp = 0
                    fn = len(gt_anno)
                    highest_conf_label = -1

                total_iou.append(iou)
                total_ap.append(precision)
                total_tp += tp
                total_fp += fp
                total_fn += fn

                # The small epsilon avoids division by zero.
                print("IoU:", iou)
                print("mean average IoU:",
                      sum(total_iou) / (len(total_iou) + 1e-05))
                print("AP:", precision)
                print("mAP:", sum(total_ap) / (len(total_ap) + 1e-05))

                movie_name = os.path.basename(os.path.dirname(image_path))
                movie_parant_dir = os.path.basename(
                    os.path.dirname(os.path.dirname(image_path)))
                pred_label = (classes[highest_conf_label]
                              if highest_conf_label != -1 else "None")

                save_messe = [
                    image_path,
                    # Parent directory first, matching the column header.
                    os.path.join(movie_parant_dir, movie_name),
                    iou, tp, fp, fn, precision, classes[gt_label],
                    pred_label, detect_time
                ]
                writer.writerow(save_messe)
                print(save_messe)

            print("total tp :", total_tp)
            print("total fp :", total_fp)
            print("total fn :", total_fn)

        print("proc finished.")