def prep_frame(ftpts, frame, camera, errors, occupants, ped_bboxes, veh_bboxes, classes):
    pix_real = camera["im-gps"]
    real_pix = camera["gps-im"]
    origin = camera["estimated_camera_location"]
    frame_size = camera["frame_size"]
    print(frame_size)
    # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    if len(ped_bboxes) > 0:
        frame = utils.draw_bbox(frame, ped_bboxes, classes, show_label=True, redact=True)
    frame, x = pr.draw_radius(frame, ftpts, real_pix, pix_real, origin)
    utils.overlay_occupancy(frame, errors, occupants, frame_size)
    if len(veh_bboxes) > 0:
        frame = utils.draw_bbox(frame, veh_bboxes, classes, show_label=False, redact=False)
    # result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    return frame
def test_predict():
    from sklearn.model_selection import train_test_split
    import skimage.io as io

    with open('bboxGen/cropping.txt', 'rt') as f:
        df = f.read().split('\n')[:-1]
    df = [line.split(',') for line in df]
    coords = []
    for p, *coord in df:
        coords.append((p, [(int(coord[i]), int(coord[i + 1])) for i in range(0, len(coord), 2)]))
    trn_fnms, val_fnms = train_test_split(coords, test_size=200, random_state=42)

    model = bbox_model()
    model.load_state_dict(torch.load('bboxGen/logs/model-74.pth'))
    model.to(device)
    model.eval()

    # Sample a random validation image; high=len(val_fnms) so the last index is reachable
    imgnm, gtbox = val_fnms[np.random.randint(0, high=len(val_fnms))]
    gtbox = bounding_rectangle(gtbox, 9999999)
    with torch.no_grad():
        pred = predict(model, 'rawdata/bboxes/' + imgnm)
    image = io.imread('rawdata/bboxes/' + imgnm)
    draw_bbox(image, pred, gtbox)
    print(gtbox)
    print(pred)

    fnms = os.listdir('rawdata/train')
    trn_img = os.path.join('rawdata/train', fnms[np.random.randint(0, len(fnms))])
    with torch.no_grad():
        pred = predict(model, trn_img)
    image = io.imread(trn_img)
    draw_bbox(image, pred)
def test_camera():
    """ Simple test script; requires /dev/video0 """
    app = ObjectDetectorTF(gpu=False, cmap={1: 'person'})
    cam = cv2.VideoCapture(0)
    fps = []
    while True:
        ret, img = cam.read()
        if not ret:
            print('camera capture failed')
            break
        t0 = time.time()
        res = app(img)
        t1 = time.time()
        fps.append(1.0 / (t1 - t0 + 1e-9))

        msk = (res['score'] > 0.5)
        cls = res['class'][msk]
        box = res['box'][msk]
        score = res['score'][msk]
        for box_, cls_ in zip(box, cls):
            # ry0, rx0, ry1, rx1 = box_  # relative
            draw_bbox(img, box_, str(cls_))

        cv2.imshow('win', img)
        k = cv2.waitKey(1)
        if k in [ord('q'), 27]:
            print('quitting...')
            break
    print('average fps: {}'.format(np.mean(fps[-100:])))
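# Hedged sketch: draw_bbox itself is not defined in these snippets. Going by the
# "# ry0, rx0, ry1, rx1 = box_  # relative" hint above, a minimal OpenCV version
# might scale relative [y0, x0, y1, x1] boxes into pixel coordinates before
# drawing. The name, signature, and coordinate order here are assumptions, not
# the original API.
import cv2

def draw_bbox_sketch(img, box, label, color=(0, 255, 0)):
    h, w = img.shape[:2]
    ry0, rx0, ry1, rx1 = box  # relative coordinates in [0, 1]
    x0, y0 = int(rx0 * w), int(ry0 * h)
    x1, y1 = int(rx1 * w), int(ry1 * h)
    cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
    cv2.putText(img, label, (x0, max(0, y0 - 5)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return img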
def run(args):
    model = build_model()
    (clf, class_names) = read_classifier(os.path.join(args.data_path, 'classifier.pickle'))

    # If the classifier is None we only have one face enrolled
    if clf is None:
        verified_embedding, only_class = read_only_embedding(args.data_path)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error opening video stream or file")

    face_detector = FaceDetector()
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            # Detect faces and label each one
            faces = face_detector.detect_faces(frame)
            for face in faces:
                x, y, w, h = face
                cropped = frame[y:y + h, x:x + w]
                cropped = cv2.resize(cropped, (96, 96))
                cropped = np.around(convert_image(cropped), decimals=12)
                embedding = model.predict(np.array([cropped]))

                if clf is None:
                    dist = np.linalg.norm(verified_embedding - embedding)
                    match = dist < 0.7
                    label = only_class if match else "Unknown"
                    if args.debug:
                        label += ' (d: {})'.format(round(dist, 2))
                else:
                    predictions = clf.predict_proba(embedding)
                    pred_class = np.argmax(predictions, axis=1)[0]
                    score = round(np.max(predictions) * 100, 2)
                    match = score > 70
                    name = class_names[pred_class]
                    label = '{} ({}%)'.format(name, score)

                color = (0, 255, 0) if match else (0, 0, 255)
                draw_bbox(frame, x, y, x + w, y + h, label=label, color=color)

            cv2.imshow('Frame', frame)
            # Press Q on keyboard to exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
        else:
            break

    cap.release()
    cv2.destroyAllWindows()
def capture(named_path, data_path, count):
    cap = cv2.VideoCapture(0)
    # Check if the camera opened successfully
    if not cap.isOpened():
        print("Error opening video stream or file")

    captured_counter = 0
    face_detector = FaceDetector()
    model = build_model()
    while cap.isOpened() and captured_counter < count:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            # Show progress bar
            draw_progressbar(frame, (captured_counter / count))

            # Detect a face and write the crop to disk
            faces = face_detector.detect_faces(frame)
            if len(faces) > 0:
                # Per-person path
                file_path = os.path.join(named_path, str(captured_counter + 1) + '.jpg')
                print('Writing capture: ' + file_path)

                face = faces[0]  # Assume it's the only face
                x, y, w, h = face
                cropped = frame[y:y + h, x:x + w]
                cropped = cv2.resize(cropped, (96, 96))
                cv2.imwrite(file_path, cropped)
                captured_counter += 1

                draw_bbox(frame, x, y, x + w, y + h, label="Face detected")

            cv2.imshow('Frame', frame)
            # Press Q on keyboard to exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
        # Break the loop
        else:
            break

    # When everything is done, release the video capture object
    cap.release()
    cv2.destroyAllWindows()

    # Build and write the embedding file for this person
    build_embedding(model, named_path)
    # Rebuild the classifier
    build_classifier(data_path)
    print('Done!')
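# Hedged sketch: draw_progressbar is referenced above but not defined in these
# snippets. A minimal version, assuming progress is a float in [0, 1], could
# draw a filled bar along the top edge of the frame; the geometry and colors
# are assumptions.
import cv2

def draw_progressbar_sketch(frame, progress, height=10, color=(0, 255, 0)):
    h, w = frame.shape[:2]
    cv2.rectangle(frame, (0, 0), (w, height), (50, 50, 50), -1)            # background track
    cv2.rectangle(frame, (0, 0), (int(w * progress), height), color, -1)   # filled portion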
def detect_video(predict_func, video_path, class_name, input_size, output_path=""):
    # Read the video
    vid = cv.VideoCapture(video_path)
    video_size = (int(vid.get(cv.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv.CAP_PROP_FRAME_HEIGHT)))
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv.CAP_PROP_FOURCC))
    video_fps = vid.get(cv.CAP_PROP_FPS)
    isOutput = output_path != ""
    if isOutput:
        out = cv.VideoWriter(output_path, video_FourCC, video_fps, video_size)

    # FPS info
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()

    # Generate the color map
    colors = gen_colors(len(class_name))

    # Read frames
    while True:
        return_value, org_frame = vid.read()
        if not return_value:
            break

        # Detect
        # frame = squar_crop(frame, cropSize)
        frame = cv.resize(org_frame, input_size)
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        boxes, scores, classes = predict_func(frame)
        draw_bbox(org_frame, class_name, boxes.numpy(), scores.numpy(), classes.numpy(), colors)

        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0

        cv.putText(org_frame, text=fps, org=(3, 15), fontFace=cv.FONT_HERSHEY_SIMPLEX,
                   fontScale=0.50, color=(0, 255, 0), thickness=2)
        cv.namedWindow("result", cv.WINDOW_NORMAL)
        cv.imshow("result", org_frame)
        if isOutput:
            out.write(org_frame)
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
    cv.destroyAllWindows()
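# Hedged sketch: gen_colors is used above to get one color per class but is not
# defined in these snippets. A common approach, assumed here, is to sample
# evenly spaced hues in HSV space and convert to BGR tuples for OpenCV.
import colorsys

def gen_colors_sketch(num_classes):
    colors = []
    for i in range(num_classes):
        r, g, b = colorsys.hsv_to_rgb(i / max(1, num_classes), 1.0, 1.0)
        colors.append((int(b * 255), int(g * 255), int(r * 255)))  # BGR order for OpenCV
    return colors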
def detect_img(predict_func, img_path, class_name, input_size):
    orgimg = cv.imread(img_path)
    img = cv.resize(orgimg, input_size)  # use the input_size argument rather than a hard-coded (416, 416)
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    colors = gen_colors(len(class_name))

    start = timer()
    boxes, scores, classes = predict_func(img)
    end = timer()
    print('spent time: %.3fs' % (end - start))

    draw_bbox(orgimg, class_name, boxes.numpy(), scores.numpy(), classes.numpy(), colors)
    cv.imshow('img', orgimg)
    cv.waitKey()
def predict_img(self, raw_img, random_color=True, plot_img=True, figsize=(10, 10),
                show_text=True, return_output=False):
    print('img shape: ', raw_img.shape)
    img = self.preprocess_img(raw_img)
    imgs = np.expand_dims(img, axis=0)
    pred_output = self.inference_model.predict(imgs)
    detections = get_detection_data(img=raw_img,
                                    model_outputs=pred_output,
                                    class_names=self.class_names)
    output_img = draw_bbox(raw_img, detections, cmap=self.class_color,
                           random_color=random_color, figsize=figsize,
                           show_text=show_text, show_img=plot_img)
    if return_output:
        return output_img, detections
    else:
        return detections
def predict(self):
    np.set_printoptions(threshold=np.inf)
    image_path = './414162.jpg'
    image = np.array(cv2.imread(image_path))
    image_shape = image.shape
    print("image_shape: ", image_shape)
    image = np.copy(image)

    image_data = utils.image_preprocess(image, [self.input_size, self.input_size])
    image_data = image_data[np.newaxis, ...]

    pred_bbox = self.sess.run([self.pred_bbox],
                              feed_dict={self.input: image_data,
                                         self.training: False})
    pred_bbox = np.array(pred_bbox[0])
    # Use self.input_size here too, so post-processing matches the preprocessing size
    pred_bbox = utils.postprocess_boxes(pred_bbox, image_shape, self.input_size, 0.5)
    print("pred_bbox shape: ", pred_bbox.shape)
    pred_bbox = utils.nms(pred_bbox, 0.45)
    print("pred_bbox after: ", pred_bbox)

    image = utils.draw_bbox(image, pred_bbox, show_label=True)
    cv2.imwrite('./test.jpg', image)
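# Hedged sketch: utils.image_preprocess above is the usual YOLO letterbox
# resize. This is the standard recipe (scale preserving aspect ratio, pad with
# gray, normalize to [0, 1]), offered as an assumption about what the helper
# does rather than its actual source.
import cv2
import numpy as np

def image_preprocess_sketch(image, target_size):
    th, tw = target_size
    h, w = image.shape[:2]
    scale = min(tw / w, th / h)
    nw, nh = int(scale * w), int(scale * h)
    resized = cv2.resize(image, (nw, nh))
    padded = np.full((th, tw, 3), 128.0)           # gray letterbox canvas
    dw, dh = (tw - nw) // 2, (th - nh) // 2
    padded[dh:dh + nh, dw:dw + nw, :] = resized    # center the resized image
    return padded / 255.0                          # normalize to [0, 1]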
def predict(model, detection, img_path, threshold=0.6):
    """
    Returns coord_list of shape (number_of_plates, (x1, y1, x2, y2, prediction_rate))
    plus the image with the accepted boxes drawn on it.
    """
    source_image = cv2.imread(img_path)
    img_h, img_w, c = source_image.shape  # OpenCV shape order is (height, width, channels)
    possible_plates = detection.detect_plate(source_image)
    coord_list = []
    l = len(possible_plates)
    if l > 0:
        x_in = np.zeros((l, config.INPUT_HEIGHT, config.INPUT_WIDTH, 3))
        for count, plate in enumerate(possible_plates):
            img_temp = plate.get_plate_img(source_image)
            img_temp = reshape_img(img_temp)
            x_in[count] = img_temp
        pr = model.predict(x_in)
        print(pr)
        pr = pr[..., -1]
        for i in range(len(pr)):
            if pr[i] > threshold:
                temp_box = list(possible_plates[i].get_extend_box())
                temp_box.append(pr[i])
                coord_list.append(temp_box)

    result_img = source_image
    for coord in coord_list:
        result_img = draw_bbox(result_img, coord[0:4])
    return coord_list, result_img
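# Hedged sketch: reshape_img is undefined here. Given that the crops are packed
# into an array of shape (l, config.INPUT_HEIGHT, config.INPUT_WIDTH, 3), a
# plausible version fits each plate crop to that fixed size and scales pixel
# values; the normalization is an assumption.
import cv2

def reshape_img_sketch(img):
    img = cv2.resize(img, (config.INPUT_WIDTH, config.INPUT_HEIGHT))  # (width, height) for cv2.resize
    return img / 255.0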
def is_img(img_cv, color):
    j = 0
    if len(img_cv) != 0:
        print("---1312--------------")
        for i in range(len(img_cv)):
            im_cv_r = cv2.resize(img_cv[i], (1300, 414))
            gray = cv2.cvtColor(im_cv_r, cv2.COLOR_BGR2GRAY)
            equ = cv2.equalizeHist(gray)
            gaussian = cv2.GaussianBlur(gray, (3, 3), 0, 0, cv2.BORDER_DEFAULT)
            median = cv2.medianBlur(gaussian, 3)

            original_image = median
            original_image_size = original_image.shape[:2]
            image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
            image_data = image_data[np.newaxis, ...]

            data = json.dumps({"signature_name": "serving_default", "instances": image_data.tolist()})
            headers = {"content-type": "application/json"}
            num_classes = 65
            json_response = requests.post('http://tf:port/v1/models/yolov3:predict',
                                          data=data, headers=headers)
            predictions = json.loads(json_response.text)['predictions']
            pred_sbbox = predictions[0]['pred_sbbox']
            pred_mbbox = predictions[0]['pred_mbbox']
            pred_lbbox = predictions[0]['pred_lbbox']
            pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)

            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            if np.array(bboxes).shape[0] > 6:
                image = utils.draw_bbox(im_cv_r, bboxes)
                # print(image)
                name = color + 'im' + str(i) + '.jpg'
                path = os.path.join("./pre_out/", name)
                cv2.imwrite(path, image)
                print("-------------")
def main(model_path, img_folder, save_path, gpu_id):
    if os.path.exists(save_path):
        shutil.rmtree(save_path, ignore_errors=True)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    save_img_folder = os.path.join(save_path, 'img')
    if not os.path.exists(save_img_folder):
        os.makedirs(save_img_folder)
    save_txt_folder = os.path.join(save_path, 'result')
    if not os.path.exists(save_txt_folder):
        os.makedirs(save_txt_folder)

    img_paths = [os.path.join(img_folder, x) for x in os.listdir(img_folder)]
    model = Pytorch_model(model_path, gpu_id=gpu_id)
    total_frame = 0.0
    total_time = 0.0
    for img_path in tqdm(img_paths):
        img_name = os.path.basename(img_path).split('.')[0]
        save_name = os.path.join(save_txt_folder, 'res_' + img_name + '.txt')
        _, boxes_list, t = model.predict(img_path)
        total_frame += 1
        total_time += t
        img = draw_bbox(img_path, boxes_list, color=(0, 0, 255))
        cv2.imwrite(os.path.join(save_img_folder, '{}.jpg'.format(img_name)), img)
        np.savetxt(save_name, boxes_list.reshape(-1, 8), delimiter=',', fmt='%d')
    print('fps:{}'.format(total_frame / total_time))
    return save_txt_folder
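# Hedged sketch: the draw_bbox used by the text-detection snippets accepts
# either an image array or a path (this one passes img_path, others pass a
# frame) plus a list of 4-point polygons (boxes_list reshapes to 8 values per
# box). A minimal version under those assumptions:
import cv2
import numpy as np

def draw_bbox_poly_sketch(img_or_path, boxes_list, color=(0, 0, 255), thickness=2):
    img = cv2.imread(img_or_path) if isinstance(img_or_path, str) else img_or_path.copy()
    for box in boxes_list:
        pts = np.asarray(box, dtype=np.int32).reshape(-1, 1, 2)  # 4 corner points
        cv2.polylines(img, [pts], isClosed=True, color=color, thickness=thickness)
    return img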
def predict(data_loader, net, config):
    net.eval()
    for i, (imgs, reg_targets, cls_targets) in enumerate(data_loader):
        # `Variable(..., async=True, volatile=True)` is pre-0.4 PyTorch and no
        # longer valid syntax; non_blocking + torch.no_grad() are the modern equivalents.
        with torch.no_grad():
            data = imgs.cuda(non_blocking=True)
            reg_preds, cls_preds = net(data)
        encoder = DataEncoder(config)
        for j in range(len(imgs)):
            img = imgs[j]
            reg_pred = reg_preds.cpu()[j]
            cls_pred = cls_preds.cpu()[j]
            reg_target = reg_targets[j]
            cls_target = cls_targets[j]
            bboxes, scores = encoder.decode(reg_pred.data, cls_pred.data.squeeze(),
                                            [config.img_max_size, config.img_max_size])
            bboxes2, scores2 = encoder.decode(reg_target, cls_target,
                                              [config.img_max_size, config.img_max_size])
            img = np.transpose(img.numpy(), (1, 2, 0))
            img = ((img * config.sigma + config.mu) * 255).astype(np.uint8)
            draw_bbox(img, bboxes.numpy(), scores.numpy(),
                      '/home/storage/lsy/fashion/predictions/' + config.clothes + '/%d-%d.png' % (i, j),
                      bboxes2.numpy())
def __call__(self, IMG):
    IMG_ = pad_img(IMG)
    [NMS_IDX, BBOX, TOPK_CLASS, TOPK_SCORE] = self.sess.run(
        [self.nms_idx, self.bbox, self.topK_class, self.topK_score],
        feed_dict={self.inputs: IMG_[np.newaxis] / 127.5 - 1.0,
                   self.is_training: True})
    for i in NMS_IDX:
        if TOPK_SCORE[i] > 0.5:
            IMG = draw_bbox(IMG, recover_ImgAndBbox_scale(IMG, BBOX[i]), CLASSES[TOPK_CLASS[i]])
            # IMG_ = draw_bbox(IMG_, np.int32(BBOX[i]), CLASSES[TOPK_CLASS[i]])
    return IMG
def predict(self, frame, resize=True):
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    if not resize:
        original = frame  # keep the full-resolution frame for drawing
    frame = cv2.resize(frame, (self.input_size, self.input_size))
    image_data = frame / 255.
    # Create a single batch of shape (1, input_size, input_size, 3)
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    self.interpreter.set_tensor(self.input_details[0]['index'], image_data)
    self.interpreter.invoke()
    pred = [self.interpreter.get_tensor(self.output_details[i]['index'])
            for i in range(len(self.output_details))]

    boxes, pred_conf = utils.filter_boxes(
        pred[0], pred[1], score_threshold=0.25,
        input_shape=tf.constant([self.input_size, self.input_size]))
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=self.iou,
        score_threshold=self.treshold,
    )
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

    if not resize:
        image = utils.draw_bbox(original, pred_bbox, self.classes)
    else:
        image = utils.draw_bbox(frame, pred_bbox, self.classes)
    result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return result
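# Hedged sketch: the class above assumes a TensorFlow Lite interpreter has
# already been created elsewhere. Typical initialization looks like this; the
# model path is hypothetical.
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='yolov4.tflite')  # hypothetical path
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()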
def main(_argv):
    config = ConfigProto()
    # config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # model_module = sh_digit
    # if FLAGS.model == 'sh_digit':
    #     model_module = sh_digit
    # STRIDES = model_module.STRIDES
    # ANCHORS = model_module.ANCHORS
    # NUM_CLASS = model_module.NUM_CLASS
    # XYSCALE = model_module.XYSCALE

    original_image = cv2.imread(FLAGS.image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    image_data = cv2.resize(original_image, tuple(FLAGS.image_size_wh))
    image_data = image_data / 255.

    images_data = []
    for i in range(1):
        images_data.append(image_data)
    images_data = np.asarray(images_data).astype(np.float32)

    saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']
    batch_data = tf.constant(images_data)
    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]
        print(boxes)

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score
    )
    print(boxes.numpy())
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    image = utils.draw_bbox(original_image, pred_bbox,
                            classes=utils.read_class_names('cfg/digit/digit.names'))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cv2.imshow('result', image)
    cv2.waitKey()
def test_image(img='/tmp/image1.jpg'):
    """ Simple test script; requires /tmp/image1.jpg """
    app = ObjectDetectorTF(model='model')
    img = cv2.imread(img)
    res = app(img)

    msk = (res['score'] > 0.5)
    cls = res['class'][msk]
    box = res['box'][msk]
    score = res['score'][msk]
    print('score', score)
    for box_, cls_ in zip(box, cls):
        # ry0, rx0, ry1, rx1 = box_  # relative
        draw_bbox(img, box_, str(cls_))

    cv2.imshow('win', img)
    cv2.waitKey(0)
def object_detect(input_path="./road.jpg", output_path='./demo.jpg'):
    img_size = 608
    num_channels = 3
    # image_path = "./docs/images/sample_computer.jpg"
    image_path = input_path  # input image, e.g. "./docs/images/sample_computer.jpg"
    original_image = cv2.imread(image_path)
    # original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    image_data = utils.image_preporcess(np.copy(original_image), [img_size, img_size])  # preprocess to 608*608*3
    # print(image_data.shape)
    plt.imshow(image_data)
    plt.show()

    yolov3_api = "http://localhost:8501/v1/models/yolov3:predict"  # the TF Serving endpoint created above
    image_data_yolo_list = image_data[np.newaxis, :].tolist()  # convert to a nested list for JSON
    headers = {"Content-type": "application/json"}
    r = requests.post(yolov3_api, headers=headers,
                      data=json.dumps({"signature_name": "predict",
                                       "instances": image_data_yolo_list})).json()  # POST request
    # print('r', r)
    # 19 * 19 * 85 = 30685
    # {'error': 'Input to reshape is a tensor with 18411 values, but the requested shape requires a multiple of 30685\n\t [[{{node pred_multi_scale/Reshape_2}}]]'}
    # factors of 18411: [3, 17, 19, 51, 57, 323, 361, 969, 1083, 6137]
    output = np.array(r['predictions'])
    # print(output.shape)  # (63, 19, 19, 85); stride 32, so 608/32 = 19; 85 = 80 classes + 1 objectness + 4 coordinates
    # (a 416 x 416 input would give 13*13 instead)
    output = np.reshape(output, (-1, 85))  # flatten to 22743*85 (63*19*19 = 22743; adjust 85 for your own dataset)
    # print(output.shape)

    original_image_size = original_image.shape[:2]
    bboxes = utils.postprocess_boxes(output, original_image_size, img_size, 0.3)  # extract all candidate predictions: class, confidence, coordinates
    bboxes = utils.nms(bboxes, 0.45, method='nms')  # filter the candidates and keep the best predictions
    image = utils.draw_bbox(original_image, bboxes)  # draw the results onto a new image
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = Image.fromarray(image)
    image.show()
    image.save(output_path)  # save the image locally
def main(filename):
    flow.load_variables(flow.checkpoint.get(cfg.TEST.WEIGHT_FILE))
    image = cv2.imread(filename)
    bboxes = predict(image)
    image = utils.draw_bbox(image, bboxes, ['car'])
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(image)
    image.show()
def detect_img(model_result, input_size, fp_src, fp_dst):
    original_image = cv2.imread(fp_src)
    original_image_draw = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
    image_data = np.array(image_data).astype(np.float32)
    bboxes = model_result.model_reponse(image_data, original_image_size)
    image = utils.draw_bbox(original_image_draw, bboxes)
    image = Image.fromarray(image)
    image.save(fp_dst)
def text_predict(img):
    # img = cv2.imread(imgpath)
    preds, boxes_list, rects_re, t = text_handle.predict(img, long_size=pse_long_size)
    img2 = draw_bbox(img, boxes_list, color=(0, 255, 0))
    cv2.imwrite("debug_im/draw.jpg", img2)
    result = crnnRec(np.array(img), rects_re)
    return result
def detect_image(self, image_path=None, output_path=None, input_size=416, show=False,
                 score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''):
    if image_path is not None:
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
    image_data = tf.expand_dims(image_data, 0)

    # The tiny model gives output at two different scales
    pred_bbox = self.tiny_YoloV3.predict(image_data)
    print(pred_bbox[0].shape)
    print(pred_bbox[1].shape)

    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    # print(pred_bbox)

    bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
    print(bboxes.shape)
    bboxes = nms(bboxes, iou_threshold, method='nms')
    print(bboxes[0].shape)
    print(len(bboxes))

    image = draw_bbox(original_image, bboxes, CLASSES=self.CLASSES,
                      rectangle_colors=rectangle_colors)
    # print(image.shape)

    if output_path is not None:
        cv2.imwrite(output_path, image)
    if show:
        # Show the image and hold the window open
        cv2.imshow("predicted image", image)
        cv2.waitKey(0)
        # Close the window once a key is pressed
        cv2.destroyAllWindows()

    return image
def predict_nonms(self, img_path, iou_threshold=0.413, score_threshold=0.1):
    raw_img = cv2.imread(img_path)
    print('img shape: ', raw_img.shape)
    img = self.preprocess_img(raw_img)
    imgs = np.expand_dims(img, axis=0)
    yolov4_output = self.yolo_model.predict(imgs)
    output = yolov4_head(yolov4_output, self.num_classes, self.anchors, self.xyscale)
    pred_output = nms(output, self.img_size, self.num_classes, iou_threshold, score_threshold)
    pred_output = [p.numpy() for p in pred_output]
    detections = get_detection_data(img=raw_img,
                                    model_outputs=pred_output,
                                    class_names=self.class_names)
    draw_bbox(raw_img, detections, cmap=self.class_color, random_color=True)
    return detections
def text_predict(img):
    # img = cv2.imread(imgpath)
    if det_model_type == "pse_mobilenetv2":
        preds, boxes_list, rects_re, t = text_handle.predict(img, long_size=pse_long_size)
    else:
        boxes_list, score_list = text_handle.process(img)
    img2 = draw_bbox(img, boxes_list, color=(0, 255, 0))
    cv2.imwrite("debug_im/draw.jpg", img2)
    # result = crnnRec(np.array(img), rects_re)
    result = crnnRecWithBox(np.array(img), boxes_list)
    return result
def yolo_predict(img_input):
    sample = source = cv2.imread(img_input)
    sample = transform_img(sample, IMAGE_SIZE)
    sample_shape = sample.shape
    result = np.expand_dims(sample, 0)
    result = model.predict(result)
    result = decode_output(result, sample_shape, GRID_SIZE)
    source_shape = source.shape
    result = get_original_bbox(result, source_shape, IMAGE_SIZE)
    img = draw_bbox(source, result)
    # show_img(img, str(source_shape))
    return img
def two(self, path1, path2=None):
    lines = os.listdir(path1)
    for num, line in enumerate(lines):
        annotation = line.split()
        # print("annotation109", annotation, type(annotation))
        image_path = os.path.join(path1, "".join(annotation))
        # print("image_path", image_path)
        image = cv2.imread(image_path)
        bboxes_pr = self.predict(image)
        image = utils.draw_bbox(image, bboxes_pr, show_label=self.show_label)
        if path2 is not None:
            cv2.imwrite(path2 + "{}".format(num) + ".jpg", image)
        cv2.imshow('t', image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def detect_video(video_filename, show=False, interest_rect=None):
    cap = cv2.VideoCapture(video_filename)
    assert cap.isOpened()
    bboxs = []
    while True:
        res, img = cap.read()
        if not res:
            break
        start_time = time.time()

        detect_img = img
        if interest_rect is not None:
            detect_img = img.copy()
            image_h, image_w, _ = detect_img.shape
            l, t, r, b = double_rect(interest_rect)
            l = max(0, l)
            t = max(0, t)
            r = min(r, image_w)
            b = min(b, image_h)
            detect_img = detect_img[t:b, l:r, ...]
            bbox = predict(detect_img)
            # Shift the boxes back into full-image coordinates
            for box in bbox:
                box[0] = box[0] + l
                box[2] = box[2] + l
                box[1] = box[1] + t
                box[3] = box[3] + t
        else:
            bbox = predict(detect_img)
        bboxs.append(bbox)

        if show:
            img = utils.draw_bbox(img, bbox, ['car'])
            consume = (time.time() - start_time) * 1000
            consume = max(0, consume)
            cv2.imshow('video', img)
            cv2.waitKey(max(1, 45 - int(consume)))
    return bboxs
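# Hedged sketch: double_rect is not defined in these snippets. From its use
# above (expanding a region of interest before clamping to the image bounds),
# a plausible version doubles the rect's width and height about its center.
# The (l, t, r, b) layout is an assumption taken from the caller.
def double_rect_sketch(rect):
    l, t, r, b = rect
    cx, cy = (l + r) / 2.0, (t + b) / 2.0
    w, h = (r - l), (b - t)
    return (int(cx - w), int(cy - h), int(cx + w), int(cy + h))  # 2x width and height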
label_reader = GroundtruthReader(args.groundtruth)
bundled_reader = zip(frame_reader, label_reader)
tracker = None
net = models.vgg16_bn(pretrained=True)
patch_video_writer = None
feature_video_writer = None

for frame, gt_bbox in zip(frame_reader, label_reader):
    if tracker is None:
        tracker = Tracker(frame, gt_bbox)
        continue
    bbox = tracker.track(frame)
    draw_bbox(frame, bbox, (255, 0, 0))     # tracker output in blue (BGR)
    draw_bbox(frame, gt_bbox, (0, 255, 0))  # ground truth in green
    cv2.imshow("Demo", frame)
    # if patch_video_writer is None:
    #     patch_video_writer = cv2.VideoWriter('patch.avi', cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'), 50,
    #                                          (224, 224))
    #     feature_video_writer = cv2.VideoWriter('features.avi', cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'), 50,
    #                                            (224, 224))
    # offset = int(max(gt_bbox[2], gt_bbox[3]) / 2)
    # x_mid = int(gt_bbox[0] + gt_bbox[2] / 2)
    # y_mid = int(gt_bbox[1] + gt_bbox[3] / 2)
    # patch = cv2.resize(frame[y_mid - offset:y_mid + offset, x_mid - offset: x_mid + offset, :], (224, 224))
    # patch_video_writer.write(patch)
    #
    # features = cv2.resize(
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if ret:
        image = preprocess(frame)
        img = tf.constant(image)
        pred_bbox = infer(img)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=0.45,
            score_threshold=0.25)
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
        image = draw_bbox(frame, pred_bbox)
        cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):
        break
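# Hedged sketch: preprocess is undefined in the fragment above. For a YOLO
# SavedModel it would typically be a BGR-to-RGB conversion, a resize to the
# model's input size, scaling to [0, 1], and a batch dimension. The 416 input
# size is an assumption.
import cv2
import numpy as np

def preprocess_sketch(frame, input_size=416):
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (input_size, input_size))
    return np.expand_dims(resized / 255.0, axis=0).astype(np.float32)  # shape (1, H, W, 3)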
# Frame-processing fragment: the trailing `success, frame = vid.read()` and
# `num_frame += 1` imply an enclosing read loop, reconstructed here as an
# assumption so the fragment runs end-to-end.
success, frame = vid.read()
num_frame = 0
while success:
    frame_size = frame.shape[:2]
    image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
    image_data = image_data[np.newaxis, ...]

    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={return_tensors[0]: image_data})
    pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)

    bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.4)
    bboxes = utils.nms(bboxes, 0.3, method='nms')
    image = utils.draw_bbox(frame, bboxes)
    out.write(image)
    result = np.asarray(image)

    success, frame = vid.read()
    num_frame += 1
    print("number of frame: ", num_frame)

vid.release()
out.release()
print("end of program")