def train_and_eval(params):
    # session setting
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL']
    0 = all messages are logged (default behavior)
    1 = INFO messages are not printed
    2 = INFO and WARNING messages are not printed
    3 = INFO, WARNING, and ERROR messages are not printed
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    gpu_options = GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    print("Number of GPUs Available: ",
          len(tf.config.experimental.list_physical_devices('GPU')))

    ## program parameters
    BASE_DIR = params[0]
    TRAIN_DIR_PATH = BASE_DIR + 'train/'
    VALIDATION_DIR_PATH = BASE_DIR + 'validation/'
    seed = params[1]
    time_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = params[2] + time_stamp
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0)
    file_writer = tf.summary.create_file_writer(log_dir)

    ## training parameters
    loss_func = params[3]
    input_size = params[4]
    steps_per_epoch = params[5]
    EPOCHS = params[6]
    BS = params[7]
    IMAGE_COUNT = params[8]
    VALIDATION_COUNT = params[9]
    learning_rate = params[10]

    ## construct training and validation sets
    training_data = DataGenerator(TRAIN_DIR_PATH, batch_size=BS, image_size=input_size[0])
    validating_data = DataGenerator(VALIDATION_DIR_PATH, batch_size=BS, image_size=input_size[0])

    ## load model
    model = unet(input_size=input_size, loss_func=loss_func, l_rate=learning_rate)
    model.summary()
    print('#### Model loaded')

    ## training begins
    model.fit_generator(training_data,
                        steps_per_epoch=steps_per_epoch,
                        epochs=EPOCHS,
                        validation_data=validating_data,
                        callbacks=[tensorboard_callback])

    if not os.path.exists('./model/'):
        os.makedirs('./model/')
    model.save("model/UNet_%s.h5" % time_stamp)
    print("model saved at model/UNet_%s.h5" % time_stamp)

    text = 'UNet_%s.h5\n' \
           'Learning rate: %s\n' \
           'Image size: %s\n' \
           'Epoch: %s\n' \
           'Batch size: %s\n' \
           'Step per epoch: %s\n' \
           % (time_stamp, learning_rate, input_size, EPOCHS, BS, steps_per_epoch)
    with open("./log.txt", "a") as myfile:
        myfile.write(text)
    file_writer.close()

    ## prediction begins
    predict_folder(model, '%stest/' % BASE_DIR, save_dir='./result/%s' % time_stamp)
    InteractiveSession.close(session)
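# Usage sketch for train_and_eval(): the positional params list it unpacks, in
# order. The values below are illustrative assumptions, not project defaults.
#   params = [
#       './data/',               # 0: BASE_DIR containing train/, validation/, test/
#       42,                      # 1: seed
#       './logs/',               # 2: TensorBoard log directory prefix
#       'binary_crossentropy',   # 3: loss_func passed to unet()
#       (256, 256, 1),           # 4: input_size
#       100,                     # 5: steps_per_epoch
#       50,                      # 6: EPOCHS
#       8,                       # 7: BS (batch size)
#       1000,                    # 8: IMAGE_COUNT
#       200,                     # 9: VALIDATION_COUNT
#       1e-4,                    # 10: learning_rate
#   ]
#   train_and_eval(params)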
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (here restricted to detect only people)
        allowed_classes = ['person']

        image = utils.draw_bbox(original_image, pred_bbox, allowed_classes=allowed_classes)
        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        # get image name by using split method
        image_name = image_path.split('/')[-1]
        image_name = image_name.split('.')[0]

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as a new image
        if FLAGS.crop:
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', image_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                         pred_bbox, crop_path, allowed_classes)

        # if ocr flag is enabled, perform general text extraction using
        # Tesseract OCR on object detection bounding box
        if FLAGS.ocr:
            ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)

        # if count flag is enabled, perform counting of objects
        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class=True, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes,
                                    allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info,
                                    allowed_classes=allowed_classes, read_plate=FLAGS.plate)

        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
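# Example invocation of the crop/OCR/count variant above. The script name and
# paths are illustrative assumptions; the flag names mirror the FLAGS fields
# referenced in the code (framework, weights, size, images, crop, ocr, count,
# iou, score, plate, info, output, dont_show):
#   python detect.py --weights ./checkpoints/yolov4-416 --size 416 \
#       --images ./data/images/car.jpg --crop --ocr --count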
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # get the image name for the detection file
    image_split = image_path.split('/')
    image_check = image_split[-1]
    image_get = image_check.split('.')
    image_name = image_get[0]

    #image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
    image_data = cv2.resize(original_image, (input_size, input_size))
    image_data = image_data / 255.
    #image_data = image_data[np.newaxis, ...].astype(np.float32)

    images_data = []
    for i in range(1):
        images_data.append(image_data)
    images_data = np.asarray(images_data).astype(np.float32)

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        #print(input_details)
        #print(output_details)
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))]
        if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.4,
                                            input_shape=tf.constant([input_size, input_size]))
        else:  # yolov4
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.4,
                                            input_shape=tf.constant([input_size, input_size]))
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=100,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    #print(pred_bbox)
    image = utils.draw_bbox(image_data * 255, pred_bbox)

    # prefix each detection line with the image name and append it to the eval file
    f1 = open('/data1/minji/yolov3_bdd/eval/bdd100k_det_v4_check.txt', 'r')
    f2 = open('/data1/minji/yolov3_bdd/eval/bdd100k_det_v4.txt', 'a')
    data = ''
    while True:
        line = f1.readline()
        if not line:
            break
        data += image_name
        data += line
    print('line:', data)
    f2.write(data)
    f2.close()
    f1.close()

    image = Image.fromarray(image.astype(np.uint8))
    #image.show()
    #image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    #cv2.imwrite(FLAGS.output, image)
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as a new image
        if FLAGS.crop:
            crop_rate = 150  # capture images every so many frames (ex. crop photos every 150 frames)
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            if frame_num % crop_rate == 0:
                final_path = os.path.join(crop_path, 'frame_' + str(frame_num))
                try:
                    os.mkdir(final_path)
                except FileExistsError:
                    pass
                crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
                             pred_bbox, final_path, allowed_classes)
            else:
                pass

        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class=False, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes,
                                    allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info,
                                    allowed_classes=allowed_classes, read_plate=FLAGS.plate)

        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("result", result)
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
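# Example invocation of the video variant above (the script name and paths are
# illustrative assumptions; the flag names come from the FLAGS fields used in
# the code):
#   python detect_video.py --weights ./checkpoints/yolov4-416 --size 416 \
#       --video ./data/video/cars.mp4 --output ./detections/results.avi \
#       --output_format XVID --count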
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    save_trt()
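# save_trt() is not shown in this section. A minimal sketch of what such a
# helper could look like with the TF-TRT converter API; the function name
# below is a hypothetical placeholder and the SavedModel paths are assumptions:
from tensorflow.python.compiler.tensorrt import trt_convert as trt

def _save_trt_sketch(input_saved_model_dir, output_saved_model_dir):
    # convert a TensorFlow SavedModel into a TensorRT-optimized SavedModel
    converter = trt.TrtGraphConverterV2(input_saved_model_dir=input_saved_model_dir)
    converter.convert()
    converter.save(output_saved_model_dir)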
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    time_stamp = []
    time_stamp_vid2 = []
    time_ev_vid1 = datetime.strptime('00:00:00:00', "%H:%M:%S:%f")
    time_ev_vid2 = datetime.strptime('00:00:00:00', "%H:%M:%S:%f")
    flag = 0
    time_dict = collections.defaultdict(list)
    time_ev1 = 0
    time_ev2 = 0
    time_diff = 0

    reader = easyocr.Reader(['en'])
    print("easy ocr vocab loaded")

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    video_path1 = FLAGS.video1

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
        vid1 = cv2.VideoCapture(int(video_path1))
    except:
        vid = cv2.VideoCapture(video_path)
        vid1 = cv2.VideoCapture(video_path1)

    out = None
    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    cost = 0
    _, frame_prev = vid1.read()
    frame_prev = cv2.cvtColor(frame_prev, cv2.COLOR_BGR2GRAY)

    # while video is running
    while True:
        return_value, frame = vid.read()
        return_value1, frame1 = vid1.read()
        result1 = frame1
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        if return_value1 and flag == 0:
            frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
            cost = MSE(frame_prev, frame1)
            print("cost between two frames = ", cost)
            if cost > 2000:
                text_in_frame2 = reader.readtext(frame1)
                time_in_frame2 = text_in_frame2[0][1]
                print("time in frame 1 ", time_in_frame2)
                time_frame2 = datetime.strptime(time_in_frame2, "%H:%M:%S:%f")
                time_stamp_vid2.append(time_frame2)
                time_ev_vid1 = time_frame2
                time_ev1 = time_in_frame2
                #time_dict['Event 1'].extend(str(time_in_frame2))
                flag = 1
            frame_prev = frame1

        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            center_coordinates = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            print(center_coordinates)
            if (class_name == 'person'
                    and center_coordinates[0] in range(120, 476)
                    and center_coordinates[1] in range(191, 979)):
                # run OCR on the frame to read the on-screen timestamp
                text_in_frame = reader.readtext(frame_gray)
                #if len(text_in_frame) > 0:
                print("text in frame", text_in_frame)
                time_in_frame1 = text_in_frame[0][1]
                time_ev2 = time_in_frame1
                print("time in frame", time_in_frame1)
                time_frame1 = datetime.strptime(time_in_frame1, "%H:%M:%S:%f")
                #time_stamp.append(time_frame1)
                if flag == 1:
                    time_ev_vid2 = time_frame1
                    #time_dict['Event two'].extend(str(time_in_frame1))
                    time_stamp.append(time_frame1)
                    print("the time difference is ", (time_ev_vid2 - time_ev_vid1).seconds)
                    flag = 2
                break
            #print(vid.get(cv2.CAP_PROP_POS_MSEC)/1000)
            #time_stamp.append(vid.get(cv2.CAP_PROP_POS_MSEC)/1000)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        #print(time_stamp)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        #result1 = cv2.cvtColor(frame1, cv2.COLOR_GRAY2BGR)
        #with open("video.txt", "w") as f:
        #    f.write(str(time_stamp))
        #with open("video2.txt", "w") as k:
        #    k.write(str(time_stamp_vid2))
        result1 = cv2.resize(result1, (400, 400))
        result = cv2.resize(result, (400, 400))

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", np.concatenate((result1, result), axis=1))
        if flag == 2:
            print("the time difference is ", (time_ev_vid2 - time_ev_vid1).seconds)
            time_diff = (time_ev_vid2 - time_ev_vid1).seconds
            #time_dict['Time difference in seconds'].extend(str((time_ev_vid2 - time_ev_vid1).seconds))
            break

        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    time_diction = {
        "Event 1 occurred at ": time_ev1,
        "Event 2 occurred at ": time_ev2,
        "time difference in seconds ": time_diff,
    }
    # if output flag is set, save video file
    with open('output.json', "w") as out:
        json.dump(time_diction, out)
    cv2.destroyAllWindows()
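# MSE() is used above for frame-change detection but is not defined in this
# section. A minimal sketch of a mean squared error over two equally sized
# grayscale frames (the name below is a hypothetical placeholder, not the
# project's helper):
def _mse_sketch(frame_a, frame_b):
    # cast to float before subtracting to avoid uint8 wrap-around
    diff = frame_a.astype(np.float32) - frame_b.astype(np.float32)
    return float(np.mean(diff ** 2))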
def main(_argv):
    # get parameters and contract details
    if Parameters.is_contractor == True:  # checks if this machine is outsourcer or verifier
        vk = VerifyKey(OutsourceContract.public_key_outsourcer)
        contractHash = Helperfunctions.hashContract().encode('latin1')
        model_to_use = OutsourceContract.model
        tiny = OutsourceContract.tiny
        merkle_tree_interval = OutsourceContract.merkle_tree_interval
        display_name = 'Contractor'
    else:
        vk = VerifyKey(VerifierContract.public_key_outsourcer)
        contractHash = Helperfunctions.hashVerifierContract().encode('latin1')
        model_to_use = VerifierContract.model
        tiny = VerifierContract.tiny
        merkle_tree_interval = 0
        display_name = 'Verifier'

    sk = SigningKey(Parameters.private_key_self)
    framework = Parameters.framework
    weights = Parameters.weights
    count = Parameters.count
    dont_show = Parameters.dont_show
    info = Parameters.info
    crop = Parameters.crop
    input_size = Parameters.input_size
    iou = Parameters.iou
    score = Parameters.score
    hostname = Parameters.ip_outsourcer
    port = Parameters.port_outsourcer
    sendingPort = Parameters.sendingPort
    minimum_receive_rate_from_contractor = Parameters.minimum_receive_rate_from_contractor

    # configure video stream receiver
    receiver = vss.VideoStreamSubscriber(hostname, port)
    print('Receiver Initialized')

    # configure gpu usage
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # load model
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights)
    else:
        saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING])

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # configure responder
    responder = re.Responder(hostname, sendingPort)

    # configure and initialize statistic variables
    moving_average_points = 50
    moving_average_fps = MovingAverage(moving_average_points)
    moving_average_receive_time = MovingAverage(moving_average_points)
    moving_average_decompress_time = MovingAverage(moving_average_points)
    moving_average_img_preprocessing_time = MovingAverage(moving_average_points)
    moving_average_model_inference_time = MovingAverage(moving_average_points)
    moving_average_img_postprocessing_time = MovingAverage(moving_average_points)
    moving_average_reply_time = MovingAverage(moving_average_points)
    moving_average_image_show_time = MovingAverage(moving_average_points)
    moving_average_verify_image_sig_time = MovingAverage(moving_average_points)
    moving_average_response_signing_time = MovingAverage(moving_average_points)
    image_count = 0
    acknowledged_frames = 0
    a = 0
    b = 0

    # configure Merkle tree related variables if merkle trees are to be used
    if merkle_tree_interval > 0:
        mt = MerkleTools()
        mtOld = MerkleTools()
        interval_count = 0
        mtOld_leaf_indices = {}
        mt_leaf_indices = {}
        current_challenge = 1
        merkle_root = ''
        last_challenge = 0

    # start real time processing and verification
    while True:
        start_time = time.perf_counter()

        # receive image
        name, compressed = receiver.receive()
        if name == 'abort':
            sys.exit('Contract aborted by outsourcer according to custom')
        received_time = time.perf_counter()

        # decompress image
        decompressedImage = cv2.imdecode(np.frombuffer(compressed, dtype='uint8'), -1)
        decompressed_time = time.perf_counter()

        # verify image (verify if signature matches image, contract hash and
        # image count, and number of outputs received)
        if merkle_tree_interval == 0:
            try:
                vk.verify(bytes(compressed) + contractHash + bytes(name[-2]) + bytes(name[-1]),
                          bytes(name[:-2]))
            except:
                sys.exit('Contract aborted: Outsourcer signature does not match input. '
                         'Possible consequences for outsourcer: blacklist, bad review')
            if name[-1] < (image_count - 2) * minimum_receive_rate_from_contractor \
                    or name[-1] < acknowledged_frames:
                sys.exit('Contract aborted: Outsourcer did not acknowledge enough outputs. '
                         'Possible consequences for outsourcer: blacklist, bad review')
            acknowledged_frames = name[-1]
        else:
            # verify if signature matches image, contract hash, image count,
            # number of intervals, and random number
            try:
                vk.verify(bytes(compressed) + contractHash + bytes(name[-5]) + bytes(name[-4])
                          + bytes(name[-3]) + bytes(name[-2]) + bytes(name[-1]),
                          bytes(name[:-5]))
            except:
                sys.exit('Contract aborted: Outsourcer signature does not match input. '
                         'Possible consequences for outsourcer: blacklist, bad review')
            if name[-4] < (image_count - 2) * minimum_receive_rate_from_contractor \
                    or name[-4] < acknowledged_frames:
                sys.exit('Contract aborted: Outsourcer did not acknowledge enough outputs. '
                         'Possible consequences for outsourcer: blacklist, bad review')
            acknowledged_frames = name[-4]

            outsourcer_signature = name[:-5]
            outsourcer_image_count = name[-5]
            outsourcer_number_of_outputs_received = name[-4]
            outsourcer_random_number = name[-3]
            outsourcer_interval_count = name[-2]
            outsourcer_time_to_challenge = bool(name[-1])

        verify_time = time.perf_counter()

        # image preprocessing
        original_image = cv2.cvtColor(decompressedImage, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(original_image, (input_size, input_size))  # 0.4ms
        image_data = image_data / 255.  # 2.53ms
        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)  # 3.15ms
        image_preprocessing_time = time.perf_counter()

        # inference
        if framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if model_to_use == 'yolov3' and tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]
        model_inferenced_time = time.perf_counter()

        # image postprocessing
        # region
        h = time.perf_counter()
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=score)  # 1.2ms

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)  # 1ms

        # hold all detection data in one variable
        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as a new image
        if crop:
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', image_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                         pred_bbox, crop_path, allowed_classes)

        if count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class=False, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            boxtext, image = utils.draw_bbox(original_image, pred_bbox, info, counted_classes,
                                             allowed_classes=allowed_classes)
        else:
            boxtext, image = utils.draw_bbox(original_image, pred_bbox, info,
                                             allowed_classes=allowed_classes)  # 0.5ms

        image = Image.fromarray(image.astype(np.uint8))  # 0.3ms
        # endregion

        # prepare response
        if merkle_tree_interval == 0:
            boxtext = 'Image' + str(name[-2]) + ':;' + boxtext
        else:
            boxtext = 'Image' + str(outsourcer_image_count) + ':;' + boxtext
        #boxtext += "Object found: Person"  # dishonest

        image_postprocessing_time = time.perf_counter()

        if merkle_tree_interval == 0:
            sig = sk.sign(boxtext.encode('latin1') + contractHash).signature
            sig = sig.decode('latin1')
            # send reply
            responder.respond(boxtext + ';--' + sig)
        else:
            mt.add_leaf(boxtext, True)  # add leaves dynamically to merkle tree
            # remember indices for challenge
            mt_leaf_indices[outsourcer_image_count] = image_count % merkle_tree_interval
            response = boxtext

            # if statement is true then it's time to send a new merkle root
            # e.g. if interval = 128 then all responses from 0-127 are added to the merkle tree
            if image_count > 1 and (image_count + 1) % merkle_tree_interval == 0:
                mt.make_tree()
                merkle_root = mt.get_merkle_root()
                #merkle_root = mt.get_leaf(0)  # dishonest
                sig = sk.sign(merkle_root.encode('latin1') + bytes(interval_count)
                              + contractHash).signature  # sign merkle root
                # respond with merkle root
                response += ';--' + str(merkle_root) + ';--' + sig.decode('latin1')
                interval_count += 1
                mtOld = mt  # save old merkle tree for challenge
                mtOld_leaf_indices.clear()
                mtOld_leaf_indices = mt_leaf_indices.copy()  # save old indices for challenge
                mt_leaf_indices.clear()  # clear for new indices
                mt = MerkleTools()  # construct new merkle tree for next interval
            else:
                # if this is true then the outsourcer has not received the merkle root yet -> send again
                if interval_count > outsourcer_image_count:
                    sig = sk.sign(merkle_root.encode('latin1') + bytes(interval_count)
                                  + contractHash).signature  # sign merkle root
                    response += ';--' + str(merkle_root) + ';--' + sig.decode('latin1')
                else:
                    # in this case outsourcer has confirmed to have received the merkle root;
                    # if the outsourcer has sent a challenge to meet with the old merkle tree,
                    # give the outsourcer 3 frames time to confirm the challenge was received
                    # before sending again
                    if outsourcer_time_to_challenge and image_count - last_challenge > 3:
                        last_challenge = image_count
                        if outsourcer_random_number in mtOld_leaf_indices:
                            # if challenge can be found, send proof back
                            outsourcer_random_number_index = mtOld_leaf_indices[outsourcer_random_number]
                        else:
                            # if challenge index cannot be found return leaf 0
                            outsourcer_random_number_index = 0

                        proofs = mtOld.get_proof(outsourcer_random_number_index)
                        stringsend = ''
                        for proof in proofs:
                            stringsend += ';--'  # indicate start of proof
                            stringsend += proof.__str__()  # send proof
                        stringsend += ';--'
                        stringsend += mtOld.get_leaf(outsourcer_random_number_index)  # send leaf
                        stringsend += ';--'
                        stringsend += mtOld.get_merkle_root()  # send root

                        stringarr = []
                        stringarr = stringsend.split(';--')
                        leaf_node = stringarr[-2]
                        root_node = stringarr[-1]
                        proof_string = stringarr[0:-2]
                        sig = sk.sign(str(stringarr[1:]).encode('latin1')
                                      + bytes(interval_count - 1)
                                      + contractHash).signature  # sign proof and contract details
                        # attach signature
                        response += ';--' + sig.decode('latin1')
                        response += stringsend  # attach challenge response to response

            responder.respond(response)

        response_signing_time = time.perf_counter()
        replied_time = time.perf_counter()

        # display image
        if not dont_show:
            image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
            cv2.imshow(display_name, image)
            if cv2.waitKey(1) == ord('q'):
                responder.respond('abort12345:6')
                sys.exit('Contract aborted: Ended contract according to custom')
        image_showed_time = time.perf_counter()

        # statistics
        moving_average_fps.add(1 / (image_showed_time - start_time))
        moving_average_receive_time.add(received_time - start_time)
        moving_average_decompress_time.add(decompressed_time - received_time)
        moving_average_verify_image_sig_time.add(verify_time - decompressed_time)
        moving_average_img_preprocessing_time.add(image_preprocessing_time - verify_time)
        moving_average_model_inference_time.add(model_inferenced_time - image_preprocessing_time)
        moving_average_img_postprocessing_time.add(image_postprocessing_time - model_inferenced_time)
        moving_average_response_signing_time.add(
            response_signing_time - image_postprocessing_time)  # adjust for merkle root
        moving_average_reply_time.add(replied_time - response_signing_time)
        moving_average_image_show_time.add(image_showed_time - replied_time)
        total_time = moving_average_receive_time.get_moving_average() \
            + moving_average_decompress_time.get_moving_average() \
            + moving_average_verify_image_sig_time.get_moving_average() \
            + moving_average_img_preprocessing_time.get_moving_average() \
            + moving_average_model_inference_time.get_moving_average() \
            + moving_average_img_postprocessing_time.get_moving_average() \
            + moving_average_response_signing_time.get_moving_average() \
            + moving_average_reply_time.get_moving_average() \
            + moving_average_image_show_time.get_moving_average()

        # count seconds it takes to process 400 images after an 800 frame warm-up
        if image_count == 800:
            a = time.perf_counter()
        if image_count == 1200:
            a = time.perf_counter() - a
            print(a)

        # terminal prints
        if image_count % 20 == 0:
            print(
                " total: %4.1fms (%4.1ffps) "
                " receiving %4.1f (%4.1f%%) "
                " decoding %4.1f (%4.1f%%) "
                " verifying %4.1f (%4.1f%%) "
                " preprocessing %4.1f (%4.1f%%) "
                " model inference %4.1f (%4.1f%%) "
                " postprocessing %4.1f (%4.1f%%) "
                " signing %4.1f (%4.1f%%) "
                " replying %4.1f (%4.1f%%) "
                " display %4.1f (%4.1f%%) "
                % (
                    1000 / moving_average_fps.get_moving_average(),
                    moving_average_fps.get_moving_average(),
                    moving_average_receive_time.get_moving_average() * 1000,
                    moving_average_receive_time.get_moving_average() / total_time * 100,
                    moving_average_decompress_time.get_moving_average() * 1000,
                    moving_average_decompress_time.get_moving_average() / total_time * 100,
                    moving_average_verify_image_sig_time.get_moving_average() * 1000,
                    moving_average_verify_image_sig_time.get_moving_average() / total_time * 100,
                    moving_average_img_preprocessing_time.get_moving_average() * 1000,
                    moving_average_img_preprocessing_time.get_moving_average() / total_time * 100,
                    moving_average_model_inference_time.get_moving_average() * 1000,
                    moving_average_model_inference_time.get_moving_average() / total_time * 100,
                    moving_average_img_postprocessing_time.get_moving_average() * 1000,
                    moving_average_img_postprocessing_time.get_moving_average() / total_time * 100,
                    moving_average_response_signing_time.get_moving_average() * 1000,
                    moving_average_response_signing_time.get_moving_average() / total_time * 100,
                    moving_average_reply_time.get_moving_average() * 1000,
                    moving_average_reply_time.get_moving_average() / total_time * 100,
                    moving_average_image_show_time.get_moving_average() * 1000,
                    moving_average_image_show_time.get_moving_average() / total_time * 100,
                ),
                end='\r')

        # counter
        image_count += 1
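# MovingAverage is used for the statistics above but is not defined in this
# section. A minimal sketch consistent with the add()/get_moving_average()
# calls over a fixed window of `points` samples (an assumption about the
# interface, not the project's implementation; the class name below is a
# hypothetical placeholder):
from collections import deque

class _MovingAverageSketch:
    def __init__(self, points):
        self.values = deque(maxlen=points)  # keep only the last `points` samples

    def add(self, value):
        self.values.append(value)

    def get_moving_average(self):
        return sum(self.values) / len(self.values) if self.values else 0.0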
def main():
    # get parameters and contract details
    # print(contractHash)
    #preprocess_queue = queue.LifoQueue()
    #inference_queue = queue.LifoQueue()
    preprocess_queue = mp.Queue()
    #inference_queue = mp.Queue()
    #postprocess_queue = Queue()

    dg = Datagetter2.Datagetter2()
    #p1 = mp.Process(target=inference, args=(preprocess_queue, inference_queue))
    p2 = mp.Process(target=preprocessing, args=(dg, ))
    #p1 = Process(target=dummy)
    #p2 = Process(target=dummy)
    #p3 = Process(target=Show_Image_mp, args=(Processed_frames, show, Final_frames))
    #p1.start()
    p2.start()
    #p3.start()

    sk = SigningKey(Parameters.private_key_contractor)
    contractHash = Helperfunctions.hashContract().encode('latin1')
    dont_show = Parameters.dont_show
    merkle_tree_interval = OutsourceContract.merkle_tree_interval
    hostname = Parameters.ip_outsourcer  # use to receive from other computer
    port = Parameters.port_outsourcer
    sendingPort = Parameters.sendingPort
    #import tensorflow as tf
    #time.sleep(1.0)

    # configure responder
    responder = re.Responder(hostname, sendingPort)

    # statistics info
    moving_average_points = 50

    # statistics
    moving_average_fps = MovingAverage(moving_average_points)
    moving_average_receive_time = MovingAverage(moving_average_points)
    moving_average_decompress_time = MovingAverage(moving_average_points)
    #moving_average_model_load_image_time = MovingAverage(moving_average_points)
    moving_average_img_preprocessing_time = MovingAverage(moving_average_points)
    moving_average_model_inference_time = MovingAverage(moving_average_points)
    moving_average_img_postprocessing_time = MovingAverage(moving_average_points)
    moving_average_reply_time = MovingAverage(moving_average_points)
    moving_average_image_show_time = MovingAverage(moving_average_points)
    moving_average_verify_image_sig_time = MovingAverage(moving_average_points)
    moving_average_response_signing_time = MovingAverage(moving_average_points)
    image_count = 0
    a = 0
    b = 0

    if merkle_tree_interval > 0:
        mt = MerkleTools()
        mtOld = MerkleTools()
        interval_count = 0
        mtOld_leaf_indices = {}
        mt_leaf_indices = {}
        #rendundancy_counter = 0
        #rendundancy_counter2 = 0
        current_challenge = 1
        merkle_root = ''
        #stringsend = ''
        last_challenge = 0

    image_showed_time = time.perf_counter()

    # init
    import tensorflow as tf
    import core.utils as utils
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.compat.v1 import InteractiveSession
    from tensorflow.compat.v1 import ConfigProto
    from core.functions import count_objects, crop_objects
    from core.config import cfg
    from core.utils import read_class_names
    import os
    import random
    from core.yolov4 import filter_boxes

    tf.keras.backend.clear_session()
    input_size = Parameters.input_size
    model = OutsourceContract.model
    framework = Parameters.framework
    tiny = OutsourceContract.tiny
    weights = Parameters.weights
    iou = Parameters.iou
    score = Parameters.score

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    try:
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except:
        pass

    # configure gpu usage
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # load model
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights)
    else:
        saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING])

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    count = Parameters.count
    info = Parameters.info
    crop = Parameters.crop

    while True:
        queueData = dg.get_data()
        if queueData != -1:
            #queueData = preprocess_queue.get()
            #while not preprocess_queue.empty():
            #    queueData = preprocess_queue.get()
            a = time.perf_counter()
            #preprocess_queue.task_done()
            images_data = queueData[0]
            name = queueData[1]
            original_image = queueData[2]

            if framework == 'tflite':
                interpreter.allocate_tensors()
                input_details = interpreter.get_input_details()
                output_details = interpreter.get_output_details()
                interpreter.set_tensor(input_details[0]['index'], images_data)
                interpreter.invoke()
                pred = [interpreter.get_tensor(output_details[i]['index'])
                        for i in range(len(output_details))]
                if model == 'yolov3' and tiny == True:
                    boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                    input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                    input_shape=tf.constant([input_size, input_size]))
            else:
                infer = saved_model_loaded.signatures['serving_default']
                batch_data = tf.constant(images_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=score)  # 1.2ms

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
            original_h, original_w, _ = original_image.shape
            bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)  # 1ms -> no tf needed

            # hold all detection data in one variable
            pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to allow detections for only people)
            #allowed_classes = ['person']

            # if crop flag is enabled, crop each detection and save it as a new image
            if crop:
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop', image_name)
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                             pred_bbox, crop_path, allowed_classes)

            if count:
                # count objects found
                counted_classes = count_objects(pred_bbox, by_class=False, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                boxtext, image = utils.draw_bbox(original_image, pred_bbox, info, counted_classes,
                                                 allowed_classes=allowed_classes)
            else:
                boxtext, image = utils.draw_bbox(original_image, pred_bbox, info,
                                                 allowed_classes=allowed_classes)  # 0.5ms

            image = Image.fromarray(image.astype(np.uint8))  # 0.3ms

            #inference_queue.put((boxtext, image, name))
            #while True:
            #    start_time = time.perf_counter()
            #    if not inference_queue.empty():
            #        queueData = inference_queue.get()
            #        while not inference_queue.empty():
            #            queueData = inference_queue.get()
            start_time = image_showed_time
            #boxes, scores, classes, valid_detections, name, original_image
            #queueData = inference_queue.get()
            #inference_queue.task_done()
            #boxtext = queueData[0]
            #image = queueData[1]
            #name = queueData[2]

            if merkle_tree_interval > 0:
                outsourcer_signature = name[:-5]
                outsourcer_image_count = name[-5]
                outsourcer_number_of_outputs_received = name[-4]
                outsourcer_random_number = name[-3]
                outsourcer_interval_count = name[-2]
                outsourcer_time_to_challenge = bool(name[-1])

            received_time = time.perf_counter()
            image_preprocessing_time = time.perf_counter()
            decompressed_time = time.perf_counter()
            verify_time = time.perf_counter()

            # inference
            # region
            # endregion
            model_inferenced_time = time.perf_counter()

            # image postprocessing
            # region
            h = time.perf_counter()
            # endregion

            if merkle_tree_interval == 0:
                boxtext = 'Image' + str(name[-2]) + ':;' + boxtext
            else:
                boxtext = 'Image' + str(outsourcer_image_count) + ':;' + boxtext

            image_postprocessing_time = time.perf_counter()

            # sign message -> need to add image_count/interval_count (for merkle
            # tree sig) and contract hash to output and verification
            if merkle_tree_interval == 0:
                #sig = sk.sign_deterministic(boxtext.encode('latin1'))
                sig = sk.sign(boxtext.encode('latin1') + contractHash).signature
                #sig = list(sig)
                sig = sig.decode('latin1')
                # send reply
                responder.respond(boxtext + ';--' + sig)
            else:
                #print(image_count)
                # add leaves dynamically to merkle tree
                mt.add_leaf(boxtext, True)
                # remember indices for challenge
                mt_leaf_indices[outsourcer_image_count] = image_count % merkle_tree_interval
                #print(image_count % merkle_tree_interval)
                response = boxtext

                # time to send a new merkle root
                # e.g. if interval = 128 then all responses from 0-127 are added to the merkle tree
                if image_count > 1 and (image_count + 1) % merkle_tree_interval == 0:
                    #print(image_count)
                    a = time.perf_counter()
                    #rendundancy_counter = 2
                    mt.make_tree()
                    merkle_root = mt.get_merkle_root()
                    sig = sk.sign(merkle_root.encode('latin1') + bytes(interval_count)
                                  + contractHash).signature  # sign merkle root
                    # respond with merkle root
                    response += ';--' + str(merkle_root) + ';--' + sig.decode('latin1')
                    interval_count += 1
                    mtOld = mt  # save old merkle tree for challenge
                    mtOld_leaf_indices.clear()  # clear old indices
                    mtOld_leaf_indices = mt_leaf_indices.copy()  # save old indices for challenge
                    #print(mtOld_leaf_indices)
                    mt_leaf_indices.clear()  # clear for new indices
                    mt = MerkleTools()  # construct new merkle tree for next interval
                    te = time.perf_counter() - a
                    #print('1', te, image_count)
                else:
                    # if this is true then the outsourcer has not received the merkle root yet -> send again
                    if interval_count > outsourcer_image_count:
                        sig = sk.sign(merkle_root.encode('latin1') + bytes(interval_count)
                                      + contractHash).signature  # sign merkle root
                        response += ';--' + str(merkle_root) + ';--' + sig.decode('latin1')
                        #print('2', image_count)
                    else:
                        # in this case outsourcer has confirmed to have received the merkle root;
                        # if the outsourcer has sent a challenge to meet with the old merkle tree,
                        # give the outsourcer 3 frames time to confirm the challenge was received
                        # before sending again
                        if outsourcer_time_to_challenge and image_count - last_challenge > 3:
                            last_challenge = image_count
                            if outsourcer_random_number in mtOld_leaf_indices:
                                # if challenge can be found, send proof back
                                outsourcer_random_number_index = mtOld_leaf_indices[outsourcer_random_number]
                            else:
                                # if challenge index cannot be found return leaf 0
                                outsourcer_random_number_index = 0
                                #print('proof index not found')

                            proofs = mtOld.get_proof(outsourcer_random_number_index)
                            stringsend = ''
                            for proof in proofs:
                                stringsend += ';--'  # indicate start of proof
                                stringsend += proof.__str__()  # send proof
                            stringsend += ';--'
                            stringsend += mtOld.get_leaf(outsourcer_random_number_index)  # send leaf
                            stringsend += ';--'
                            stringsend += mtOld.get_merkle_root()  # send root

                            stringarr = []
                            stringarr = stringsend.split(';--')
                            leaf_node = stringarr[-2]
                            root_node = stringarr[-1]
                            proof_string = stringarr[0:-2]
                            # sign proof and contract details
                            sig = sk.sign(str(stringarr[1:]).encode('latin1')
                                          + bytes(interval_count - 1) + contractHash).signature
                            #print(str(stringarr).encode('latin1') + bytes(interval_count - 1) + contractHash)
                            #print(stringarr)
                            # attach signature
                            response += ';--' + sig.decode('latin1')
                            response += stringsend  # attach challenge response to response
                            #print('3', te, image_count)

                responder.respond(response)

            response_signing_time = time.perf_counter()
            #print(response_signing_time - image_postprocessing_time)
            replied_time = time.perf_counter()

            # display image
            if not dont_show:
                #image.show()
                image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
                cv2.imshow('raspberrypi', image)
                if cv2.waitKey(1) == ord('q'):
                    responder.respond('abort12345:6')
                    sys.exit('Contract aborted: Contractor ended contract according to custom')

            image_showed_time = time.perf_counter()
            b = time.perf_counter()
            print('inf', b - a)

            # statistics
            moving_average_fps.add(1 / (image_showed_time - start_time))
            moving_average_receive_time.add(received_time - start_time)
            moving_average_decompress_time.add(decompressed_time - received_time)
            moving_average_verify_image_sig_time.add(verify_time - decompressed_time)
            moving_average_img_preprocessing_time.add(image_preprocessing_time - verify_time)
            moving_average_model_inference_time.add(model_inferenced_time - image_preprocessing_time)
            moving_average_img_postprocessing_time.add(image_postprocessing_time - model_inferenced_time)
            moving_average_response_signing_time.add(
                response_signing_time - image_postprocessing_time)  # adjust for merkle root
            moving_average_reply_time.add(replied_time - response_signing_time)
            moving_average_image_show_time.add(image_showed_time - replied_time)
            total_time = moving_average_receive_time.get_moving_average() \
                + moving_average_decompress_time.get_moving_average() \
                + moving_average_verify_image_sig_time.get_moving_average() \
                + moving_average_img_preprocessing_time.get_moving_average() \
                + moving_average_model_inference_time.get_moving_average() \
                + moving_average_img_postprocessing_time.get_moving_average() \
                + moving_average_response_signing_time.get_moving_average() \
                + moving_average_reply_time.get_moving_average() \
                + moving_average_image_show_time.get_moving_average()

            if image_count == 800:
                a = time.perf_counter()
            if image_count == 1200:
                a = time.perf_counter() - a
                print(a)

            # terminal prints
            if image_count % 20 == 0:
                print(
                    " total: %4.1fms (%4.1ffps) "
                    " receiving %4.1f (%4.1f%%) "
                    " decoding %4.1f (%4.1f%%) "
                    " verifying %4.1f (%4.1f%%) "
                    " preprocessing %4.1f (%4.1f%%) "
                    " model inference %4.1f (%4.1f%%) "
                    " postprocessing %4.1f (%4.1f%%) "
                    " signing %4.1f (%4.1f%%) "
                    " replying %4.1f (%4.1f%%) "
                    " display %4.1f (%4.1f%%) "
                    % (
                        1000 / moving_average_fps.get_moving_average(),
                        moving_average_fps.get_moving_average(),
                        moving_average_receive_time.get_moving_average() * 1000,
                        moving_average_receive_time.get_moving_average() / total_time * 100,
                        moving_average_decompress_time.get_moving_average() * 1000,
                        moving_average_decompress_time.get_moving_average() / total_time * 100,
                        moving_average_verify_image_sig_time.get_moving_average() * 1000,
                        moving_average_verify_image_sig_time.get_moving_average() / total_time * 100,
                        moving_average_img_preprocessing_time.get_moving_average() * 1000,
                        moving_average_img_preprocessing_time.get_moving_average() / total_time * 100,
                        moving_average_model_inference_time.get_moving_average() * 1000,
                        moving_average_model_inference_time.get_moving_average() / total_time * 100,
                        moving_average_img_postprocessing_time.get_moving_average() * 1000,
                        moving_average_img_postprocessing_time.get_moving_average() / total_time * 100,
                        moving_average_response_signing_time.get_moving_average() * 1000,
                        moving_average_response_signing_time.get_moving_average() / total_time * 100,
                        moving_average_reply_time.get_moving_average() * 1000,
                        moving_average_reply_time.get_moving_average() / total_time * 100,
                        moving_average_image_show_time.get_moving_average() * 1000,
                        moving_average_image_show_time.get_moving_average() / total_time * 100,
                    ),
                    end='\r')

            # counter
            image_count += 1
def pre():
    from tensorflow.compat.v1 import ConfigProto
    from tensorflow.compat.v1 import InteractiveSession
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    InteractiveSession(config=config)
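# pre() enables GPU memory growth through a TF1-compat InteractiveSession. On
# TF 2.x the equivalent effect can be achieved with the eager API already used
# elsewhere in this file (a sketch):
#   import tensorflow as tf
#   for gpu in tf.config.experimental.list_physical_devices('GPU'):
#       tf.config.experimental.set_memory_growth(gpu, True)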
def main(_argv):
    # MQTT setup
    #broker_address = "172.20.10.6"    # Sojin
    broker_address = "192.168.0.7"     # nstl_sub
    #broker_address = "192.168.137.27"
    client = mqtt.Client()
    config = ConfigProto()
    client.on_connect = on_connect
    client.on_disconnect = on_disconnect
    client.connect(broker_address, 1883)
    client.loop_start()
    client.on_message = on_message
    client.subscribe("Cam")
    client.subscribe("mode")
    client.subscribe("motor_front")
    client.subscribe("motor_back")
    client.subscribe("motor_object")

    # YOLOv4-tiny setup
    config.gpu_options.allow_growth = True
    cv2.ocl.setUseOpenCL(False)
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        #vid = cv2.VideoCapture('http://172.20.10.6:8081/?action=stream')    # motion web streaming - Sojin
        vid = cv2.VideoCapture('http://192.168.0.7:8081/?action=stream')     # motion web streaming - nstl_sub
        #vid = cv2.VideoCapture('http://192.168.137.27:8081/?action=stream') # motion web streaming - Sojin1
    except Exception:
        print('error: empty camera!')

    # out = None
    global redetect_ok, ok, mid, motor_camera, buzzer, isFirst, count, tracker, isDetect
    idx = 0  # index of the next snapshot to save

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)  # convert the numpy array to an Image object
        else:
            print('Video has ended or failed, try a different video format!')
            break

        if isFirst:
            # detect a person with YOLO
            # frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            pred_bbox = [
                boxes.numpy(), scores.numpy(), classes.numpy(),
                valid_detections.numpy()
            ]

            roi = utils.detect_roi(frame, pred_bbox)
            (a, b, c, d) = roi  # keep the coordinates before they are adjusted
            roi = check_roi(roi)
            if roi[2] and roi[3]:  # position values are available
                tracker = cv2.TrackerKCF_create()  # create the tracker object
                tracker.init(frame, roi)  # initialize tracker with first frame and bounding box
                user = frame[int(b):int(d + b), int(a):int(c + a)]
                if count == 0:
                    cv2.imwrite('./user/face1.jpg', user)  # take the frontal photo
                count += 1
                if count < 75 * idx:
                    continue
                else:
                    idx += 1
                    path = './user/face' + str(idx) + '.jpg'
                    print(idx)
                    cv2.imwrite(path, user)  # take the side photo
                    client.publish('OK', 'camera')
                    if idx == 4:
                        isFirst = False  # user registration finished
                        idx = 0
            else:
                continue

        if isDetect:
            # Tracking (KCF)
            homography = matching.featureDetect(frame)
            homography = check_roi(homography)
            if tracker is None:
                continue
            tracker.init(frame, homography)
            if homography != (0, 0, 0, 0):  # at least a 60% match and box coordinates exist
                redetect_ok, boxes = tracker.update(frame)  # find the tracked position in the new frame
                (x, y, w, h) = boxes
                if redetect_ok:  # the box has a reasonable size
                    # print('re-detection finished')
                    isDetect = False  # end re-detection
                    client.publish('buzzer', 'off')  # turn the buzzer off
                    if mode == 'auto':
                        client.publish('motor_camera', 'motor start')
                else:
                    isDetect = True
                    redetect_ok = False

        # tracker_kcf
        if tracker is None:  # no tracker has been created yet
            cv2.putText(frame, "Cannot detect person", (100, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2, cv2.LINE_AA)
        else:
            if ok:
                ok, boxes = tracker.update(frame)  # find the tracked position in the new frame
                (x, y, w, h) = boxes
                cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)),
                              (0, 255, 0), 2, 1)
                mid = int(x + (w / 2))
            elif redetect_ok:  # re-detection
                redetect_ok, boxes = tracker.update(frame)  # find the tracked position in the new frame
                (x, y, w, h) = boxes
                cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)),
                              (0, 0, 255), 2, 1)  # draw the box
                mid = int(x + (w / 2))  # x-coordinate of the box center
            else:  # tracking failed
                isDetect = True  # run re-detection
                tracker.clear()  # reset the tracker
                tracker = cv2.TrackerKCF_create()  # create a new tracker object
                cv2.putText(frame, "Tracking fail.", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2, cv2.LINE_AA)
                client.publish('buzzer', 'on')  # publish an MQTT message to sound the buzzer

        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if not FLAGS.dont_show:
            cv2.imshow("result", result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()

    # shut down MQTT
    client.loop_stop()
    client.disconnect()
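# The MQTT callbacks wired up above (on_connect, on_disconnect, on_message) are
# defined elsewhere in this project. A minimal paho-mqtt sketch of what such
# handlers could look like (illustrative assumption, not the project's code):
import paho.mqtt.client as mqtt

def on_connect(client, userdata, flags, rc):
    print("MQTT connected with result code", rc)

def on_disconnect(client, userdata, rc):
    print("MQTT disconnected with result code", rc)

def on_message(client, userdata, msg):
    # msg.topic is one of the subscribed topics ('Cam', 'mode', 'motor_front', ...)
    print(msg.topic, msg.payload.decode())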
def main():
    # Each subdirectory gets a numeric label in alphabetical order, starting at 0.
    train_meta_data = []
    test_meta_data = []
    counts = [0, 0]

    dataset_path_train = "/data/fm_tools/autofm/wholeImagesBacon/train"
    dataset_path_train_masks = "/data/fm_tools/autofm/wholeImagesBacon/masks/train"
    classes = sorted(os.walk(dataset_path_train).__next__()[1])
    # For each subdirectory (class)
    for label, c in enumerate(classes):
        c_dir = os.path.join(dataset_path_train, c)
        walk = os.walk(c_dir).__next__()
        file_list = walk[2]
        for sample in file_list:
            if sample.endswith('png'):
                one_hot = np.zeros(2)
                one_hot[label] = 1
                mask_path = os.path.join(dataset_path_train_masks, c_dir.split("/")[-1], sample)
                if label == 0:
                    mask_path = '/data/fm_tools/autofm/wholeImagesBacon/clean_mask.png'
                train_meta_data.append([os.path.join(c_dir, sample), one_hot, mask_path])

    dataset_path_test = "/data/fm_tools/autofm/wholeImagesBacon/test"
    dataset_path_test_masks = "/data/fm_tools/autofm/wholeImagesBacon/masks/test"
    classes = sorted(os.walk(dataset_path_test).__next__()[1])
    for label, c in enumerate(classes):
        c_dir = os.path.join(dataset_path_test, c)
        walk = os.walk(c_dir).__next__()
        file_list = walk[2]
        for sample in file_list:
            if sample.endswith('png'):
                one_hot = np.zeros(2)
                one_hot[label] = 1
                mask_path = os.path.join(dataset_path_test_masks, c_dir.split("/")[-1], sample)
                if label == 0:
                    mask_path = '/data/fm_tools/autofm/wholeImagesBacon/clean_mask.png'
                test_meta_data.append([os.path.join(c_dir, sample), one_hot, mask_path])

    '''
    We shuffle here in addition to letting tf.data shuffle, because the
    tf.data shuffle cannot do a uniform shuffle on large datasets.
    '''
    random.shuffle(train_meta_data)
    random.shuffle(test_meta_data)

    '''
    Create the dataset pipeline. The pipeline takes in a list of filenames and
    an array of labels (not one-hot). We use a tensorflow reinitializable
    iterator, which lets us switch between the test and train datasets at
    train time by calling iterator.make_initializer().
    '''
    # Train
    image_ds_train = tf.data.Dataset.from_tensor_slices(np.array(train_meta_data)[:, 0])
    image_ds_train = image_ds_train.map(preprocess_train, num_parallel_calls=12)
    mask_ds_train = tf.data.Dataset.from_tensor_slices(np.array(train_meta_data)[:, 2])
    mask_ds_train = mask_ds_train.map(preprocess_train_mask, num_parallel_calls=12)
    #image_ds_train = image_ds_train.map(apply_image_augmentation, num_parallel_calls=12)
    label_ds_train = tf.data.Dataset.from_tensor_slices(
        np.stack(np.array(train_meta_data)[:, 1], axis=0))

    # Test
    image_ds_test = tf.data.Dataset.from_tensor_slices(np.array(test_meta_data)[:, 0])
    image_ds_test = image_ds_test.map(preprocess_test, num_parallel_calls=12)
    mask_ds_test = tf.data.Dataset.from_tensor_slices(np.array(test_meta_data)[:, 2])
    mask_ds_test = mask_ds_test.map(preprocess_test_mask, num_parallel_calls=12)
    label_ds_test = tf.data.Dataset.from_tensor_slices(
        np.stack(np.array(test_meta_data)[:, 1], axis=0))

    dataset_train = tf.data.Dataset.zip((image_ds_train, label_ds_train, mask_ds_train))
    dataset_train = dataset_train.shuffle(buffer_size=1000)  # buffer_size determines uniformity of the shuffle
    dataset_train = dataset_train.batch(BATCH_SIZE)
    dataset_train = dataset_train.prefetch(buffer_size=AUTOTUNE)

    dataset_test = tf.data.Dataset.zip((image_ds_test, label_ds_test, mask_ds_test))
    dataset_test = dataset_test.shuffle(buffer_size=1000)  # buffer_size determines uniformity of the shuffle
    dataset_test = dataset_test.batch(BATCH_SIZE)
    dataset_test = dataset_test.prefetch(buffer_size=AUTOTUNE)

    iterator = tf.data.Iterator.from_structure(dataset_train.output_types,
                                               dataset_train.output_shapes)
    X_BATCH, Y_BATCH, Z_BATCH = iterator.get_next()
    training_init_op = iterator.make_initializer(dataset_train)
    validation_init_op = iterator.make_initializer(dataset_test)

    X_BATCH = tf.identity(X_BATCH, "images_input")
    Y_BATCH = tf.identity(Y_BATCH, "labels_input")
    Z_BATCH = tf.identity(Z_BATCH, "masks_input")

    network = FCN_Model(X_BATCH, Z_BATCH)  # X_BATCH is images, Z_BATCH is masks (Y_BATCH holds the labels)
    #network.x = tf.identity(network.x, "keep_rate_input")
    output_op = tf.get_default_graph().get_operation_by_name("Softmax")
    #_ = tf.identity(output_op.outputs[0], "Softmax_output")

    NUM_EPOCHS = 2000
    learning_rate = 5e-4
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(network.loss)
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=500)

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    sess = InteractiveSession(config=config)
    with sess.graph.as_default():
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.local_variables_initializer())
        saver.restore(sess, "model1322")
        print("Training started")
        for epoch in range(NUM_EPOCHS):
            #random.shuffle(train_meta_data)
            #random.shuffle(test_meta_data)

            # Train epoch: switch the iterator to the train data
            sess.run(training_init_op)
            step = 0
            train_losses = []
            train_accuracies = []
            allLabels = []
            try:
                while True:
                    softmax, images, masks, train_loss, _, pred, accuracy = sess.run(
                        [network.softmax, network.original_image, network.mask,
                         network.loss, optimizer, network.predictions, network.accuracy])
                    train_losses.append(train_loss)
                    train_accuracies.append(accuracy[0])
                    print("Train accuracy =", "%.3f" % accuracy[0],
                          "Train loss =", train_loss, end='\r')
                    step += 1
            except tf.errors.OutOfRangeError:
                # This looks like a hack, but it is the correct way to tell
                # when an epoch is complete.
                pass
            print("Epoch", epoch, "complete. Train accuracy for epoch was",
                  np.mean(train_accuracies), "train loss was", np.mean(train_losses))

            # Test epoch: switch the iterator to the test data
            sess.run(validation_init_op)
            test_accuracies = []
            try:
                while True:
                    pred, softmax, images, masks, test_loss, accuracy = sess.run(
                        [network.predictions, network.softmax, network.original_image,
                         network.mask, network.loss, network.accuracy])
                    print("Test accuracy =", accuracy[0], end='\r')
                    test_accuracies.append(accuracy[0])
                    cv2.imshow("prediction", np.squeeze(pred[0]).astype(np.uint8) * 255)
                    cv2.imshow("softmax", (np.squeeze(softmax[0][:, :, 1]) * 255).astype(np.uint8))
                    cv2.imshow("mask", np.squeeze(masks[0]).astype(np.uint8) * 255)
                    cv2.imshow("image", np.squeeze(images[0]).astype(np.uint8)[..., ::-1])
                    cv2.waitKey(1)
                    print("Test accuracy =", "%.3f" % accuracy[0],
                          "Test loss =", test_loss, end='\r')
            except tf.errors.OutOfRangeError:
                pass
            print("Test accuracy after epoch was", np.mean(test_accuracies))
            saver.save(sess, "model1322")
            freeze.create_pb("model1322")
            print("Checkpoint saved and pb created.")
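# A minimal, self-contained sketch of the reinitializable-iterator pattern used
# above (TF1.x / tf.compat.v1 API, with illustrative toy datasets):
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

train_ds = tf.data.Dataset.from_tensor_slices(np.arange(10)).batch(2)
test_ds = tf.data.Dataset.from_tensor_slices(np.arange(100, 106)).batch(2)

iterator = tf.data.Iterator.from_structure(train_ds.output_types,
                                           train_ds.output_shapes)
next_batch = iterator.get_next()
train_init = iterator.make_initializer(train_ds)
test_init = iterator.make_initializer(test_ds)

with tf.Session() as sess:
    for init in (train_init, test_init):
        sess.run(init)  # switch between datasets without rebuilding the graph
        try:
            while True:
                print(sess.run(next_batch))
        except tf.errors.OutOfRangeError:
            pass  # epoch complete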
def detect(parameters, infer_model, image):
    """Detect the classes in a given image.

    Args:
        parameters (dict): input parameters
            - input_size: input size of the model
            - score_thres: score threshold to draw a box
            - model: model to transform
            - weights_tf: path to save the tf weights
            - output_path: path where the result image is saved
            - iou: Intersection over Union threshold
        infer_model (tensorflow): loaded tensorflow model
        image (numpy array): image to detect

    Returns:
        ndarray: detection scores (precision of each class detection)
        ndarray: predicted classes
    """
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(
        tiny=False, model=parameters['model'])
    input_size = parameters['input_size']

    original_image = image
    image_data = cv2.resize(original_image, (input_size, input_size))
    image_data = image_data / 255.
    images_data = np.asarray([image_data]).astype(np.float32)  # batch of one image

    batch_data = tf.constant(images_data)
    pred_bbox = infer_model(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=parameters['iou'],
        score_threshold=parameters['score_thres'])

    pred_bbox = [
        boxes.numpy(), scores.numpy(), classes.numpy(),
        valid_detections.numpy()
    ]

    image = utils.draw_bbox(original_image, pred_bbox)
    image = Image.fromarray(image.astype(np.uint8))
    # image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(parameters['output_path'], image)

    proto_scores = tf.make_tensor_proto(scores)
    proto_classes = tf.make_tensor_proto(classes)
    return tf.make_ndarray(proto_scores), tf.make_ndarray(proto_classes)
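# Hypothetical usage of detect() above, assuming a YOLOv4 SavedModel exported
# to ./checkpoints/yolov4-416 and a test image at ./data/kite.jpg (both paths
# are illustrative, not from this project):
import cv2
import tensorflow as tf
from tensorflow.python.saved_model import tag_constants

parameters = {
    'input_size': 416,       # model input resolution
    'score_thres': 0.25,     # score threshold to draw a box
    'model': 'yolov4',
    'iou': 0.45,             # NMS IoU threshold
    'output_path': './result.png',
}
saved_model = tf.saved_model.load('./checkpoints/yolov4-416', tags=[tag_constants.SERVING])
infer = saved_model.signatures['serving_default']
image = cv2.cvtColor(cv2.imread('./data/kite.jpg'), cv2.COLOR_BGR2RGB)
scores, classes = detect(parameters, infer, image)
print(scores.shape, classes.shape)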
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # compute the decimated frame rate up front; it is needed in the main loop
    # even when no output file is written
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    print('fps_orig ', fps)
    target_fps = fps / FLAGS.factor
    print('target_fps ', target_fps)

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, target_fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            # print('fps_orig ', fps_orig)
            if target_fps > 0:
                # keep only every FLAGS.factor-th frame
                if vid.get(cv2.CAP_PROP_POS_FRAMES) % FLAGS.factor == 0:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    image = Image.fromarray(frame)
                else:
                    continue
            else:
                print('target_fps is 0')
                exit()
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
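# The loop above drops frames so that the written video runs at roughly
# fps / FLAGS.factor. A minimal sketch of that decimation rule in isolation
# (illustrative helper, not part of the project):
def keep_frame(frame_index, factor):
    # keep one frame out of every `factor`; a non-positive factor keeps nothing
    return factor > 0 and frame_index % factor == 0

assert [i for i in range(1, 10) if keep_frame(i, 3)] == [3, 6, 9]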
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [
            bboxes, scores.numpy()[0], classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, FLAGS.by_class)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes)
        else:
            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info)

        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
def inference(preprocess_queue, inference_queue):
    import tensorflow as tf
    import core.utils as utils
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.compat.v1 import InteractiveSession
    from tensorflow.compat.v1 import ConfigProto
    from core.functions import count_objects, crop_objects
    from core.config import cfg
    from core.utils import read_class_names
    import os
    import random
    from core.yolov4 import filter_boxes

    tf.keras.backend.clear_session()

    input_size = Parameters.input_size
    model = OutsourceContract.model
    framework = Parameters.framework
    tiny = OutsourceContract.tiny
    weights = Parameters.weights
    iou = Parameters.iou
    score = Parameters.score

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    try:
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except:
        pass

    # configure gpu usage
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # load model
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights)
    else:
        saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING])

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    count = Parameters.count
    info = Parameters.info
    crop = Parameters.crop

    while True:
        if not preprocess_queue.empty():
            # drain the queue and keep only the most recent item
            queueData = preprocess_queue.get()
            while not preprocess_queue.empty():
                queueData = preprocess_queue.get()
            #preprocess_queue.task_done()
            images_data = queueData[0]
            name = queueData[1]
            original_image = queueData[2]

            if framework == 'tflite':
                interpreter.allocate_tensors()
                input_details = interpreter.get_input_details()
                output_details = interpreter.get_output_details()
                interpreter.set_tensor(input_details[0]['index'], images_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if model == 'yolov3' and tiny == True:
                    boxes, pred_conf = filter_boxes(
                        pred[1], pred[0], score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(
                        pred[0], pred[1], score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
            else:
                infer = saved_model_loaded.signatures['serving_default']
                batch_data = tf.constant(images_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=score)  # 1.2ms

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
            original_h, original_w, _ = original_image.shape
            bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)  # 1ms -> no tf needed

            # hold all detection data in one variable
            pred_bbox = [
                bboxes, scores.numpy()[0], classes.numpy()[0],
                valid_detections.numpy()[0]
            ]

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())
            # custom allowed classes (uncomment line below to allow detections for only people)
            # allowed_classes = ['person']

            # if crop flag is enabled, crop each detection and save it as new image
            if crop:
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop', name)  # per-image crop directory
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                             pred_bbox, crop_path, allowed_classes)

            if count:
                # count objects found
                counted_classes = count_objects(pred_bbox, by_class=False,
                                                allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                boxtext, image = utils.draw_bbox(original_image, pred_bbox, info,
                                                 counted_classes, allowed_classes=allowed_classes)
            else:
                boxtext, image = utils.draw_bbox(original_image, pred_bbox, info,
                                                 allowed_classes=allowed_classes)  # 0.5ms

            image = Image.fromarray(image.astype(np.uint8))  # 0.3ms
            inference_queue.put((boxtext, image, name))
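# Minimal sketch of wiring the worker above into a two-stage pipeline. The
# preprocess() producer here is a hypothetical stand-in; in the real project it
# would read frames, resize/normalize them, and enqueue (images_data, name,
# original_image) tuples:
from multiprocessing import Process, Queue
import numpy as np

def preprocess(preprocess_queue):
    images_data = np.zeros((1, 416, 416, 3), dtype=np.float32)   # dummy batch
    original_image = np.zeros((480, 640, 3), dtype=np.uint8)     # dummy frame
    preprocess_queue.put((images_data, 'frame_0', original_image))

if __name__ == '__main__':
    preprocess_queue, inference_queue = Queue(), Queue()
    Process(target=preprocess, args=(preprocess_queue,), daemon=True).start()
    Process(target=inference, args=(preprocess_queue, inference_queue), daemon=True).start()
    boxtext, image, name = inference_queue.get()  # blocks until a result arrives
    print(name, boxtext)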
def main(image_dir="./", net_loc="../cnn_mnist_10c.h5"):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    K.set_session(session)
    print(image_dir)
    print(net_loc)
    # from ptpdb import set_trace
    # set_trace()
    # imcollection = np.array(imread_collection(image_dir))[:, :, :, 0]
    imcollection = np.array(imread_collection(f"{image_dir}/*.png"))
    net_generated_data = np.expand_dims(imcollection, 3)
    x_real_train, x_real_test = keras_extract_mnist_digits()
    num_samples = min(len(net_generated_data), len(x_real_test))

    x_real_train = x_real_train / 255
    x_real_test = x_real_test / 255
    net_generated_data = net_generated_data / 255

    np.random.shuffle(x_real_train)
    np.random.shuffle(x_real_test)
    np.random.shuffle(net_generated_data)
    x_real_train = x_real_train[:num_samples]
    x_real_test = x_real_test[:num_samples]

    full_classifier = keras.models.load_model(net_loc)
    req_layer = "flatten_1"
    classifier = Model(
        inputs=full_classifier.input,
        outputs=full_classifier.get_layer(req_layer).output,
    )

    print("Calculating FCD for train data")
    fcd_train = compute_real_fcd(x_real_train, classifier)
    print("Calculating FCD for test data")
    fcd_test = compute_real_fcd(x_real_test, classifier)
    print(f"samples = {num_samples} train fcd = {fcd_train:.3g} test fcd = {fcd_test:.3g}")

    net_real_data = x_real_train
    assert len(net_generated_data) == len(net_real_data)
    print(
        np.max(net_generated_data),
        np.min(net_generated_data),
        f"{np.std(net_generated_data):.3f}",
        f"{np.mean(net_generated_data):.3f}",
    )
    print(
        np.max(net_real_data),
        np.min(net_real_data),
        f"{np.std(net_real_data):.3f}",
        f"{np.mean(net_real_data):.3f}",
    )
    real_act = classifier.predict(net_real_data)
    print(real_act.shape)
    gen_act = classifier.predict(net_generated_data)
    print("Calculating FCD for generated data")
    fcd_tensor = diagonal_only_frechet_classifier_distance_from_activations(
        tf.convert_to_tensor(real_act), tf.convert_to_tensor(gen_act))
    fcd = session.run(fcd_tensor)
    print(f"fcd = {fcd:.3g}")
    session.close()
    sys.exit(0)

    # Everything below is unreachable because of sys.exit(0) above; it is kept
    # for reference.
    fcd_iters = 2
    gen_fcd_arr = []
    for fcd_i in range(fcd_iters):
        # inverse normalization due to tanh
        # net_generated_data = (net_generated_data + 1) / 2
        net_real_data = x_real_train
        assert len(net_generated_data) == len(net_real_data)
        print(
            np.max(net_generated_data),
            np.min(net_generated_data),
            f"{np.std(net_generated_data):.3f}",
            f"{np.mean(net_generated_data):.3f}",
        )
        print(
            np.max(net_real_data),
            np.min(net_real_data),
            f"{np.std(net_real_data):.3f}",
            f"{np.mean(net_real_data):.3f}",
        )
        np.random.shuffle(net_generated_data)
        np.random.shuffle(net_real_data)
        real_act = classifier.predict(net_real_data)
        gen_act = classifier.predict(net_generated_data)
        print("Calculating FCD for generated data")
        fcd_tensor = diagonal_only_frechet_classifier_distance_from_activations(
            tf.convert_to_tensor(real_act), tf.convert_to_tensor(gen_act))
        sess = K.get_session()
        fcd = sess.run(fcd_tensor)
        gen_fcd_arr.append(fcd)
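# For reference, a numpy sketch of what a diagonal-only Frechet distance between
# two activation sets computes (assuming diagonal covariances; this mirrors, but
# is not, the tensor implementation called above):
import numpy as np

def diagonal_frechet_distance(real_act, gen_act):
    mu_r, mu_g = real_act.mean(axis=0), gen_act.mean(axis=0)
    sig_r, sig_g = real_act.std(axis=0), gen_act.std(axis=0)
    # with diagonal covariances, trace(S_r + S_g - 2*sqrt(S_r S_g)) reduces to
    # sum((sigma_r - sigma_g)^2)
    return np.sum((mu_r - mu_g) ** 2) + np.sum((sig_r - sig_g) ** 2)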
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = os.getenv('HOME') + '/st_mini/src/scout_mini_ros/scout_bringup/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    if use_webcam:
        video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    if use_webcam:
        try:
            vid = cv2.VideoCapture(int(video_path))
        except:
            vid = cv2.VideoCapture(video_path)

    # initial values for robot motor control
    x = 0
    y = 0
    z = 0
    th = 0
    speed = 0.7
    turn = 1

    # additional state variables
    cx, cy, h = 0, 0, 0
    frame_num = 0

    # load the depth camera class
    if not use_webcam:
        dc = DepthCamera()

    # get the default values for the obstacle region
    default = Default_dist()

    # ROS class init
    go = scout_pub_basic()
    rate = rospy.Rate(60)

    # initial target id (the id of the person carrying the sub object is assigned below)
    target_id = False

    # while video is running
    while not rospy.is_shutdown():
        if use_webcam:
            return_value, frame = vid.read()
        else:
            # use the depth camera
            return_value, depth_frame, frame = dc.get_frame()

        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_num += 1

        if not use_webcam:
            # set the default ROI for obstacle avoidance (currently only the first 10 frames)
            if frame_num < 11:
                default.default_update(depth_frame)
                continue

        print('Frame #: ', frame_num)
        # frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())
        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['person', 'cell phone', 'tie', 'stop sign']

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(detections)

        # publisher code for controlling the st-mini
        go.update(x, y, z, th, speed, turn)
        go.sendMsg()

        # left/right turn thresholds
        left_limit = frame.shape[1] // 2 - 100
        right_limit = frame.shape[1] // 2 + 100

        # # zones where the turn speed increases
        # left_max = frame.shape[1]//4
        # right_max = (frame.shape[1]//4)*3

        # robot max/min speeds
        # linear velocity
        max_speed = 0.8
        min_speed = 0.5
        # angular velocity
        max_turn = 1.3
        min_turn = 0.8

        # ┌---------- assign a person id using the sub object ----------┐
        # build the track id list
        track_id_list = []
        # build the person-id-to-bbox dict
        person_id_to_bbox = {}
        """Data of the following form:
        person_id_to_bbox = {
            1: [100, 200, 300, 400]   # id: bbox
        }
        """
        # build the recognition bbox list
        recognition_bbox_list = []

        # recognition object (if the recognition object sits inside a person's bbox,
        # that person's id keeps being tracked; if the target is later lost it can be
        # re-identified the same way)
        recognition_object = 'tie'

        # fill the track id list
        for track in tracker.tracks:
            class_name = track.get_class()
            track_id = track.track_id
            bbox = track.to_tlbr()
            track_id_list.append(track.track_id)
            if class_name == 'person':
                person_id_to_bbox[track_id] = bbox
            elif class_name == recognition_object:
                recognition_bbox_list.append(bbox)
        print('person_id_to_bbox: {}, recognition_bbox_list: {}'.format(
            person_id_to_bbox, recognition_bbox_list))

        # if the recognition object's bbox lies inside a person's bbox, assign that
        # person's id as target_id so the person keeps being tracked
        if target_id == False:  # no person id has been assigned yet
            # try:
            for person_id, person_bbox in person_id_to_bbox.items():
                # person_bbox = list(map(int, person_bbox))
                for rec_bbox in recognition_bbox_list:
                    # rec_bbox = list(map(int, rec_bbox))
                    # if person_bbox[0] < 0: person_bbox[0] = 0
                    # elif person_bbox[1] < 0: person_bbox[1] = 0
                    # elif person_bbox[2] > frame.shape[0]: person_bbox[2] = frame.shape[0]
                    # elif person_bbox[3] > frame.shape[1]: person_bbox[3] = frame.shape[1]
                    if rec_bbox[0] >= person_bbox[0] and rec_bbox[1] >= person_bbox[1] \
                            and rec_bbox[2] <= person_bbox[2] and rec_bbox[3] <= person_bbox[3]:
                        target_id = person_id
                        print('target id assigned')
                    else:
                        target_id = False
                        print('target id assignment failed')
        print('target_id: ', target_id)
        # └---------- assign a person id using the sub object ----------┘

        # tracking algorithm
        if len(tracker.tracks) == 0:
            # nothing to track: stop
            key = 'stop'
            print('There are no objects to track.')
        else:
            # something to track: run
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                track_id = track.track_id
                bbox = track.to_tlbr()
                class_name = track.get_class()

                if target_id == track_id and class_name == 'person':
                    # compute cx, cy
                    w, h = int(bbox[2] - bbox[0]), int(bbox[3] - bbox[1])
                    cx, cy = int(w / 2 + bbox[0]), int(h / 2 + bbox[1])

                    # distance between the person and the robot
                    person_distance = person_dist(depth_frame, cx, cy, h)
                    print('distance to person: ', person_distance)

                    # max/min of the safe forward range
                    stable_max_dist = 2500
                    stable_min_dist = 2000
                    if person_distance < stable_min_dist:
                        print('Too Close')
                        key = 'stop'
                    else:
                        """
                        Using the depth value:
                        1. Distance to the target (forward motion)
                           1) adequate distance: 1.5 m ~ 2.0 m --> linear.x = 1
                           2) danger distance: up to 1.5 m --> linear.x = 0
                           3) chase distance: beyond 2.0 m --> linear.x += 0.2 (until the distance is adequate)
                        2. Turning left/right using the target's center point
                           1) the center point cx, cy is obtained as
                              width = bbox[2] - bbox[0]
                              height = bbox[3] - bbox[1]
                              cx = int(width/2 + bbox[0])
                              cy = int(height/2 + bbox[1])
                              Only cx is used, since only a left/right decision is needed.
                           2) caution when using cx:
                              as the target drifts off-screen, the bbox's top-left x coordinate can go
                              negative or exceed the frame width (640 here); if bbox[0] is negative or
                              exceeds 640, the robot must turn immediately.
                        First check for obstacles with the depth camera; if there are none, measure the
                        distance/direction to the target and then decide the final published command.
                        """
                        # zones where the turn speed increases
                        left_max = frame.shape[1] // 4
                        right_max = (frame.shape[1] // 4) * 3
                        max_speed = 1.5
                        min_speed = 1.2
                        """
                        Stop condition (3/3): when there is an obstacle; strictly speaking this is an
                        avoidance maneuver rather than a stop. obstacle_detect() assigns a value to
                        key only when an obstacle is present.
                        """
                        # get a 'left'/'right' string from the obstacle-avoidance ROI distance
                        # if person_distance > stable_max_dist:
                        #     key = obstacle_detect(default, depth_frame)
                        cv2.rectangle(frame, (cx + 10, cy - (h // 5) + 10),
                                      (cx - 10, cy - (h // 5) - 10), (255, 0, 0), 5)
                        # if there is no obstacle, follow the person
                        # if key == None:
                        key, speed, turn = drive(cx, left_limit, right_limit, turn, speed)
                        # key, speed, turn = drive2(cx, left_limit, right_limit, turn, frame, speed, max_speed, max_turn)
                        # key, speed, turn = drive3(cx, left_limit, right_limit, turn, frame, speed, max_speed, min_speed, max_turn, stable_min_dist, stable_max_dist, person_distance, start_speed_down=300)

                # draw bbox on screen
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                              (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                               int(bbox[1])), color, -1)
                cv2.putText(frame, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        # pass the speed/direction produced by the drive algorithm to the robot
        x, y, z, th, speed, turn = key_move(key, x, y, z, th, speed, turn)
        print('key: ', key)
        print('key_type: ', type(key))
        print('x: {}, y: {}, th: {}, speed: {}, turn: {}'.format(x, y, th, speed, turn))

        # mark the center of the frame
        cv2.circle(frame, (320, 240), 10, (255, 255, 255))

        # draw the left/right turn divider lines
        cv2.line(frame, (left_limit, 0), (left_limit, frame.shape[0]), (255, 0, 0))
        cv2.line(frame, (right_limit, 0), (right_limit, frame.shape[0]), (255, 0, 0))

        # ROS rate sleep
        rate.sleep()
        print('track id list: ', track_id_list)

        '''
        box_center_roi = np.array((depth_frame[cy-10:cy+10, cx-10:cx+10]), dtype=np.float64)
        cv2.rectangle(frame, (cx-10, cy+10), (cx+10, cy-10), (255, 255, 255), 2)
        '''
        safe_roi = np.array([[400, 400], [240, 400], [160, 480], [480, 480]])
        #safe_roi = np.array([[240, 420], [400, 420], [480, 160], [480, 480]])
        cv2.polylines(frame, [safe_roi], True, (255, 255, 255), 2)
        cv2.rectangle(frame, (205, 445), (195, 435), (255, 0, 0), 5)
        cv2.rectangle(frame, (245, 405), (235, 395), (255, 0, 0), 5)
        cv2.rectangle(frame, (405, 405), (395, 395), (255, 0, 0), 5)
        cv2.rectangle(frame, (445, 445), (435, 435), (255, 0, 0), 5)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # to view the depth map in color
        # depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_frame, alpha=0.03), cv2.COLORMAP_JET)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    dc.release()
    cv2.destroyAllWindows()
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
        image = utils.draw_bbox(frame, pred_bbox)
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        ######################
        image_h, image_w, _ = frame.shape
        out_boxes, out_scores, out_classes, num_boxes = pred_bbox
        classes = ['Scalpel', 'Straight_dissection_clamp', 'Straight_mayo_scissor', 'Curved_mayo_scissor']
        for i in range(num_boxes[0]):
            class_ind = int(out_classes[0][i])
            if class_ind < 0 or class_ind >= len(classes):
                continue
            # scale normalized ymin, xmin, ymax, xmax to pixel coordinates
            coor = out_boxes[0][i]
            coor[0] = int(coor[0] * image_h)
            coor[2] = int(coor[2] * image_h)
            coor[1] = int(coor[1] * image_w)
            coor[3] = int(coor[3] * image_w)
            fontScale = 0.5
            score = out_scores[0][i]
            # X is the index of the class of interest, assumed to be set elsewhere;
            # NMS scores are in [0, 1], so the 85% threshold is 0.85
            if X == class_ind and score > 0.85:
                print(coor[0], coor[1], coor[2], coor[3])
                print(classes[class_ind])
                print(score, end="%")
                print("")
                print("#########")
        #########################

        if not FLAGS.dont_show:
            cv2.imshow("result", result)
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
def training(sweep_q, worker_q):
    # GPU initialization
    gpu_config = ConfigProto()
    gpu_config.gpu_options.per_process_gpu_memory_fraction = 0.3
    gpu_config.gpu_options.allow_growth = True
    session = InteractiveSession(config=gpu_config)

    reset_wandb_env()
    worker_data = worker_q.get()
    run_name = "{}-{}".format(worker_data.sweep_run_name, worker_data.num)
    config = worker_data.config
    train = worker_data.train
    test = worker_data.test
    num_classes = worker_data.num_classes
    x = worker_data.x
    y = worker_data.y
    run = wandb.init(
        group=worker_data.sweep_name,
        job_type=worker_data.sweep_run_name,
        name=run_name,
        config=config,
    )

    # Model
    dropout = run.config.dropout
    nodesizes = [run.config.node_size2, run.config.node_size3, run.config.node_size4]

    model = Sequential()
    model.add(Bidirectional(LSTM(run.config.node_size1, return_sequences=True),
                            input_shape=(x.shape[1], x.shape[2])))
    model.add(Dropout(rate=dropout))
    for i in range(0, run.config.num_layers):  # number of hidden layers, random between 1 and 3
        model.add(Bidirectional(LSTM(nodesizes[i], return_sequences=True)))
        model.add(Dropout(rate=dropout))
    model.add(Bidirectional(LSTM(run.config.node_size5)))
    model.add(Dropout(rate=dropout))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=run.config.optimizer,
                  metrics=['accuracy', Precision(), Recall()])
    model.summary()

    model.fit(x[train], y[train],
              epochs=run.config.epochs,
              batch_size=run.config.batch_size,
              validation_data=(x[test], y[test]),
              shuffle=False, verbose=2,
              callbacks=[WandbCallback()])

    # Test accuracy
    model_best_path = os.path.join(run.dir, "model-best.h5")
    best_model = load_model(filepath=model_best_path)
    y_eval = best_model.evaluate(x[test], y[test], verbose=0)

    # Confusion matrix
    y_pred = best_model.predict(x[test])
    y_pred_integer = np.argmax(y_pred, axis=1)
    y_test_integer = np.argmax(y[test], axis=1)
    y_pred_name = [worker_data.token_labels[p] for p in y_pred_integer]
    y_test_name = [worker_data.token_labels[p] for p in y_test_integer]
    wandb.sklearn.plot_confusion_matrix(y_test_name, y_pred_name)

    # Convert to TFLite
    tflite_converter = tf.lite.TFLiteConverter.from_keras_model(best_model)
    tflite_converter.experimental_new_converter = True
    tflite_model = tflite_converter.convert()
    with open(os.path.join(wandb.run.dir, "model-best.tflite"), "wb") as f:
        f.write(tflite_model)

    # Finish run
    run.log(dict(val_accuracy=y_eval[1]))
    wandb.join()
    sweep_q.put(WorkerDoneData(val_accuracy=y_eval[1]))
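# Minimal sketch of the coordinator side of the queue handshake used by
# training() above (hypothetical; the real WorkerData/WorkerDoneData tuples and
# the actual model training come from the project's sweep script):
from collections import namedtuple
from multiprocessing import Process, Queue

WorkerDoneData = namedtuple('WorkerDoneData', ['val_accuracy'])

def worker(sweep_q, worker_q):
    data = worker_q.get()                           # receive the fold/config to train on
    # ... train a model with `data` (training() does this) ...
    sweep_q.put(WorkerDoneData(val_accuracy=0.9))   # report the result back

if __name__ == '__main__':
    sweep_q, worker_q = Queue(), Queue()
    p = Process(target=worker, args=(sweep_q, worker_q))
    p.start()
    worker_q.put({'fold': 0})                # stand-in for the real WorkerData tuple
    print(sweep_q.get().val_accuracy)        # blocks until the worker reports back
    p.join()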
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] ## Retrieving object moving sequence from text file generated from record_positions.py object_move_pos_strings = [] object_move_pos = [] #Retreive object pos from .txt file. with open("/home/pierre/MasterThesisObjectTracking/positions.txt", 'r') as object_pos_file: for line in object_pos_file: pos = str.split(line) object_move_pos_strings.append(pos) object_pos_file.close() #Convert positions to integers for element in object_move_pos_strings: ints = [int(item) for item in element] object_move_pos.append(ints) #Boolean for when the object mover sequence is still running still_moving = True #Boolean that turns true when object has finished to move end_trial = False moving_trigger = 0 #Dynmixel setup # connecting Ax12.open_port() Ax12.set_baudrate() #Declaring servomotor for pan and tilt camera_panning_motor = Ax12(1) camera_tilt_motor = Ax12(2) # Declaring servomotor for object object_pan1 = Ax12(3) object_tilt1 = Ax12(4) object_tilt2 = Ax12(5) object_pan2 = Ax12(6) start_pan = 500 start_tilt = 230 # define video codec fourcc = cv2.VideoWriter_fourcc(*'XVID') bound_box = [0,0,0,0] frames_per_second = 0 time.sleep(0.2) #Set start position camera_panning_motor.set_position(start_pan) camera_tilt_motor.set_position(start_tilt) #Set start position for object move_object_motors(object_move_pos[0], speed= 500) camera_panning_motor.set_torque_limit(1023) camera_tilt_motor.set_torque_limit(1023) camera_panning_motor.set_moving_speed(1000) camera_tilt_motor.set_moving_speed(1000) one_degree = int(camera_panning_motor.get_torque_limit()/300) # Create class instance of second counter main_count = timer_sec.SecondCounter() # Create class instance of second counter when tracker is in ROI roi_count = timer_sec.Counter_tread() roi_seconds = 0 returned_roi_seconds = 0 in_roi = False been_in_roi = False first_entry = True start_count = True left_roi = False roi_seconds_start = 0 roi_seconds_stop = 0 last_move_time = 0 roi = 35 margin = 70 tracksuccess = False track_lost = False refound = 0 first_detection = True #Count for iterating in object position list next_object_pos = 0 object_speed = 100 move_trigger = 0 #Function that returns center of ROI def goalPosition (bbox): x, y = int(bbox[0]), int(bbox[1]) w = int(bbox[2]) - int(bbox[0]) h = int(bbox[3]) - int(bbox[1]) center_x = int(x + w/2) center_y = int(y + h/2) center = [center_x, center_y] return 
center # Function that calculates distance between center of frame and center of ROI def calculateDistance(cam_center, track_center): ROI = False x_diff = cam_center[0] - track_center[0] y_diff = cam_center[1] - track_center[1] difference = [x_diff, y_diff] if abs(x_diff) <= roi and abs(y_diff) <= roi: ROI = True return difference, ROI # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) import os import shutil output_dir = "{}/outputs".format(os.getcwd()) if os.path.exists(output_dir): shutil.rmtree(output_dir) os.makedirs(output_dir) frames_dir = "{}/frames".format(output_dir) text_dir = "{}/txt_files".format(output_dir) video_dir = "{}/video".format(output_dir) os.makedirs(frames_dir) os.makedirs(text_dir) os.makedirs(video_dir) # Create output return_value, frame = vid.read() videOutput = cv2.VideoWriter("{}/output.avi".format(video_dir), fourcc, 30, (frame.shape[1], frame.shape[0])) videoOutput_no_box = cv2.VideoWriter("{}/output_no_graphics.avi".format(video_dir), fourcc, 30, (frame.shape[1], frame.shape[0])) frame_num = 0 # start the main count of seconds main_count.start() # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num +=1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() no_graphics = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) videoOutput_no_box.write(no_graphics) cv2.imwrite("{}/frame_{}.jpg".format(frames_dir,frame_num), no_graphics) #Checks if where still moving positions for the object if next_object_pos < len(object_move_pos): print(f'objec_move_pos length = {len(object_move_pos)}') still_moving = True else: print("Object moving sequence is finished") if still_moving: move_finished_time = main_count.peek() still_moving = False if still_moving: move_object_motors(object_move_pos[next_object_pos], speed= object_speed) # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file #allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ['cell phone', 'cup'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, 
                                                       scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(detections)

        # draw a circle in the center of the frame
        camera_center = [int(frame.shape[1] / 2), int(frame.shape[0] / 2)]
        cv2.circle(frame, (camera_center[0], camera_center[1]), radius=1,
                   color=(0, 255, 0), thickness=10)
        distance = [0, 0]

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                track_lost = True
                tracksuccess = False
                bound_box = [0, 0, 0, 0]
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracksuccess = True
            if tracksuccess and track_lost:
                refound += 1
                track_lost = False
                first_detection = False

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # calculate the center of the bounding box and draw a circle there
            center = goalPosition(bbox)
            cv2.circle(frame, (center[0], center[1]), radius=0, color=color, thickness=10)
            print("Bbox: {0}".format(bbox))

            # store value for the output.csv file
            bound_box = bbox

            # difference in x-coordinates respectively y-coordinates
            distance, in_roi = calculateDistance(camera_center, center)
            moveMotors(distance[0], distance[1], camera_panning_motor,
                       camera_tilt_motor, roi, margin)

            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        frames_per_second = fps

        # FPS text
        cv2.putText(frame, "FPS {0}".format(str(int(fps))), (75, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # moving interval in seconds
        moving_interval = 3
        if still_moving:
            if main_count.peek() > 1 and main_count.peek() < 3:
                move_trigger = main_count.peek()
            elif main_count.peek() > move_trigger + 0.02:
                move_trigger += moving_interval
                next_object_pos += 1
        else:
            print("Object move choreography is finished")
            if main_count.peek() > move_finished_time + 2:
                end_trial = True

        # start the timer when the ROI is centered
        if in_roi and tracksuccess:
            if start_count:
                roi_count.start()
                start_count = False
            if first_entry:
                roi_seconds = roi_count.peek()
            if left_roi:
                roi_seconds_start = roi_count.peek()
            been_in_roi = True
            left_roi = False

        # stop the timer when the ROI is not centered and accumulate time in the counter
        if been_in_roi and tracksuccess and in_roi is False:
            print("roi_stop: {}".format(roi_seconds_stop))
            if first_entry:
                roi_seconds_stop = 0
            else:
                roi_seconds_stop = roi_count.peek() - roi_seconds_start
                roi_seconds += roi_seconds_stop
            been_in_roi = False
            first_entry = False
            left_roi = True

        # graphics to show timer counters
        cv2.putText(result, "Sec {0}".format(str(int(main_count.peek()))), (500, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 2)
        cv2.putText(result, "Sec in ROI {0}".format(str(int(roi_seconds))), (400, 85),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (254, 0, 255), 2)
        # cv2.imshow("Frame", frame)
        videOutput.write(result)

        mean_roi_sec = round((roi_seconds / main_count.peek()) * 100, 2)
        camera_pos = [camera_panning_motor.get_position(), camera_tilt_motor.get_position()]
        object_pos = [Ax12(3).get_position(), Ax12(4).get_position(),
                      Ax12(5).get_position(), Ax12(6).get_position()]

        # append the results of this frame to a csv file
        with open('outputs/output.csv', 'a', newline='') as csvfile:
            fieldnames = ['Frame', 'FPS', 'Distance_x', 'Distance_y',
                          'bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax',
                          'Prop_roi(%)', 'Time', 'Roi_time', 'In_roi',
                          'Tracking_success', 'Refound_tracking',
                          'camera_pan', 'camera_tilt',
                          'object_pan1', 'object_tilt1', 'object_tilt2', 'object_pan2',
                          'img_center_x', 'img_center_y']
            csv_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            # fill in values in the csv file
            # note: 'In_roi' is declared in fieldnames but never written, so it stays blank
            if frame_num == 1:
                csv_writer.writeheader()
            csv_writer.writerow({'Frame': str(frame_num),
                                 'FPS': str(int(frames_per_second)),
                                 'Distance_x': str(distance[0]),
                                 'Distance_y': str(distance[1]),
                                 'bbox_xmin': str(int(bound_box[0])),
                                 'bbox_ymin': str(int(bound_box[1])),
                                 'bbox_xmax': str(int(bound_box[2])),
                                 'bbox_ymax': str(int(bound_box[3])),
                                 'Prop_roi(%)': str(mean_roi_sec),
                                 'Time': str(round(main_count.peek(), 2)),
                                 'Roi_time': str(round(roi_seconds, 2)),
                                 'Tracking_success': str(tracksuccess),
                                 'Refound_tracking': str(refound - 1),
                                 'camera_pan': str(camera_pos[0]),
                                 'camera_tilt': str(camera_pos[1]),
                                 'object_pan1': str(object_pos[0]),
                                 'object_tilt1': str(object_pos[1]),
                                 'object_tilt2': str(object_pos[2]),
                                 'object_pan2': str(object_pos[3]),
                                 'img_center_x': str(camera_center[0]),
                                 'img_center_y': str(camera_center[1])})

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if the output flag is set, save the video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q') or end_trial:
            break

    cv2.destroyAllWindows()
    videOutput.release()
    videoOutput_no_box.release()
    Ax12.close_port()

    # stop the count and get the elapsed time
    main_seconds = main_count.finish()
    roi_count.finish()
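# The helpers used above (goalPosition, calculateDistance) are defined elsewhere in
# this project. Below is a minimal sketch of what they plausibly compute: the bbox
# center, and the signed pixel offset from the frame center plus an in-ROI test.
# The names, the ROI half-width argument, and the return conventions are assumptions,
# not the project's actual implementation.
def goalPosition_sketch(bbox):
    # center of an (xmin, ymin, xmax, ymax) box in pixel coordinates
    return [int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)]


def calculateDistance_sketch(camera_center, center, roi_half_width=50):
    # signed offsets; the target counts as "in ROI" when both offsets fall
    # inside a square window around the frame center
    dx = center[0] - camera_center[0]
    dy = center[1] - camera_center[1]
    in_roi = abs(dx) <= roi_half_width and abs(dy) <= roi_half_width
    return [dx, dy], in_roi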
class Application:
    def __init__(self):
        ap = argparse.ArgumentParser()
        ap.add_argument("-d", "--dataset", required=True, type=str,
                        help="path to input dataset")
        ap.add_argument("-p", "--plot", type=str, default="plot.png",
                        help="path to output accuracy/loss plot")
        ap.add_argument("-n", "--number_images", type=int, default=5000,
                        help="number of images to load")
        ap.add_argument("-e", "--epochs", type=int, default=25,
                        help="number of epochs")
        ap.add_argument("-b", "--batch_size", type=int, default=16,
                        help="batch size")
        ap.add_argument("-lr", "--learning_rate", type=float, default=1e-3,
                        help="learning rate")
        ap.add_argument("-m", "--model", type=str, default="RESNET50",
                        help="model to choose: `LENET`, `INCEPTIONV3`, `RESNET50` or `VGG16`")
        ap.add_argument("-g", "--gpu", type=str, default="yes",
                        help="use the GPU: `yes` or `no`")
        self.args = vars(ap.parse_args())
        self.model = Model(self.args)

    def run(self):
        self.model.run()
        self.model.save_plot()

    def create_session(self):
        print("[*] Setting config ..")
        if self.args["gpu"] == "no":
            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        if tf.test.gpu_device_name():
            print('GPU found')
        else:
            print("No GPU found")
        config = ConfigProto()
        # options to tune GPU memory usage; must be set before the session is created
        # config.gpu_options.allow_growth = True
        # config.gpu_options.allocator_type = "BFC"
        # config.gpu_options.per_process_gpu_memory_fraction = 0.90
        self.session = InteractiveSession(config=config)
        print("[*] Done")

    def close_session(self):
        print("[!] Closing session ..")
        self.session.close()
        del self.session
        print("[*] Done")
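# A minimal sketch of how this class is presumably driven from the command line; the
# session bracketing around run() is an assumption based on the method names above.
if __name__ == "__main__":
    app = Application()
    app.create_session()
    try:
        app.run()
    finally:
        app.close_session()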
def main(_argv):
    lista = 0
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    image_path = FLAGS.image

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # change this to the directory on your machine that contains the images
    images = glob.glob('video1/*.jpg')
    for j in range(len(images)):
        print(j)
        # change this to the directory on your machine that contains the images
        original_image = cv2.imread('video1/frame' + str(j) + '.jpg')
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        # image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.
        # image_data = image_data[np.newaxis, ...].astype(np.float32)

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

        # `lista` holds the bounding boxes of the objects; the modified draw function
        # removes duplicate vehicles
        image, lista = utils.draw_bbox_modificado(original_image, pred_bbox)
        # image = utils.draw_bbox(image_data*255, pred_bbox)
        image = Image.fromarray(image.astype(np.uint8))
        # image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        # change this to the directory on your machine where the images should be saved
        cv2.imwrite('output/' + str(j) + '.jpg', image)
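# The loop above assumes the frames are named frame0.jpg ... frameN.jpg with no gaps.
# A small helper that instead sorts the glob results by their embedded frame number
# avoids that assumption; this is a sketch, not part of the original script, and the
# `re`/`os` imports are assumed available.
import os
import re


def sorted_frames(pattern='video1/*.jpg'):
    # sort paths by the integer embedded in the file name, e.g. frame12.jpg -> 12
    return sorted(glob.glob(pattern),
                  key=lambda p: int(re.search(r'(\d+)', os.path.basename(p)).group(1)))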
class Model:
    """
    Model class for model abstraction. The class implementation is similar to the
    TensorFlow Sequential and Functional Model using Graph.
    Please refer to https://www.tensorflow.org/api_docs/python/tf/Graph for more details.
    """
    def __init__(self, commands, input_size=1960, first_conv_filter=8,
                 model_dir="model", frequency_size=40, time_size=49, sess=False,
                 preprocess="micro", training=True):
        """ Initialization of variables and Tensor Session """
        assert type(commands) == list, "Commands should be a list"
        assert type(model_dir) == str, "Model directory should be a string object"
        self.check_session(sess)
        self.commands = commands
        self.commands_dic = self.create_commands(self.commands)
        print(self.commands_dic)
        self._softmax_layer, self._dropout_placeholder = self._build(
            input_size, first_conv_filter, frequency_size, time_size, training)
        self._model_dir = model_dir
        self._input_size = input_size
        self._loaded = False
        self._start_step = 0
        self._global_step = tf.compat.v1.train.get_or_create_global_step()
        self._save_step = 1
        if preprocess == "micro":
            self.mfcc, self.wav_filename_placeholder = Micro_process()
        else:
            self.mfcc, self.wav_filename_placeholder = run_mfcc()
        self.train(learn_rate=[0, 0], dropout_rate=0, save_step=0, batch_size=0,
                   eval_step=0, training_time=0, rate_step=0, display_step=0,
                   train_data=0, Validation_data=0, init=True)

    def _build(self, input_size, first_conv_filter, frequency_size, time_size,
               training, input_1d=False):
        """
        Private method that builds the model layers in the graph.

        Args:
            input_size: size of the flattened input, defaults to 1960
            first_conv_filter: number of filters for the first convolutional layer
            frequency_size: number of MFCC rows (refer to the run_mfcc feature extraction)
            time_size: number of MFCC columns

        Returns:
            The softmax layer, plus the dropout placeholder when training.
        """
        dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
        self._fingerprint_input = tf.compat.v1.placeholder(
            tf.float32, [None, input_size], name='fingerprint_input')
        if training:
            # input: MFCC for commands [batch_size, input_size]
            # output: reshape to [batch_size, rows, cols, channel]
            input_4d = tf.reshape(self._fingerprint_input,
                                  [-1, time_size, frequency_size, 1])
        else:
            input_4d = tf.reshape(input_1d, [-1, time_size, frequency_size, 1])

        with tf.compat.v1.variable_scope("first_weights", reuse=tf.compat.v1.AUTO_REUSE):
            # weights initialization
            first_weights = tf.compat.v1.get_variable(
                name='first_weights',
                initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.01),
                shape=[10, 8, 1, first_conv_filter])
        with tf.compat.v1.variable_scope("first_bias", reuse=tf.compat.v1.AUTO_REUSE):
            # bias initialization
            first_bias = tf.compat.v1.get_variable(
                name='first_bias',
                initializer=tf.compat.v1.zeros_initializer,
                shape=[first_conv_filter, ])

        # first convolution layer; input: [batch_size, rows, cols, channel]
        first_conv = tf.nn.conv2d(input=input_4d,
                                  filters=first_weights,
                                  strides=[1, 2, 2, 1],
                                  padding='SAME') + first_bias
        first_relu = tf.nn.relu(first_conv)
        if training:
            first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
        else:
            first_dropout = first_relu

        conv_shape = first_dropout.get_shape()
        conv_output_width = conv_shape[2]
        conv_output_height = conv_shape[1]
        conv_element_count = int(conv_output_width * conv_output_height * first_conv_filter)
        flattened_first_conv = tf.reshape(first_dropout, [-1, conv_element_count])
        label_count = len(self.commands_dic)

        with tf.compat.v1.variable_scope("softmax_weights", reuse=tf.compat.v1.AUTO_REUSE):
            softmax_weights = tf.compat.v1.get_variable(
                name='softmax_weights',
                initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.01),
                shape=[conv_element_count, label_count])
        with tf.compat.v1.variable_scope("softmax_bias", reuse=tf.compat.v1.AUTO_REUSE):
            softmax_bias = tf.compat.v1.get_variable(
                name='softmax_bias',
                initializer=tf.compat.v1.zeros_initializer,
                shape=[label_count])
        softmax_layer = tf.matmul(flattened_first_conv, softmax_weights) + softmax_bias

        if training:
            return softmax_layer, dropout_rate
        return softmax_layer

    def train(self, learn_rate, dropout_rate, save_step, batch_size, eval_step,
              training_time, rate_step, display_step, train_data, Validation_data,
              init=False):
        assert type(learn_rate) == list, \
            "Learn rate should be a list, e.g. [.001, .0001]"
        self._save_step = save_step
        self._training_time = training_time
        self._ground_truth_input = tf.compat.v1.placeholder(
            tf.int64, [None], name='groundtruth_input')
        with tf.compat.v1.name_scope('cross_entropy'):
            self._cross_entropy_mean = tf.compat.v1.losses.sparse_softmax_cross_entropy(
                labels=self._ground_truth_input, logits=self._softmax_layer)
        learning_rate_input = tf.compat.v1.placeholder(
            tf.float32, [], name='learning_rate_input')
        train_step = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate_input).minimize(self._cross_entropy_mean)
        self._predicted = tf.argmax(input=self._softmax_layer, axis=1)
        correct_prediction = tf.equal(self._predicted, self._ground_truth_input)
        self._evaluation_step = tf.reduce_mean(
            input_tensor=tf.cast(correct_prediction, tf.float32))
        saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

        if self._loaded is False and self._start_step == 0:
            self._global_step = tf.compat.v1.train.get_or_create_global_step()
            tf.compat.v1.global_variables_initializer().run()
            # self._loaded = True
        increment_global_step = tf.compat.v1.assign(self._global_step,
                                                    self._global_step + 1)

        if init is False:
            tf.io.write_graph(self._sess.graph_def, self._model_dir, "model" + '.pbtxt')
            with gfile.GFile(os.path.join(self._model_dir, "commands" + '_labels.txt'),
                             'wb') as f:
                f.write('\n'.join(self.commands))

        if training_time <= self._start_step and self._loaded is True:
            print(f"The loaded checkpoint has already been trained for {self._start_step} epochs.\n"
                  f"New training starts from {self._start_step}; please increase "
                  f"training_time to train the model further")

        if init is False:
            if tf.config.list_physical_devices('GPU'):
                strategy = tf.distribute.MirroredStrategy()
            else:
                # use the default strategy
                strategy = tf.distribute.get_strategy()
            with strategy.scope():
                history = {"categorical_accuracy": [], "loss": [],
                           "val_categorical_accuracy": [], "val_loss": []}
                learning_rate = learn_rate[0]
                for training_step in xrange(self._start_step, training_time):
                    if training_step == int(rate_step):
                        learning_rate = learn_rate[1]
                    x_train, y_train = self.get_next_batch(batch_size, train_data)
                    train_accuracy, cross_entropy_value, _, _ = self._sess.run(
                        [self._evaluation_step, self._cross_entropy_mean,
                         train_step, increment_global_step],
                        feed_dict={self._fingerprint_input: x_train,
                                   self._ground_truth_input: y_train,
                                   learning_rate_input: learning_rate,
                                   self._dropout_placeholder: dropout_rate})
                    if training_step % int(display_step) == 0:
                        print('Step #%d: learning rate %f, accuracy %.1f%%, cross entropy %f'
                              % (training_step, learning_rate,
                                 train_accuracy * 100, cross_entropy_value))
                        history["categorical_accuracy"].append(train_accuracy)
                        history["loss"].append(cross_entropy_value)
                    if training_step % int(eval_step) == 0:
                        x_val, y_val = self.get_next_batch(batch_size * 4, Validation_data)
                        validation_accuracy, val_crossentropy_value = self._sess.run(
                            [self._evaluation_step, self._cross_entropy_mean],
                            feed_dict={self._fingerprint_input: x_val,
                                       self._ground_truth_input: y_val,
                                       self._dropout_placeholder: 0.0})
                        history["val_categorical_accuracy"].append(validation_accuracy)
                        history["val_loss"].append(val_crossentropy_value)
                        print('Step %d: Validation accuracy = %.1f%% (Val Size=%d), Validation loss = %f'
                              % (training_step, validation_accuracy * 100,
                                 batch_size * 4, val_crossentropy_value))
                    if (training_step % int(save_step) == 0) or (training_step == training_time - 1):
                        path_to_save = os.path.join(self._model_dir,
                                                    "model_checkpoint" + '.ckpt')
                        if training_step == training_time - 1:
                            training_step = training_time
                        saver.save(self._sess, path_to_save, global_step=training_step)
                        self._start_step = self._global_step.eval(session=self._sess)
                return history

    def check_session(self, sess=False):
        """ Reuse the provided session if it is still open; otherwise create one. """
        from tensorflow.compat.v1 import ConfigProto, InteractiveSession
        if sess is not False and not sess._closed:
            self._sess = sess
            return
        if tf.test.is_built_with_cuda():  # check GPU compatibility
            config = ConfigProto()
            config.gpu_options.allow_growth = True
            self._sess = InteractiveSession(config=config)
        else:
            # run on CPU if no GPU is available
            self._sess = InteractiveSession()

    def create_commands(self, commands):
        commands_dic = {}
        for i in range(len(commands)):
            commands_dic[i] = commands[i]
        self.dic_commands = {}
        for i in range(len(commands)):
            self.dic_commands[commands[i]] = i
        return commands_dic

    def get_next_batch(self, batch_size, file_path):
        data = []
        labels = []
        np.random.shuffle(file_path)
        for i in range(batch_size):
            data.append(self._sess.run(
                self.mfcc,
                feed_dict={self.wav_filename_placeholder: file_path[i]}).flatten())
            label = get_label(file_path[i])
            if label in self.commands:
                labels.append(self.dic_commands[label])
            else:
                labels.append(self.dic_commands["unknown"])
        return np.stack(data), np.stack(labels)

    def predict(self, input_data):
        predicted = self._sess.run([self._predicted],
                                   feed_dict={self._fingerprint_input: input_data,
                                              self._dropout_placeholder: 0.0})
        return predicted[0], [self.commands_dic[n.item()] for n in predicted[0]]

    def evaluate(self, input_data, labels, verbose=1):
        validation_accuracy, val_crossentropy_value = self._sess.run(
            [self._evaluation_step, self._cross_entropy_mean],
            feed_dict={self._fingerprint_input: input_data,
                       self._ground_truth_input: labels,
                       self._dropout_placeholder: 0.0})
        if verbose:
            print('Validation accuracy = %.1f%%, Validation loss = %f'
                  % (validation_accuracy * 100, val_crossentropy_value))
        return validation_accuracy, val_crossentropy_value

    def load_checkpoint(self, path=0):
        if path == 0:
            try:
                last = int(self._start_step // self._save_step) * self._save_step
                path = os.path.join(self._model_dir,
                                    "model_checkpoint" + '.ckpt-' + str(last))
            except:
                print("Checkpoint path does not exist; pass a path as an argument "
                      "or train for a number of epochs")
                return
        # assert os.path.exists(path), "Path does not exist"
        saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
        saver.restore(self._sess, path)
        self._start_step = self._global_step.eval(session=self._sess)
        self._loaded = True
        return True

    def save_pb_model(self, file_name, first_conv_filter=128, frequency_size=40,
                      time_size=49, last_checkpoint=0):
        """ Save the model for inference. """
        mfcc, placeholder = Micro_process(integer=32767)
        input_1d = tf.reshape(mfcc, [-1, self._input_size])
        softmax_layer = self._build(self._input_size, first_conv_filter,
                                    frequency_size, time_size,
                                    training=False, input_1d=input_1d)
        output = tf.nn.softmax(softmax_layer, name='labels_softmax')
        if last_checkpoint == 0:
            # load from the last saved checkpoint
            self.load_checkpoint()
        else:
            self.load_checkpoint(path=last_checkpoint)
        build = tf.compat.v1.saved_model.builder.SavedModelBuilder(file_name)
        info_inputs = {
            'input': tf.compat.v1.saved_model.utils.build_tensor_info(input_1d)
        }
        info_outputs = {
            'output': tf.compat.v1.saved_model.utils.build_tensor_info(output)
        }
        signature = tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
            inputs=info_inputs,
            outputs=info_outputs,
            method_name=tf.compat.v1.saved_model.signature_constants.PREDICT_METHOD_NAME)
        build.add_meta_graph_and_variables(
            self._sess,
            [tf.compat.v1.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.compat.v1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    signature,
            },
        )
        build.save()
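# A minimal usage sketch for the Model class above. The command list, the .wav file
# lists, and the hyper-parameter values are illustrative assumptions, not values used
# by this project.
if __name__ == "__main__":
    train_files = ["data/yes/0001.wav", "data/no/0001.wav"]  # assumed data layout
    val_files = ["data/yes/0002.wav", "data/no/0002.wav"]
    model = Model(commands=["yes", "no", "unknown"], model_dir="model")
    history = model.train(learn_rate=[0.001, 0.0001], dropout_rate=0.5,
                          save_step=100, batch_size=2, eval_step=50,
                          training_time=200, rate_step=150, display_step=10,
                          train_data=train_files, Validation_data=val_files)
    model.save_pb_model("saved_model")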
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image! Try with another video format")
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov4' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25)
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25)
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
        image = utils.draw_bbox(frame, pred_bbox)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        result = np.asarray(image)
        info = "time: %.2f ms" % (1000 * exec_time)
        print(info)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
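# The loop above prints the per-frame latency to the console, whereas the tracker
# script earlier overlays FPS on the frame itself. A small helper doing the same
# conversion is sketched below; the text position and styling are arbitrary choices,
# and `cv2` is assumed imported as in the surrounding script.
def overlay_fps(result_frame, exec_time):
    # exec_time is the per-frame wall-clock time in seconds
    fps_text = "FPS: %.2f" % (1.0 / exec_time)
    cv2.putText(result_frame, fps_text, (10, 30),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)
    return result_frame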
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
    image_data = cv2.resize(original_image, (input_size, input_size))
    image_data = image_data / 255.
    # image_data = image_data[np.newaxis, ...].astype(np.float32)

    images_data = []
    for i in range(1):
        images_data.append(image_data)
    images_data = np.asarray(images_data).astype(np.float32)

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))]

        # post-process the raw TFLite feature maps
        bbox_tensors = []
        prob_tensors = []
        if FLAGS.tiny:
            for i, fm in enumerate(pred):
                if i == 0:
                    output_tensors = decode(pred[1], input_size // 16, NUM_CLASS,
                                            STRIDES, ANCHORS, i, XYSCALE, 'tflite')
                else:
                    output_tensors = decode(pred[0], input_size // 32, NUM_CLASS,
                                            STRIDES, ANCHORS, i, XYSCALE, 'tflite')
                bbox_tensors.append(output_tensors[0])
                prob_tensors.append(output_tensors[1])
        else:
            for i, fm in enumerate(pred):
                if i == 0:
                    output_tensors = decode(pred[2], input_size // 8, NUM_CLASS,
                                            STRIDES, ANCHORS, i, XYSCALE, 'tflite')
                elif i == 1:
                    output_tensors = decode(pred[0], input_size // 16, NUM_CLASS,
                                            STRIDES, ANCHORS, i, XYSCALE, 'tflite')
                else:
                    output_tensors = decode(pred[1], input_size // 32, NUM_CLASS,
                                            STRIDES, ANCHORS, i, XYSCALE, 'tflite')
                bbox_tensors.append(output_tensors[0])
                prob_tensors.append(output_tensors[1])
        pred_bbox = tf.concat(bbox_tensors, axis=1)
        pred_prob = tf.concat(prob_tensors, axis=1)
        pred = (pred_bbox, pred_prob)
        if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                            input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                            input_shape=tf.constant([input_size, input_size]))
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

    image = utils.draw_bbox(original_image, pred_bbox)
    # image = utils.draw_bbox(image_data*255, pred_bbox)
    image = Image.fromarray(image.astype(np.uint8))
    image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, image)
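# tf.image.combined_non_max_suppression, used throughout these scripts, expects
# boxes shaped [batch, num_boxes, q, 4] (q = 1 for class-agnostic boxes) and scores
# shaped [batch, num_boxes, num_classes], which is what the tf.reshape calls above
# produce. A self-contained toy call illustrating those shapes; the values are
# arbitrary and this demo is not part of the original script.
def _nms_shape_demo():
    toy_boxes = tf.random.uniform((1, 10, 1, 4))   # [batch, num_boxes, q, 4]
    toy_scores = tf.random.uniform((1, 10, 3))     # [batch, num_boxes, num_classes]
    return tf.image.combined_non_max_suppression(
        boxes=toy_boxes,
        scores=toy_scores,
        max_output_size_per_class=5,
        max_total_size=5,
        iou_threshold=0.45,
        score_threshold=0.25)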
def glass_detector(image_name):
    image_size = 416
    input_image = image_name
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = image_size
    images = [input_image]

    # load model
    weights_loaded = "./checkpoints/glasses-tf-416"
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights_loaded)
    else:
        saved_model_loaded = tf.saved_model.load(weights_loaded, tags=[tag_constants.SERVING])

    # loop through images in the list and run the YOLOv4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if model == 'yolov3' and tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=score)
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

        # image = utils.draw_bbox(original_image, pred_bbox)
        cropped_image = utils.draw_bbox(original_image, pred_bbox)
        # image = utils.draw_bbox(image_data*255, pred_bbox)
        image = Image.fromarray(cropped_image.astype(np.uint8))
        # if not FLAGS.dont_show:
        #     image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(output + 'DetectedGlass' + str(count) + '.jpg', image)
    return image
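# A minimal sketch of calling the detector above; the sample file name is an
# assumption. Note that `framework`, `model`, `tiny`, `iou`, `score` and `output`
# are read from module-level configuration rather than passed as arguments, so they
# must be defined before this call.
if __name__ == "__main__":
    detected = glass_detector("sample_face.jpg")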
def predict(filepath):
    import os
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    sys.setrecursionlimit(40000)
    parser = OptionParser()

    num_rois = 10
    config_filename = "config_combine_200.pickle"
    network = "vgg"
    write = True
    load = "models/vgg/combine_200.hdf5"
    config_output_filename = config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any data augmentation at test time
    C.network = "vgg16"
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3)
                      for v in class_mapping}
    C.num_rois = int(num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == "mobilenetv2":
        num_features = 320
    else:
        # may need to adjust this for your backbone
        print("backbone is not resnet50; number of features chosen is 512")
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (VGG here; can be ResNet, Inception, etc.)
    shared_layers = nn.nn_base(img_input)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois,
                               nb_classes=len(class_mapping))
    model_rpn = Model(img_input, rpn_layers)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    # model loading
    if load == None:
        print('Loading weights from {}'.format(C.model_path))
        model_rpn.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(C.model_path, by_name=True)
    else:
        print('Loading weights from {}'.format(load))
        model_rpn.load_weights(load, by_name=True)
        model_classifier.load_weights(load, by_name=True)
    # model_rpn.compile(optimizer='adam', loss='mse')
    # model_classifier.compile(optimizer='adam', loss='mse')

    all_imgs = []
    classes = {}
    bbox_threshold = 0.5
    visualise = True
    num_rois = C.num_rois
    st = time.time()

    if "/" in filepath:
        img_name = filepath.split("/")[-1]
    else:
        img_name = filepath.split("\\")[-1]
    img = cv2.imread(filepath)

    # preprocess image
    X, ratio = format_img(img, C)
    img_scaled = (np.transpose(X[0, :, :, :], (1, 2, 0)) + 127.5).astype('uint8')
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)
    # print(R.shape)

    # convert from (x1, y1, x2, y2) to (x, y, w, h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}
    for jk in range(R.shape[0] // num_rois + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break
        if jk == R.shape[0] // num_rois:
            # pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier.predict([F, ROIs])
        print(P_cls)

        for ii in range(P_cls.shape[1]):
            if np.max(P_cls[0, ii, :]) < 0.6 or \
                    np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue
            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []
            (x, y, w, h) = ROIs[0, ii, :]
            bboxes[cls_name].append([16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []
    for key in bboxes:
        # print(key)
        # print(len(bboxes[key]))
        bbox = np.array(bboxes[key])
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.1)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
            cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                          (int(class_to_color[key][0]), int(class_to_color[key][1]),
                           int(class_to_color[key][2])), 2)
            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))
            (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
            textOrg = (real_x1, real_y1 - 0)
            # cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
            #               (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (0, 0, 0), 2)
            # cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
            #               (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 255), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    print(bboxes)

    # `write` controls whether the annotated image is saved to disk
    if write:
        if not os.path.isdir("app/static/Deployment/results"):
            os.mkdir("app/static/Deployment/results")
        cv2.imwrite(os.path.join('app/static/Deployment/results',
                                 '{}'.format(img_name)), img)


# if __name__ == "__main__":
#     filepath = "111.jpg"
#     predict(filepath)
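# get_real_coordinates, called above, maps box coordinates from the resized network
# input back to the original image via the resize ratio returned by format_img. In
# the keras-frcnn codebase it is commonly implemented along these lines; reproduced
# here as a sketch, since the project's own helper is defined elsewhere.
def get_real_coordinates_sketch(ratio, x1, y1, x2, y2):
    # undo the resize applied by format_img; floor-divide by the ratio, then round
    real_x1 = int(round(x1 // ratio))
    real_y1 = int(round(y1 // ratio))
    real_x2 = int(round(x2 // ratio))
    real_y2 = int(round(y2 // ratio))
    return (real_x1, real_y1, real_x2, real_y2)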
import cv2
import numpy as np
from tqdm import tqdm
import os
from random import shuffle
from zipfile import ZipFile
from PIL import Image
import math
import seaborn as sn
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# X = []
# Z = []
save_history = []
save_train_accuracy = []
save_test_accuracy = []
save_valid_accuracy = []
save_confusion = []
sid = ['101', '102', '103', '104', '105', '106']
input_directory = os.getcwd()
list_directory = os.listdir(input_directory)
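# A sketch of how the subject IDs in `sid` might be used to pick out per-subject
# folders from `list_directory`; the folder-naming convention (the ID appearing in
# the folder name) is an assumption, not something this script specifies.
subject_dirs = [d for d in list_directory
                if os.path.isdir(os.path.join(input_directory, d))
                and any(s in d for s in sid)]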
def train(args, event_log, preprocessor, train_indices):
    """
    Trains a model for outcome prediction.

    Parameters
    ----------
    args : Namespace
        Settings of the configuration parameters.
    event_log : pm4py.objects.log.log.EventLog
        Object representing an event log; a list of dicts, where a single dict
        represents a case.
    preprocessor : nap.preprocessor.Preprocessor
        Object to preprocess input data.
    train_indices : list of arrays consisting of ints
        Indices of training cases from the event log per fold.

    Returns
    -------
    training_time :
        Time passed while training a model.
    int :
        Trial ID identifying the best trial if hyper-parameter optimization is
        performed; otherwise -1.
    """
    # gpu sharing
    from tensorflow.compat.v1 import ConfigProto
    from tensorflow.compat.v1 import InteractiveSession
    config = ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    cases_of_fold = preprocessor.get_cases_of_fold(event_log, train_indices)
    subseq_cases_of_fold = preprocessor.get_subsequences_of_cases(cases_of_fold)

    if args.hpo:
        hpo.create_data(args, event_log, preprocessor, cases_of_fold)
        if args.seed:
            # make the sampler behave in a deterministic way
            sampler = optuna.samplers.TPESampler(seed=args.seed_val)
        else:
            sampler = optuna.samplers.TPESampler()
        study = optuna.create_study(direction='maximize', sampler=sampler)
        start_training_time = datetime.now()
        study.optimize(find_best_model, n_trials=args.hpo_eval_runs)
        training_time = datetime.now() - start_training_time

        print("Number of finished trials: {}".format(len(study.trials)))
        print("Best trial:")
        trial = study.best_trial
        print("  Value: {}".format(trial.value))
        print("  Params: ")
        for key, value in trial.params.items():
            print("    {}: {}".format(key, value))
        # return the elapsed seconds together with the best trial id, as documented above
        return training_time.total_seconds(), trial.number
    else:
        return train_model(args, event_log, preprocessor, subseq_cases_of_fold), -1
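# `find_best_model`, passed to study.optimize above, must follow the Optuna objective
# convention: it receives a trial, samples hyper-parameters from it, trains a model,
# and returns the metric being maximized. A skeletal sketch follows; the parameter
# names, ranges, and the placeholder score are assumptions, not the project's actual
# objective function.
def find_best_model_sketch(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    # ... build and train a model with the sampled values, evaluate on validation data ...
    validation_accuracy = 0.0  # placeholder for the real validation score
    return validation_accuracy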