def main(argv):
    # print("location received in main as: ", e)
    ###################################
    global VIOLATION_PERCENTAGE, PROCESSING_STATUS, VIOLATION_FRAME
    violator_count_list = list()
    ###################################
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    yolo = YoloV3(classes=80)
    yolo.load_weights('./weights/yolov3.tf')
    logging.info('weights loaded')

    class_names = [c.strip() for c in open('./coco.names').readlines()]
    logging.info('classes loaded')

    video_path = 'test.mkv'
    try:
        vid = cv2.VideoCapture(int(FILE_URL))
    except:
        vid = cv2.VideoCapture(FILE_URL)
    time.sleep(1.0)

    out = None
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("height: ", height)
    print("width: ", width)
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('./result.avi', codec, fps, (width, height))

    frame_index = -1
    fps = 0.0
    count = 0
    PROCESSING_STATUS = True
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        temp_violators = set()
        temp_total_people = set()

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            class_name1 = track.get_class()
            if class_name1 == "person":
                temp_total_people.add(track.track_id)
                bbox1 = track.to_tlbr()
                x1_c = int(bbox1[0] + (bbox1[2] - bbox1[0]) / 2)
                y1_c = int(bbox1[1] + (bbox1[3] - bbox1[1]) / 2)
                r1 = int(abs(bbox1[3] - bbox1[1]))
                color = (255, 0, 0)
                cv2.line(img, (x1_c, y1_c), (x1_c, y1_c + r1 // 2), (0, 255, 0), 2)
                cv2.circle(img, (x1_c, y1_c), 5, (255, 20, 200), -1)
                scale = (r1) / 100
                transparentOverlay(img, dst_circle, (x1_c, y1_c - 5), alphaVal=110,
                                   color=(0, 200, 20), scale=scale)
                for other in tracker.tracks:
                    if not other.is_confirmed() or other.time_since_update > 1:
                        continue
                    if track.track_id == other.track_id:
                        continue
                    class_name2 = other.get_class()
                    if class_name2 == "person":
                        temp_total_people.add(other.track_id)
                        bbox2 = other.to_tlbr()
                        x2_c = int(bbox2[0] + (bbox2[2] - bbox2[0]) / 2)
                        y2_c = int(bbox2[1] + (bbox2[3] - bbox2[1]) / 2)
                        r2 = int(abs(bbox2[3] - bbox2[1]))
                        if int_circle(x1_c, y1_c, x2_c, y2_c, r1 // 2, r2 // 2) >= 0 \
                                and abs(y1_c - y2_c) < r1 // 4:
                            temp_violators.add(track.track_id)
                            temp_violators.add(other.track_id)
                            cv2.line(img, (x1_c, y1_c), (x2_c, y2_c), (0, 0, 255), 2)
                            scale1 = (r1) / 100
                            transparentOverlay(img, dst_circle, (x1_c, y1_c - 5),
                                               alphaVal=110, color=(0, 0, 255), scale=scale1)
                            scale2 = (r2) / 100
                            transparentOverlay(img, dst_circle, (x2_c, y2_c - 5),
                                               alphaVal=110, color=(0, 0, 255), scale=scale2)

        # print fps on screen
        ### Comment below 3 lines to not see live output screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        ### Violators calculation
        violators_for_frame = len(temp_violators)
        VIOLATION_PERCENTAGE = violators_for_frame
        print("Violation percentage: ", violators_for_frame)
        violator_count_list.append(int(violators_for_frame))
        ###

        ### Call to firebase upload function
        # if violators_for_frame > 20:
        #     social_dist_violation_frame_handler(img)
        #     cv2.imwrite("temp.png", img)
        #     firebase_upload("temp.png")
        #     os.remove("temp.png")

        frame_index = frame_index + 1

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if len(violator_count_list) == 0:
        mean_violation = 0
    else:
        mean_violation = sum(violator_count_list) / len(violator_count_list)
    PROCESSING_STATUS = False
    out.release()
    cv2.destroyAllWindows()

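# int_circle() and transparentOverlay() are project helpers defined elsewhere.
# Below is a minimal sketch (an assumption, not the project's code) of what a
# circle-intersection test like int_circle() could look like: the violation
# check above only relies on it returning a non-negative value when the two
# personal-space circles touch or overlap.
import math

def int_circle_sketch(x1, y1, x2, y2, r1, r2):
    """Hypothetical helper: 1 if one circle contains the other,
    0 if the circles intersect, -1 if they are disjoint."""
    d = math.hypot(x2 - x1, y2 - y1)
    if d <= abs(r1 - r2):
        return 1   # one circle lies inside the other
    if d <= r1 + r2:
        return 0   # circles intersect
    return -1      # no contact

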
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            continue

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        img = cv2.putText(img, "FPS: {:.2f}".format(1 / (sum(times) / len(times))),
                          (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

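# Example invocation for the video-detection entry point above (assuming the
# absl FLAGS it reads are defined in this script with these names, and a
# hypothetical file name detect_video.py; adjust paths to your checkout):
#   python detect_video.py --video 0 --weights ./weights/yolov3.tf \
#       --classes ./data/coco.names --output ./out.avi --output_format XVID
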
def main(_argv):
    if FLAGS.mode == "eager_tf":
        tf.compat.v1.enable_eager_execution()

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.trace:
        run_options = tf.compat.v1.RunOptions(
            output_partition_graphs=True,
            trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
        run_metadata = tf.compat.v1.RunMetadata()
        trace_dir = os.path.join("traces", "training")
        if not os.path.isdir(trace_dir):
            os.makedirs(trace_dir)
        graphs_dir = os.path.join("traces", "training", "graphs")
        if not os.path.isdir(graphs_dir):
            os.makedirs(graphs_dir)
    else:
        run_options = None
        run_metadata = None

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    val_dataset = val_dataset.repeat()

    # TF2 doesn't need this, but we're using TF1.15.
    if FLAGS.mode == "fit":
        sess = tf.keras.backend.get_session()
        sess.run(tf.compat.v1.global_variables_initializer(),
                 options=run_options, run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, "variables_init.json"), 'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(os.path.join(graphs_dir,
                                       f"variables_init_partition_{i}.pbtxt"), 'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))
        sess.run(tf.compat.v1.tables_initializer(),
                 options=run_options, run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, "table_init.json"), 'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(os.path.join(graphs_dir,
                                       f"table_init_partition_{i}.pbtxt"), 'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(), avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      options=run_options, run_metadata=run_metadata)

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
        ]

        class TraceCallback(tf.keras.callbacks.Callback):
            def on_epoch_begin(self, epoch, logs=None):
                self.current_epoch = epoch

            def on_train_batch_end(self, batch, logs=None):
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open(os.path.join(
                        trace_dir,
                        f"training_epoch_{self.current_epoch}_batch_{batch}.json"),
                        'w') as f:
                    f.write(chrome_trace)
                # No need to dump graph partitions for every batch; they should be identical.
                if batch == 0:
                    for i in range(len(run_metadata.partition_graphs)):
                        with open(os.path.join(graphs_dir,
                                               f"training_partition_{i}.pbtxt"), 'w') as f:
                            f.write(str(run_metadata.partition_graphs[i]))

        if FLAGS.trace:
            callbacks.append(TraceCallback())
        else:
            callbacks.append(TensorBoard(write_graph=False, log_dir="logs"))

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            steps_per_epoch=FLAGS.num_samples // FLAGS.batch_size,
                            validation_steps=FLAGS.num_val_samples // FLAGS.batch_size)

def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        n = 1
        if cv2.waitKey(1) == ord('a'):
            while True:
                _, img = vid.read()
                if (check_blur(img, threshold=200) == 0):  # ADJUST THRESHOLD HERE
                    cv2.imshow('output', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    print("blurred")
                    continue

                img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_in = tf.expand_dims(img_in, 0)
                img_in = transform_images(img_in, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo.predict(img_in)
                fps = (fps + (1. / (time.time() - t1))) / 2

                img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
                cv2.imshow('output', img)

                text = ""
                for i in range(80):
                    if scores[0][i] == 0:
                        break
                    Class = int(classes[0][i])
                    place = ((boxes[0][i][2] - boxes[0][i][0]) / 2) + boxes[0][i][0]
                    print(place, class_names[Class])
                    if place < .33:
                        side = 'left'
                    elif place < .66:
                        side = 'center'
                    else:
                        side = 'right'
                    if side == 'center':
                        text = text + " There is a " + class_names[Class] + ' in the ' + side + '.'
                    else:
                        text = text + " There is a " + class_names[Class] + ' on the ' + side + '.'

                try:
                    # text = "This is a test."
                    # note: gTTS always writes MP3 data, whatever the file extension
                    speech = gTTS(text=text, slow=False)
                    # CHANGE THESE 2 PATHS TO YOUR OWN PATH
                    speech.save(r'C:\\Users\\HARINI\\Object-Detection-API\\audio\\text' + str(n) + '.wav')
                    os.system(r'C:\\Users\\HARINI\\Object-Detection-API\\audio\\text' + str(n) + '.wav')
                    n = n + 1
                except:
                    continue

                if not (waitf(15)):
                    break

        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' +
                                    str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' +
                                    str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        # if cv2.waitKey(1) == ord('q'):
        #     break

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()

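# detection.txt written above holds one line per frame: the frame index
# followed by x, y, w, h for each tracked box. A minimal reader sketch
# (a hypothetical helper, assuming the space-separated format produced above):
def read_detections(path="detection.txt"):
    frames = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            idx, vals = int(parts[0]), list(map(float, parts[1:]))
            # group the flat x, y, w, h values into 4-tuples per box
            frames[idx] = [tuple(vals[i:i + 4]) for i in range(0, len(vals), 4)]
    return frames

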
def main(_argv):
    counta = 0
    count = 0
    dorsch_counter = 0
    steinbutt_counter = 0
    kliesche_counter = 0
    herring_counter = 0

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        img_raw = img

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img_raw = img_in
        # img_in = img_in[336:535, 787:1198]   # cod first
        # img_in = img_in[365:555, 750:1278]   # cod second vid
        # img_in = img_in[387:634, 739:1218]   # flat_fish
        # img_in = img_in[344:513, 766:1042]   # steinbutt
        # img_in = img_in[331:520, 789:1065]   # liman
        # img_in = img_in[420:580, 751:1226]   # cod_anne
        img_in = img_in[365:555, 750:1278]     # cod_new
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        if not boxes.any():
            dummy_nums.append(0)
            fine_class.append('no fish')
            cod_count.append(0)
            herring_count.append(0)
            kliesche_count.append(0)
            steinbutt_count.append(0)
        fps = (fps + (1. / (time.time() - t1))) / 2

        img, stack_predections, length = draw_outputs(
            img[365:555, 750:1278], (boxes, scores, classes, nums), class_names)
        img_raw[365:555, 750:1278] = img  # cod_trial
        cv2.putText(img_raw, "FPS: {:.2f}".format(fps), (0, 60),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        full_score.append(scores)
        boxes, scores, classes, nums = boxes[0], scores[0], classes[0], nums[0]

        if scores.any():
            for i in range(nums):
                if (scores[i] * 100) > 50:
                    cv2.putText(img_raw,
                                'Computed_length = {} cm'.format(round((length[-1]))),
                                (0, 240), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
                    fine_class.append(stack_predections[0])

                    if stack_predections[0] == 0:
                        cv2.putText(img_raw, 'Detected Fish = Cod', (0, 100),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
                        if np.all(np.array(cod_count[-8:]) == 0) and \
                                np.all(np.array(herring_count[-8:]) == 0) and \
                                np.all(np.array(kliesche_count[-8:]) == 0) and \
                                np.all(np.array(steinbutt_count[-8:]) == 0):
                            dorsch_counter += 1
                            cod_count.append(dorsch_counter)
                        cod_count.append(dorsch_counter)

                    if stack_predections[0] == 1:
                        cv2.putText(img_raw, 'Detected Fish = Herring', (0, 100),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
                        if np.all(np.array(cod_count[-9:]) == 0) and \
                                np.all(np.array(herring_count[-9:]) == 0) and \
                                np.all(np.array(kliesche_count[-9:]) == 0) and \
                                np.all(np.array(steinbutt_count[-9:]) == 0):
                            herring_counter += 1
                            herring_count.append(herring_counter)
                        herring_count.append(herring_counter)

                    if stack_predections[0] == 1:
                        cv2.putText(img_raw, 'Detected Fish = Dab', (0, 100),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
                        if np.all(np.array(kliesche_count[-8:]) == 0) and \
                                np.all(np.array(herring_count[-8:]) == 0) and \
                                np.all(np.array(kliesche_count[-8:]) == 0) and \
                                np.all(np.array(steinbutt_count[-8:]) == 0):
                            kliesche_counter += 1
                            kliesche_count.append(kliesche_counter)
                        kliesche_count.append(kliesche_counter)

                    if stack_predections[0] == 2:
                        cv2.putText(img_raw, 'Detected Fish = Turbot', (0, 100),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
                        if np.all(np.array(steinbutt_count[-8:]) == 0) and \
                                np.all(np.array(herring_count[-8:]) == 0) and \
                                np.all(np.array(kliesche_count[-8:]) == 0) and \
                                np.all(np.array(steinbutt_count[-8:]) == 0):
                            steinbutt_counter += 1
                            steinbutt_count.append(steinbutt_counter)
                        steinbutt_count.append(steinbutt_counter)

                    if np.all(np.array(dummy_nums[-5:]) == 0):
                        counta += 1
                        dummy_nums.append(counta)
                    dummy_nums.append(counta)

                if (scores[i] * 100) < 80:
                    fine_class.append('no fish')
                    dummy_nums.append(0)
                    cod_count.append(0)
                    kliesche_count.append(0)
                    herring_count.append(0)
                    steinbutt_count.append(0)

        print(dummy_nums)
        print(stack_predections)

        cv2.putText(img_raw, 'Total no of Fish = ' + str(counta), (0, 130),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
        cv2.putText(img_raw, 'Total no of Cod = ' + str(dorsch_counter), (0, 160),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
        cv2.putText(img_raw, 'Total no of Dab = ' + str(kliesche_counter), (0, 180),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
        cv2.putText(img_raw, 'Total no of Herring = ' + str(herring_counter), (0, 200),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
        cv2.putText(img_raw, 'Total no of Turbot = ' + str(steinbutt_counter), (0, 220),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)

        if FLAGS.output:
            cv2.resize(img_raw, (650, 550))
            out.write(img_raw)

        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

def get_detections():
    raw_images = []
    images = request.files.getlist("images")
    image_names = []
    for image in images:
        image_name = image.filename
        image_names.append(image_name)
        image.save(os.path.join(os.getcwd(), image_name))
        img_raw = tf.image.decode_image(open(image_name, 'rb').read(), channels=3)
        raw_images.append(img_raw)

    num = 0

    # create list for final response
    response = []
    li = []
    for j in range(len(raw_images)):
        # create list of responses for current image
        responses = []
        raw_img = raw_images[j]
        num += 1
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        print('time: {}'.format(t2 - t1))
        # print("**", scores)

        print('detections:')
        for i in range(nums[0]):
            # if np.array(scores[0][i]) * 100 > 30:
            print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                        np.array(scores[0][i]),
                                        np.array(boxes[0][i])))
            responses.append({
                "class": class_names[int(classes[0][i])],
                "confidence": float("{0:.2f}".format(np.array(scores[0][i]) * 100)),
                "co ordinates": str(np.array(boxes[0][i]))
            })
            # print(tuple(np.array(boxes[0][i])))
            # img = Image.open("C:\\Repos\\object-Detection-API\\detections\\detection.jpg")
            # a, b = img.size
            # print(a, b)
            x, y, z, h = np.array(boxes[0][i])
            p = finalList(class_names[int(classes[0][i])], x, y)
            li.append(p)
            # print(x, y, z, h)
            # crop = img.crop((x * a, y * b, z * a, h * b))
            # crop.show()
        response.append({"image": image_names[j], "detections": responses})

        img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(output_path + 'detection' + str(num) + '.jpg', img)
        print('output saved to: {}'.format(output_path + 'detection' + str(num) + '.jpg'))

    st = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta name="viewport" content="width=device-width" />
        <title>HTML Result</title>
        <link rel="stylesheet"
              href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
              integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB"
              crossorigin="anonymous">
    </head>
    <body>
        <div class="container body-content">"""

    en = """
        </div>
    </body>
    </html>
    """

    inputf = """
    <div class="row justify-content-start" style="padding-top:10px;">
        <label>Demo Text: </label>
    </div>
    <div class="row justify-content-center" style="padding-top:10px;">
        <input class="form-control"></input>
    </div>"""

    button = """
    <div class="col" style="padding-top:10px;">
        <button class="btn btn-primary">Submit</button>
    </div>"""

    img = """
    <img src="C:/repos/env/Object-Detection-API/img.png" width="150" height="150" alt="Image Here">"""

    radio = """
    <div class="col" style="padding-top:10px;">
        <input type="radio" id="male" name="Demo text" value="male">
        <label for="male">Demo Text</label><br>
    </div>
    """

    dropdown = """
    <div class="dropdown">
        <label for="cars">Dropdown:</label>
        <select name="cars" id="cars" class="btn btn-primary dropdown-toggle">
            <option value="1">Option 1</option>
            <option value="2">Option 2</option>
            <option value="3">Option 3</option>
            <option value="4">Option 4</option>
        </select>
    </div>"""

    checkbox = """
    <div class="col" style="padding-top:10px;">
        <input type="checkbox" id="vehicle1" name="vehicle1" value="Bike">
        <label for="vehicle1"> I have a bike</label><br>
    </div>
    """

    text = """<div class="col" style="padding-top:10px;">
        <p class="text-black-50">
            You’ve probably heard of Lorem Ipsum before – it’s the most-used dummy text
            excerpt out there. People use it because it has a fairly normal distribution
            of letters and words (making it look like normal English), but it’s also
            Latin, which means your average reader won’t get distracted by trying to
            read it.
        </p>
    </div>
    """

    sorted_li = sorted(li, key=operator.attrgetter('y'))
    # print("###########################")
    # for m in sorted_li:
    #     print(m.name, m.y)
    # print("###########################")

    for i in sorted_li:
        if i.name == "check box":
            st += checkbox
        elif i.name == "radio button":
            st += radio
        elif i.name == "dropdown":
            st += dropdown
        elif i.name == "input":
            st += inputf
        elif i.name == "submit":
            st += button
        elif i.name == "text":
            st += text
        else:
            st += img
        print(i.name, i.x, i.y)

    print(st + en)
    f = open("demofile3.html", "w")
    f.write(st + en)
    f.close()

    # remove temporary images
    for name in image_names:
        os.remove(name)

    try:
        return jsonify({"response": response}), 200
    except FileNotFoundError:
        abort(404)

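# finalList objects are created per detection above and sorted by their .y
# attribute via operator.attrgetter('y'). The class itself is defined elsewhere
# in this project; a plausible minimal definition (an assumption, shown only so
# the sorting logic above is self-explanatory):
class finalList:
    def __init__(self, name, x, y):
        self.name = name  # detected UI element class, e.g. "input" or "check box"
        self.x = x        # box x coordinate (relative)
        self.y = y        # box y coordinate (relative), used for top-to-bottom ordering

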
def main(_argv):
    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset, FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:
                # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = tf.optimizers.Adam(FLAGS.learning_rate * hvd.size())
    # Horovod: add Horovod DistributedOptimizer.
    ###############################################
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(
                    train_dataset.take(5717 // hvd.size())):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                # Horovod: add Horovod Distributed GradientTape.
                tape = hvd.DistributedGradientTape(tape)

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                # Horovod: broadcast initial variable states from rank 0 to all other processes.
                # This is necessary to ensure consistent initialization of all workers when
                # training is started with random weights or restored from a checkpoint.
                #
                # Note: broadcast should be done after the first gradient step to ensure optimizer
                # initialization.
                if batch == 0:
                    hvd.broadcast_variables(model.variables, root_rank=0)
                    hvd.broadcast_variables(optimizer.variables(), root_rank=0)

                #############################
                if hvd.rank() == 0:
                    logging.info("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                ###########################
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                if hvd.rank() == 0:
                    logging.info("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            if hvd.rank() == 0:
                logging.info("{}, train: {}, val: {}".format(
                    epoch, avg_loss.result().numpy(), avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            if hvd.rank() == 0:
                model.save_weights('checkpoints/horovod_yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)

def basic():
    # Remove already existing files in the output_frames directory
    files_del = glob.glob('data/output_frames/*')
    for f_del in files_del:
        os.remove(f_del)

    # Remove already existing files in the Clipped directory
    files_clipped = glob.glob('data/Clipped/*')
    for f_clip in files_clipped:
        os.remove(f_clip)

    # Request video file to act like a live stream
    f = request.files['file']
    print('FILENAME: ', f.filename)
    f.save(secure_filename(f.filename))
    print('SECURE FILE NAME: ', secure_filename(f.filename))

    times = []
    i = 0
    h = 0
    print(f.filename)

    vid = cv2.VideoCapture(f.filename)

    out = None
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        fps = (fps + (1. / (time.time() - t1))) / 2

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)

        # Check the score threshold in the first row of the 2D scores array
        # (the array has only one row)
        if scores[0][1] > 0.50:
            cv2.imwrite('data/Clipped/clipp' + str(i) + '.jpg', img)
            storage.child('ClippedCrash/' + str(i) + '/crash.jpg').put(
                'data/Clipped/clipp' + str(i) + '.jpg')
            link_image = storage.child('ClippedCrash/' + str(i) + '/crash.jpg').get_url(None)
            doc_ref = db.collection(u'Crash')
            doc_ref.add({
                u'Name': u'Vehicle Crash',
                u'Type': u'Anomaly',
                u'Timestamp': str(datetime.now()),
                u'Image_Url': link_image
            })
        elif scores[0][0] > 0.50:
            cv2.imwrite('data/Clipped/clipp' + str(i) + '.jpg', img)
            # storage.child('LaneClipped/' + str(i) + '/Lane.jpg').put('data/Clipped/clipp' + str(i) + '.jpg')
            # link_image = storage.child('ClippedLane/' + str(i) + '/Lane.jpg').get_url(None)
            # doc_ref = db.collection(u'LaneVoilation')
            # doc_ref.add({
            #     u'Name': u'Lane Voilation',
            #     u'Type': u'Anomaly',
            #     u'Timestamp': datetime.now(),
            #     u'Image Url': link_image
            # })

        print(boxes, scores, classes, nums, class_names)

        global displayData
        displayData = {
            "scores": str(scores),
            "classes": str(classes),
            "classes_names": str(class_names)
        }
        # print(displayData)
        # data['boxes'] = i

        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imwrite('data/output_frames/anomaly' + str(i) + '.jpg', img)
        i = i + 1

    cv2.destroyAllWindows()
    # os.remove('data/output-vid/short.mp4')

    vid_array = []
    for img_video in glob.glob('data/output_frames/*.jpg'):
        vid_img = cv2.imread(img_video)
        height, width, layers = vid_img.shape
        SIZE_vid = (width, height)
        vid_array.append(vid_img)

    # 'mp4v' is the FourCC for .mp4 output ('mp4a' is an audio codec tag and fails silently)
    out = cv2.VideoWriter('data/output-vid/short.mp4',
                          cv2.VideoWriter_fourcc(*'mp4v'), 15, SIZE_vid)
    for n in range(len(vid_array)):
        out.write(vid_array[n])
    out.release()
    cv2.destroyAllWindows()

    users_ref = db.collection(u'Crash')
    Crashdata = users_ref.stream()
    sasta = []
    print(Crashdata)
    for doc in Crashdata:
        print(f'{doc.id} => {doc.to_dict()}')
        my_dict = doc.to_dict()
        sasta.append(my_dict)
    print(sasta)

    storage.child("videos/new.mp4").put("data/output-vid/short.mp4")
    links = storage.child('videos/new.mp4').get_url(None)
    return render_template('upload.html', l=(links, sasta))

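# storage and db used above are assumed to be module-level Firebase clients set
# up elsewhere in this app. A sketch of what that setup could look like with
# pyrebase and firebase_admin (an assumption; firebase_config and the service
# account path are placeholders for your own project credentials):
#   import pyrebase
#   import firebase_admin
#   from firebase_admin import credentials, firestore
#   firebase = pyrebase.initialize_app(firebase_config)
#   storage = firebase.storage()
#   firebase_admin.initialize_app(credentials.Certificate("serviceAccount.json"))
#   db = firestore.client()
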
def main(_argv):
    # set present path
    home = os.getcwd()

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    # model_filename = 'weights/mars-small128.pb'
    model_filename = os.path.join(home, "weights", "arcface_weights.h5")
    encoder = gdet.create_box_encoder(model_filename, batch_size=128)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    # Build the face database
    face_db = dict()
    db_path = FLAGS.database
    for name in os.listdir(db_path):
        name_path = os.path.join(db_path, name)
        name_db = []
        for i in os.listdir(name_path):
            if i.split(".")[1] != "jpg":
                continue
            id_path = os.path.join(name_path, i)
            img = cv2.imread(id_path)
            # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # img_in = tf.expand_dims(img_in, 0)
            # img_in = transform_images(img_in, FLAGS.size)
            # boxes, scores, classes, nums = yolo.predict(img_in)
            boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]])
            scores = np.asarray([[1]])
            converted_boxes = convert_boxes(img, boxes, scores)
            features = encoder(img, converted_boxes)
            if features.shape[0] == 0:
                continue
            for f in range(features.shape[0]):
                name_db.append(features[f, :])
        name_db = np.asarray(name_db)
        face_db[name] = dict({"used": False, "db": name_db})

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    list_file = open('detection.txt', 'w')
    frame_index = -1
    fps = 0.0
    count = 0
    detection_list = []
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        # print(boxes, scores, classes, nums)
        # time.sleep(5)
        t2 = time.time()
        times.append(t2 - t1)
        print(f'yolo predict time : {t2-t1}')
        times = times[-20:]

        t3 = time.time()
        #############
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0], scores[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]
        t4 = time.time()
        print(f'feature generation time : {t4-t3}')

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        t5 = time.time()
        # Call the tracker
        tracker.predict()
        # tracker.update(detections)
        tracker.update(detections, face_db, FLAGS.max_face_threshold)
        t6 = time.time()
        print(f'tracking time : {t6-t5}')

        frame_index = frame_index + 1

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            face_name = track.get_face_name()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id)) +
                                           len(str(face_name))) * 23, int(bbox[1])),
                          color, -1)
            # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),
            #             (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            cv2.putText(img, class_name + "-" + str(track.track_id) + "-" + face_name,
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            # cv2.putText(img, class_name + "-" + str(track.track_id),
            #             (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            # print(class_name + "-" + str(track.track_id))
            # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id),
            #                             "x": str(int(bbox[0])), "y": str(int(bbox[1])),
            #                             "width": str(int(bbox[2]) - int(bbox[0])),
            #                             "height": str(int(bbox[3]) - int(bbox[1]))}))
            if face_name != "":
                detection_list.append(
                    dict({
                        "frame_no": str(frame_index),
                        "id": str(face_name),
                        "x": str(int(bbox[0])),
                        "y": str(int(bbox[1])),
                        "width": str(int(bbox[2]) - int(bbox[0])),
                        "height": str(int(bbox[3]) - int(bbox[1]))
                    }))

        #######
        fps = (fps + (1. / (time.time() - t1))) / 2
        # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
        #                   cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
        if FLAGS.output:
            out.write(img)
        # frame_index = frame_index + 1
        # list_file.write(str(frame_index) + ' ')
        # if len(converted_boxes) != 0:
        #     for i in range(0, len(converted_boxes)):
        #         list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
        #                         str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
        #     list_file.write('\n')
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

    frame_list = sorted(detection_list,
                        key=lambda x: (int(x["frame_no"]), int(x["id"])))
    # pprint.pprint(frame_list)
    f = open(FLAGS.eval, "w")
    for a in frame_list:
        f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] +
                " " + a["width"] + " " + a["height"] + "\n")
    # close the file
    f.close()

def upload_file():
    # check if the post request has the file part
    if 'files[]' not in request.files:
        resp = jsonify({'message': 'No file part in the request'})
        resp.status_code = 400
        return resp

    files = request.files.getlist('files[]')

    errors = {}
    success = False
    result = []  # collected per-image results

    for file in files:
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            print(filename)
            # file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            file.save(os.path.join(os.getcwd(), filename))

            ############################## yolo code ##############################
            img_raw = tf.image.decode_image(open(filename, 'rb').read(), channels=3)
            img = tf.expand_dims(img_raw, 0)
            img = transform_images(img, size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo(img)
            t2 = time.time()
            print('time: {}'.format(t2 - t1))

            print('detections:')
            for i in range(nums[0]):
                print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                            np.array(scores[0][i]),
                                            np.array(boxes[0][i])))
            img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
            cv2.imwrite(output_path + 'detection.jpg', img)
            print('output saved to: {}'.format(output_path + 'detection.jpg'))

            # prepare image for response
            _, img_encoded = cv2.imencode('.png', img)
            response = img_encoded.tostring()

            # remove temporary image
            os.remove(filename)

            success = True
        else:
            errors[file.filename] = 'File type is not allowed'

    if success and errors:
        errors['message'] = 'File(s) successfully uploaded'
        resp = jsonify(errors)
        resp.status_code = 206
        return resp
    if success:
        resp = jsonify({'message': 'File(s) successfully uploaded'})
        resp.status_code = 201
        return resp
    else:
        resp = jsonify(errors)
        resp.status_code = 400
        return resp

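# Example request against the endpoint above (assuming it is registered as a
# Flask route such as /upload; the route decorator is not shown in this excerpt):
#   curl -F "files[]=@sample.jpg" http://localhost:5000/upload
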
def main(_argv):
    region = load_ROI()

    # Definition of the parameters
    max_cosine_distance = 0.3  # Default = 0.5
    nn_budget = None
    nms_max_overlap = 0.8  # Default = 0.5

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    video_name = os.path.splitext(FLAGS.video)[-2]
    weights = 'weights/yolov3_sang.tf'
    yolo.load_weights(weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # WRITE RESULT
    result = "tracking_result/{}_track.txt".format(video_name)
    file_out = open(result, 'w')

    path = os.getcwd()
    path = str(os.path.split(os.path.split(path)[0])[0])
    # vid_path = os.path.join(path, "Data/{}/{}.mp4".format(video_name, video_name))
    vid_path = os.path.join(path, "data/test_data/{}.mp4".format(video_name))
    vid = cv2.VideoCapture(vid_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_index = -1
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(
                          converted_boxes, scores[0], names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        frame_index = frame_index + 1
        if frame_index % 100 == 0:
            print('FRAME: ', frame_index)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 1)
            # cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)),
            #               (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])),
            #               color, -1)
            # cv2.putText(img, class_name + "-" + str(track.track_id),
            #             (int(bbox[0]), int(bbox[1]-10)), 0, 0.75, (255, 255, 255), 2)
            x_cen = int((int(bbox[2]) + int(bbox[0])) / 2)
            y_cen = int((int(bbox[3]) + int(bbox[1])) / 2)
            if not is_in_region((int(bbox[0]), int(bbox[1])),
                                (int(bbox[2]), int(bbox[3])), region):
                # outside the ROI: drop the track
                track.delete_track()
            cv2.putText(img, "FRAME: " + str(frame_index), (0, 45),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)

            # write the tracking result in the CountMovement format
            bb_width = int(bbox[2]) - int(bbox[0])
            bb_height = int(bbox[3]) - int(bbox[1])
            diagonal = math.sqrt(bb_height**2 + bb_width**2)
            file_out.write("{},{},{},{},{},{},{},{},{}\n".format(
                frame_index, track.track_id, x_cen, y_cen, diagonal, -1.0,
                class_to_classNumber(str(class_name)), bb_width, bb_height))

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    file_out.close()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()

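# is_in_region() and load_ROI() are project helpers not shown in this excerpt.
# A minimal sketch of a box-in-ROI test using OpenCV's point-in-polygon check
# (an assumption about the helper's behavior, not the project's code): it tests
# whether the box center falls inside the ROI polygon.
import numpy as np
import cv2

def is_in_region_sketch(top_left, bottom_right, region):
    pts = np.asarray(region, dtype=np.float32)  # ROI polygon vertices, shape (N, 2)
    cx = (top_left[0] + bottom_right[0]) / 2
    cy = (top_left[1] + bottom_right[1]) / 2
    # pointPolygonTest returns > 0 inside, 0 on the edge, < 0 outside
    return cv2.pointPolygonTest(pts, (cx, cy), False) >= 0

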
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()
    logging.info('time: {}'.format(t2 - t1))

    logging.info('detections:')
    # ------
    detect_dict = {}
    if FLAGS.make_json:
        object_num = 1
    for i in range(nums[0]):
        if not FLAGS.only_cars or class_names[int(classes[0][i])] in ('car', 'truck'):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))
            if FLAGS.make_json:
                if FLAGS.only_cars:
                    detect_dict["Car " + str(object_num)] = np.array(boxes[0][i]).tolist()
                else:
                    detect_dict["Object " + str(object_num) + ": " + class_names[int(classes[0][i])]] = \
                        np.array(boxes[0][i]).tolist()
                object_num += 1
    if FLAGS.make_json:
        with open("outputs/output.json", "w") as outfile:
            json.dump(detect_dict, outfile)
        logging.info('output JSON saved to: outputs/output.json')
    # ------

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names,
                       only_cars=FLAGS.only_cars)
    cv2.imwrite(FLAGS.output, img)
    logging.info('output picture saved to: {}'.format(FLAGS.output))

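# With --make_json the file outputs/output.json written above maps labels to
# [x1, y1, x2, y2] boxes in relative coordinates, e.g. (illustrative values only):
#   {"Car 1": [0.12, 0.40, 0.31, 0.62], "Car 2": [0.55, 0.38, 0.74, 0.60]}
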
    return data


dataImages = [f for f in listdir('./data/polar_car_set/Images')]
polar = ['0', '45', '90']
data9chan = []
for j in range(len(dataImages)):
    tmp = []
    for l in range(len(polar)):
        img = tf.image.decode_image(
            open('./data/polar_car_set/Images/{0}/{1}.jpg'.format(dataImages[j], polar[l]),
                 'rb').read(),
            channels=3)
        img = transform_images(img, 416)
        tmp.append(img)
    tmp9chan = np.concatenate((tmp[0], tmp[1], tmp[2]), axis=2)
    data9chan.append(tmp9chan)

data_list = data9chan
data_Array = np.asarray(data9chan, dtype='float32')
data_Array = data_Array.reshape(data_Array.shape[0], data_Array.shape[1],
                                data_Array.shape[2] * 3, 3)

nb_max_box = 100
list_labels = []
for i in labels.values():
    i = np.concatenate((i, np.zeros(

def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    logging.info('load cat model')
    model_cat = cnn.get_inception_v2_cat()
    logging.info('cat model loaded')

    logging.info('load dog model')
    model_dog = cnn.get_inception_v2_dog()
    logging.info('dog model loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    t3 = time.time()
    cnn_output = cnn.get_more_data(img, model_cat, model_dog,
                                   (boxes, scores, classes, nums), class_names)
    t4 = time.time()

    logging.info('time: {}'.format(t2 - t1))
    logging.info('primary detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {:.2f}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i])))

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img, cat_det, dog_det = draw_outputs(img, model_cat, model_dog,
                                         (boxes, scores, classes, nums),
                                         class_names, cnn_output)
    cv2.imwrite(FLAGS.output, img)

    if np.size(cat_det) != 0 or np.size(dog_det) != 0:
        logging.info('secondary detections :')
        logging.info('time: {}'.format(t4 - t3))
    if np.size(cat_det) != 0:
        for cat in cat_det:
            logging.info('\t{}, {:.2f}'.format(cat[0], cat[1]))
    if np.size(dog_det) != 0:
        for dog in dog_det:
            logging.info('\t {}, {:.2f}'.format(dog[0], dog[1]))

    logging.info('output saved to: {}'.format(FLAGS.output))
    cv2.imshow(FLAGS.output, img)
    cv2.waitKey(0)

def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    countFrame = 0
    while True:
        _, img = vid.read()
        countFrame += 1

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            continue

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        result = []
        if (FLAGS.mode != 'basic'):
            if (FLAGS.mode == 'optical_flow'):
                mode = 1
            elif (FLAGS.mode == 'final'):
                mode = 3
            else:
                mode = 2
            img1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if (countFrame >= 2):
                flow = opticalFlow(img1, img2)
                if (FLAGS.mode == 'final'):
                    img, result = my_draw_flow(img2, flow, 1, boxes[0], nums[0], mode)
                    print(result)
                else:
                    img = my_draw_flow(img1, flow, 4, boxes[0], nums[0], mode)
            img2 = img1

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names, result)
        img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                          (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        # cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

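# opticalFlow() and my_draw_flow() are project helpers not shown in this
# excerpt. A minimal sketch of what a dense-flow helper like opticalFlow()
# could compute, using Farneback flow (an assumption about the helper, not
# its actual body):
import cv2

def optical_flow_sketch(curr_gray, prev_gray):
    # returns an H x W x 2 array of per-pixel (dx, dy) motion vectors
    return cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)

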
def main():
    class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    vid = cv2.VideoCapture("traffic1.mkv")
    # vid = cv2.VideoCapture("video.webm")
    # vid = VideoCaptureAsync("video.webm")
    # vid = vid.start()

    codec = cv2.VideoWriter_fourcc(*'XVID')
    vid_fps = int(vid.get(cv2.CAP_PROP_FPS))
    vid_width, vid_height = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                             int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps,
                          (vid_width, vid_height))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []
    directory1 = "/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset/"
    result = []

    while True:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(converted_boxes, scores[0], names, features)]

        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(img, pts[track.track_id][j - 1],
                         pts[track.track_id][j], color, thickness)

            height, width, _ = img.shape
            cv2.line(img, (0, int(3 * height / 6 + height / 20)),
                     (width, int(3 * height / 6 + height / 20)), (0, 255, 0), thickness=2)
            # cv2.line(img, (0, int(3*height/6-height/20)),
            #          (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)
            cv2.line(img, (220, 460), (1000, 450), (0, 0, 255), 2)  # hard-coded reference line

            center_y = int((bbox[1] + bbox[3]) / 2)

            # count a track once it crosses the counting band
            if int(3 * height / 6 - height / 20) <= center_y <= int(3 * height / 6 + height / 20):
                if class_name == 'car' or class_name == 'truck' or class_name == 'person':
                    counter.append(int(track.track_id))

                    # pick the next free image index from files already in the dataset directory
                    directory = r'/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset'
                    for filename in os.listdir(directory):
                        if filename.endswith(".jpg") or filename.endswith(".png"):
                            a1 = os.path.join(directory, filename)
                            b = int(re.search(r'\d+', a1).group())
                            result.append(b)
                    b1 = max(result) + 1 if result else 0

                    # bbox is top-left/bottom-right (x1, y1, x2, y2), so crop
                    # rows y1:y2 and columns x1:x2 (the original mixed in
                    # tlwh-style arithmetic and overshot the box)
                    new_img = img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                    cv2.imwrite(directory1 + f"image{b1}.jpg", new_img)

        total_count = len(set(counter))
        cv2.putText(img, "Total Vehicle Count: " + str(total_count),
                    (0, 130), 0, 1, (0, 0, 255), 2)

        fps = 1. / (time.time() - t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        out.write(img)

        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    out.release()
    cv2.destroyAllWindows()
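# The crop above assumes the track box lies fully inside the frame. A small
# hypothetical helper (not part of the original code) that clamps a tlbr box
# to the image bounds before cropping, so slices are never empty or negative:
def crop_tlbr(img, bbox):
    h, w = img.shape[:2]
    x1, y1 = max(0, int(bbox[0])), max(0, int(bbox[1]))
    x2, y2 = min(w, int(bbox[2])), min(h, int(bbox[3]))
    if x2 <= x1 or y2 <= y1:
        return None  # degenerate box
    return img[y1:y2, x1:x2]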
def scanner(self):
    FLAGS(sys.argv)
    self.physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(self.physical_devices) > 0:
        tf.config.experimental.set_memory_growth(self.physical_devices[0], True)

    if FLAGS.tiny:
        self.yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        self.yolo = YoloV3(classes=FLAGS.num_classes)

    self.yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    self.class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # try the default webcam first, fall back to the configured video source
    try:
        self.vid = cv2.VideoCapture(0)
    except Exception:
        self.vid = cv2.VideoCapture(FLAGS.video)

    self.out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        self.width = int(self.vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.fps = int(self.vid.get(cv2.CAP_PROP_FPS))
        self.codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        self.out = cv2.VideoWriter(FLAGS.output, self.codec, self.fps,
                                   (self.width, self.height))

    self.fps = 0.0
    self.count = 0
    running = True
    while running:
        _, self.img = self.vid.read()

        if self.img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            self.count += 1
            if self.count < 3:
                continue
            else:
                break

        self.img_in = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
        self.img_in = tf.expand_dims(self.img_in, 0)
        self.img_in = transform_images(self.img_in, FLAGS.size)

        self.t1 = time.time()
        self.boxes, self.scores, self.classes, self.nums = self.yolo.predict(self.img_in)
        self.fps = (self.fps + (1. / (time.time() - self.t1))) / 2

        self.img, self.pname = draw_outputs(
            self.img, (self.boxes, self.scores, self.classes, self.nums),
            self.class_names)
        print('in main function:', self.pname)
        self.img = cv2.putText(self.img, "FPS: {:.2f}".format(self.fps), (0, 30),
                               cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        if FLAGS.output:
            self.out.write(self.img)

        cv2.namedWindow('Product Scanner')
        cv2.imshow('Product Scanner', self.img)
        if cv2.waitKey(100) & 0xFF == ord('e'):
            self.dbdata()
            print('destroying scanner window')
            cv2.destroyWindow('Product Scanner')
            running = False
def get_image():
    image = request.files["images"]
    image_name = image.filename
    image.save(os.path.join(os.getcwd(), image_name))

    img_raw = tf.image.decode_image(open(image_name, 'rb').read(), channels=3)
    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()
    print('time: {}'.format(t2 - t1))

    print('detections:')
    for i in range(nums[0]):
        print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                    np.array(scores[0][i]),
                                    np.array(boxes[0][i])))

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(output_path + 'detection.jpg', img)
    print('output saved to: {}'.format(output_path + 'detection.jpg'))

    # prepare image for response
    _, img_encoded = cv2.imencode('.png', img)
    response = img_encoded.tobytes()

    image_path = os.path.join(os.getcwd(), 'detections/detection.jpg')
    image = cv2.imread(image_path)
    print(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # perform edge detection, find contours in the edge map, and sort the
    # resulting contours from left-to-right
    edged = cv2.Canny(blurred, 30, 150)
    cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    cnts = sort_contours(cnts, method="left-to-right")[0]

    # initialize the list of contour bounding boxes and associated
    # characters that we'll be OCR'ing
    chars = []

    # loop over the contours
    for c in cnts:
        # compute the bounding box of the contour
        (x, y, w, h) = cv2.boundingRect(c)

        # filter out bounding boxes, ensuring they are neither too small
        # nor too large
        if (w >= 5 and w <= 150) and (h >= 15 and h <= 120):
            # extract the character and threshold it to make the character
            # appear as white (foreground) on a black background, then
            # grab the width and height of the thresholded image
            roi = gray[y:y + h, x:x + w]
            thresh = cv2.threshold(roi, 0, 255,
                                   cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
            (tH, tW) = thresh.shape

            # if the width is greater than the height, resize along the
            # width dimension
            if tW > tH:
                thresh = imutils.resize(thresh, width=32)
            # otherwise, resize along the height
            else:
                thresh = imutils.resize(thresh, height=32)

            # re-grab the image dimensions (now that it's been resized)
            # and then determine how much we need to pad the width and
            # height such that our image will be 32x32
            (tH, tW) = thresh.shape
            dX = int(max(0, 32 - tW) / 2.0)
            dY = int(max(0, 32 - tH) / 2.0)

            # pad the image and force 32x32 dimensions
            padded = cv2.copyMakeBorder(thresh, top=dY, bottom=dY,
                                        left=dX, right=dX,
                                        borderType=cv2.BORDER_CONSTANT,
                                        value=(0, 0, 0))
            padded = cv2.resize(padded, (32, 32))

            # prepare the padded image for classification via our
            # handwriting OCR model
            padded = padded.astype("float32") / 255.0
            padded = np.expand_dims(padded, axis=-1)

            # update our list of characters that will be OCR'd
            chars.append((padded, (x, y, w, h)))

    # extract the bounding box locations and padded characters
    boxes = [b[1] for b in chars]
    chars = np.array([c[0] for c in chars], dtype="float32")

    # OCR the characters using our handwriting recognition model
    preds = model.predict(chars)

    # define the list of label names
    labelNames = "0123456789"
    labelNames += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    labelNames = [l for l in labelNames]

    lst = []
    # loop over the predictions and bounding box locations together
    for (pred, (x, y, w, h)) in zip(preds, boxes):
        # find the index of the label with the largest corresponding
        # probability, then extract the probability and label
        i = np.argmax(pred)
        prob = pred[i]
        label = labelNames[i]
        lst.append(label + ":" + str(prob))

        # draw the prediction on the image
        print("[INFO] {} - {:.2f}%".format(label, prob * 100))
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(image, label, (x - 10, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)

    # remove temporary image
    os.remove(image_name)

    cv2.imshow("Image", image)
    cv2.waitKey(0)

    try:
        return jsonify({"response": lst}), 200
    except FileNotFoundError:
        abort(404)
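# How this handler is presumably exposed over HTTP. The Flask app object,
# route path, and method are assumptions; only the handler body appears in
# this snippet.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/image', 'get_image', get_image, methods=['POST'])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)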
def main(args):
    tf.config.experimental.list_physical_devices('GPU')
    # tf.device(f'/gpu:{args.gpu_num}')

    train_path = args.train_dataset
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to text file containing all classes, 1 per line
    classes_file = args.classes

    # Usually 'fit'; can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = args.mode
    '''
    fit: model.fit
    eager_fit: model.fit(run_eagerly=True)
    eager_tf: custom GradientTape
    '''

    # Usually 'darknet'
    transfer = args.transfer
    '''
    none: Training from scratch
    darknet: Transfer darknet
    no_output: Transfer all but output
    frozen: Transfer and freeze all
    fine_tune: Transfer all and freeze darknet only
    pre: Use a pre-trained model for validation
    '''

    image_size = cfg.IMAGE_SIZE
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = cfg.LEARNING_RATE
    num_classes = args.num_classes
    # number of classes in the `weights` file if different; useful in
    # transfer learning with a different number of classes
    weight_num_classes = args.num_weight_class
    saved_weights_path = args.saved_weights

    # Original anchors below; cfg overrides them
    anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       np.float32) / 608
    anchors = cfg.YOLO_ANCHORS
    anchor_masks = cfg.YOLO_ANCHOR_MASKS

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if args.no_train:
        print('Skipping training...')
    else:
        start_time = time.time()
        model = YoloV3(image_size, training=True, classes=num_classes)

        train_dataset = dataset.load_tfrecord_dataset(train_path, classes_file, image_size)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)

        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        # Configure the model for transfer learning
        if transfer == 'none':
            pass  # Nothing to do
        elif transfer in ['darknet', 'no_output']:
            # Darknet transfer is a special case that works
            # with incompatible number of classes: reset top layers
            model_pretrained = YoloV3(image_size, training=True,
                                      classes=weight_num_classes or num_classes)
            model_pretrained.load_weights(weights_path)

            if transfer == 'darknet':
                model.get_layer('yolo_darknet').set_weights(
                    model_pretrained.get_layer('yolo_darknet').get_weights())
                freeze_all(model.get_layer('yolo_darknet'))
            elif transfer == 'no_output':
                for layer in model.layers:
                    if not layer.name.startswith('yolo_output'):
                        layer.set_weights(model_pretrained.get_layer(
                            layer.name).get_weights())
                        freeze_all(layer)
        elif transfer == 'pre':
            model = YoloV3(image_size, training=False, classes=num_classes)
            model.load_weights(weights_path)
        else:
            # All other transfer modes require matching classes
            model.load_weights(weights_path)
            if transfer == 'fine_tune':
                # freeze darknet and fine tune other layers
                darknet = model.get_layer('yolo_darknet')
                freeze_all(darknet)
            elif transfer == 'frozen':
                # freeze everything
                freeze_all(model)

        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
        loss = [YoloLoss(anchors[mask], classes=num_classes)
                for mask in anchor_masks]

        if mode == 'eager_tf':
            # Eager mode is great for debugging
            # Non eager graph mode is recommended for real training
            avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
            avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

            for epoch in range(1, num_epochs + 1):
                for batch, (images, labels) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        outputs = model(images, training=True)
                        regularization_loss = tf.reduce_sum(model.losses)
                        pred_loss = []
                        for output, label, loss_fn in zip(outputs, labels, loss):
                            pred_loss.append(loss_fn(label, output))
                        total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                    grads = tape.gradient(total_loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(grads, model.trainable_variables))

                    print("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_loss.update_state(total_loss)

                for batch, (images, labels) in enumerate(val_dataset):
                    outputs = model(images)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                    print("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_val_loss.update_state(total_loss)

                print("{}, train: {}, val: {}".format(
                    epoch, avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))
                avg_loss.reset_states()
                avg_val_loss.reset_states()
                model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
        elif mode == 'valid':
            pass  # Skip this step for validation only
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          run_eagerly=(mode == 'eager_fit'))
            callbacks = [
                ReduceLROnPlateau(verbose=1, min_lr=1e-4, patience=50),
                # EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('checkpoints/midpoints/yolov3_train_{epoch}.tf',
                                verbose=1, save_weights_only=True),
                TensorBoard(log_dir=f'logs/{saved_weights_path[:-3]}')
            ]
            history = model.fit(train_dataset,
                                epochs=num_epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
            print(f'Saving weights to: {saved_weights_path}')
            model.save_weights(saved_weights_path)

        finish_time = time.time()
        train_time = finish_time - start_time
        print('Training time elapsed: {}'.format(train_time))

    # Calculate mAP
    if args.validate:
        print('Validating...')
        model = YoloV3(image_size, training=False, classes=num_classes)
        model.load_weights(saved_weights_path).expect_partial()

        batch_size = 1
        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        images = []
        for img, labs in val_dataset:
            img = np.squeeze(img)
            images.append(img)

        predictions = []
        evaluator = Evaluator(iou_thresh=args.iou)

        # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
        boxes, scores, classes, num_detections = model.predict(val_dataset)
        print(boxes.shape)
        print(boxes[0])
        # boxes -> (num_imgs, num_detections, box coords)

        filtered_labels = []
        for _, label in val_dataset:
            filt_labels = flatten_labels(label)
            filtered_labels.append(filt_labels)

        # img is the image index here
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(np.hstack([boxes[img][sc] * image_size,
                                          scores[img][sc], classes[img][sc]]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(predictions)
        # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, class]]
        # Box coords should be in format x1 y1 x2 y2
        evaluator(predictions, filtered_labels, images, roc=False)  # Check gts box coords

    if args.valid_imgs:  # Predictions
        print('Valid Images...')
        yolo = YoloV3(image_size, training=False, classes=num_classes)
        yolo.load_weights(saved_weights_path).expect_partial()
        print('weights loaded')

        # TODO: find a better way to do this so manual changes are not required
        class_dict = cfg.CLASS_DICT
        class_names = list(class_dict.values())
        print('classes loaded')

        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            img = transform_images(img_raw, image_size)
            img = img * 255

            boxes, scores, classes, nums = yolo(img)

            # replace the predictions with the flattened ground-truth labels
            # so they can be drawn through the same code path
            filt_labels = flatten_labels(_label)
            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            output = 'test_images/test_{}.jpg'.format(index)
            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)
            index = index + 1

    if args.visual_data:
        print('Visual Data...')
        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            img = transform_images(img_raw, image_size)
            output = 'test_images/test_labels_{}.jpg'.format(index)

            filt_labels = flatten_labels(_label)
            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)
            index = index + 1

    return
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(
            FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        # OPEN THE IMAGE LIST
        # PARSE THE FIRST IMAGE FILENAME
        # OPEN THE IMAGE AS img_raw
        # STUB -- CHANGE LATER
        image_name = FLAGS.image
        print(image_name)
        img_raw = tf.image.decode_image(
            open(FLAGS.image, 'rb').read(), channels=3)

    # image preprocessing after we take the raw one
    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()

    print("FLAGS.size: " + format(FLAGS.size))
    print("Image RAW size: " + format(img_raw.shape))
    print("Image size: " + format(img.shape))

    logging.info('time: {}'.format(t2 - t1))
    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i]),
                                           np.array(boxes[0][i])))

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(FLAGS.output, img)
    logging.info('output saved to: {}'.format(FLAGS.output))

    print("Persons detected:")
    for i in range(nums[0]):
        # Only process class 0 = person.
        if int(classes[0][i]) == 0:
            print("\tPersonID " + format(i))
            print("\t\tSCORE: " + format(scores[0][i]))
            int_box = convert_box_to_img_size(img_raw.shape, np.array(boxes[0][i]))
            print("\t\tBOX: " + format(int_box))
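# convert_box_to_img_size is used above but not defined in this snippet.
# A plausible sketch, assuming the YOLO boxes are normalized (x1, y1, x2, y2)
# in [0, 1] and the goal is integer pixel coordinates:
def convert_box_to_img_size(img_shape, box):
    h, w = img_shape[0], img_shape[1]  # img_shape is (H, W, C)
    x1, y1, x2, y2 = box
    return [int(x1 * w), int(y1 * h), int(x2 * w), int(y2 * h)]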
def main():
    class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    imageHub = imagezmq.ImageHub()

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # vid = cv2.VideoCapture('./data/video/traffic1.mkv')
    # vid = cv2.VideoCapture("video.webm")
    # vid = VideoCaptureAsync("video.webm")
    # vid = vid.start()

    codec = cv2.VideoWriter_fourcc(*'XVID')
    # vid_fps = int(vid.get(cv2.CAP_PROP_FPS))
    # vid_width, vid_height = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
    #                          int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps,
    #                       (vid_width, vid_height))
    out = cv2.VideoWriter('./data/video/results.avi', codec, 20, (480, 480))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []

    while True:
        # _, img = vid.read()
        (rpiName, img) = imageHub.recv_image()
        imageHub.send_reply(b'OK')
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(img, pts[track.track_id][j - 1],
                         pts[track.track_id][j], color, thickness)

            height, width, _ = img.shape
            # cv2.line(img, (0, int(3*height/6+height/20)),
            #          (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
            # cv2.line(img, (0, int(3*height/6-height/20)),
            #          (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

            center_y = int((bbox[1] + bbox[3]) / 2)

            if int(3 * height / 6 - height / 20) <= center_y <= int(3 * height / 6 + height / 20):
                if class_name == 'car' or class_name == 'truck' or class_name == 'person':
                    counter.append(int(track.track_id))

        total_count = len(set(counter))
        cv2.putText(img, "Total Vehicle Count: " + str(total_count),
                    (0, 130), 0, 1, (0, 0, 255), 2)

        fps = 1. / (time.time() - t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        out.write(img)

        if cv2.waitKey(1) == ord('q'):
            break

    # vid.release()
    out.release()
    cv2.destroyAllWindows()
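# A minimal sender sketch for the ImageHub loop above, e.g. running on a
# Raspberry Pi. The hub address and camera index are assumptions.
import socket
import cv2
import imagezmq

sender = imagezmq.ImageSender(connect_to='tcp://192.168.0.10:5555')
rpi_name = socket.gethostname()  # sent as the message name, received as rpiName
cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    sender.send_image(rpi_name, frame)  # blocks until the hub replies OK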
def get_detections():
    raw_images = []
    images = request.files.getlist("images")
    print(images)
    image_names = []
    for image in images:
        image_name = image.filename
        image_names.append(image_name)
        image.save(os.path.join(os.getcwd(), image_name))
        img_raw = tf.image.decode_image(
            open(image_name, 'rb').read(), channels=3)
        raw_images.append(img_raw)

    num = 0

    # create list for final response
    response = []

    for j in range(len(raw_images)):
        raw_img = raw_images[j]
        num += 1
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        print('time: {}'.format(t2 - t1))

        print('detections:')
        # one response entry per detection in the current image (the original
        # appended outside this loop, which only kept the last detection index)
        for i in range(nums[0]):
            print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                        np.array(scores[0][i]),
                                        np.array(boxes[0][i])))
            response.append({
                "class": 'Pothole',
                "confidence": float("{0:.2f}".format(np.array(scores[0][i]) * 100))
                # "image": image_names[j],
            })

        img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(output_path + 'detection' + str(num) + '.jpg', img)
        print('output saved to: {}'.format(
            output_path + 'detection' + str(num) + '.jpg'))

    # remove temporary images
    for name in image_names:
        os.remove(name)

    try:
        return jsonify({"response": response}), 200
    except FileNotFoundError:
        abort(404)
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # The anchors are fixed: each of the three output layers gets 3 anchor boxes
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # placeholder dataset, replaced below when a real dataset is configured
    train_dataset = dataset.load_fake_dataset()
    # load the training data and preprocess it via dataset.map
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    # shuffle the training data
    train_dataset = train_dataset.shuffle(buffer_size=512)
    # set the training batch size
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # match training targets to anchors as a map preprocessing step
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # prefetch with an auto-tuned buffer so preprocessing overlaps training
    # and keeps the hardware busy
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # same treatment as train_dataset
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning: reuse the YOLO structure
    # as a pretrained model
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes:
        # reset top layers by loading a pretrained model
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        # load the pretrained weights
        model_pretrained.load_weights(FLAGS.weights)

        # take the darknet backbone and its weights from the pretrained model
        # and freeze it; everything outside the backbone is trained
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        # take everything from the pretrained model except the output layers;
        # only the output layers are trained
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(
                        l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        # freeze the darknet backbone and fine tune all other layers
        if FLAGS.transfer == 'fine_tune':
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # freeze the whole model; nothing changes during training
        elif FLAGS.transfer == 'frozen':
            freeze_all(model)

    # optimizer
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    # loss, one YoloLoss per output layer
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    # eager mode gives immediate feedback, handy for watching training behaviour
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # running means of the train and val losses
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # step the optimizer along the gradient of the training loss
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                # log the loss for this training batch
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # validation mirrors the training loop above, without updates
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            # log the final train and val losses for the whole epoch
            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # reset the running means
            avg_loss.reset_states()
            avg_val_loss.reset_states()

            # save the weights obtained at the end of this epoch
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    # without live feedback, train through model.fit in the background
    else:
        # configure the model
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      metrics=['accuracy'])

        # custom callbacks, including one that saves checkpoint weights
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]
        # # period=2 would save a checkpoint every 2 epochs:
        # callbacks = [
        #     ReduceLROnPlateau(verbose=1),
        #     EarlyStopping(patience=3, verbose=1),
        #     ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
        #                     verbose=1, save_weights_only=True, period=2),
        #     TensorBoard(log_dir='logs')
        # ]

        # train the model with these callbacks
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            validation_freq=1)
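# freeze_all is used throughout these training scripts but not defined in
# this snippet. A sketch matching its use here (and the common yolov3-tf2
# helper), recursively disabling training on a layer or nested model:
import tensorflow as tf

def freeze_all(model, frozen=True):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)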
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes: reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
            model.save_weights('checkpoints/yolov3_train_{}.ckpt'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            tf.keras.callbacks.ReduceLROnPlateau(verbose=1, factor=0.2,
                                                 patience=3, cooldown=0),
            tf.keras.callbacks.EarlyStopping(patience=3, verbose=1),
            tf.keras.callbacks.ModelCheckpoint(
                filepath='./checkpoints/yolov3-tiny_train.ckpt',
                verbose=1, save_weights_only=True, save_best_only=True),
            tf.keras.callbacks.ModelCheckpoint(
                filepath='./checkpoints/yolov3-tiny_train.tf',
                verbose=0, save_weights_only=True, save_best_only=True),
            tf.keras.callbacks.TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)

        saved_model_dir = './model/yolov3-tiny_train'
        model.save(saved_model_dir)
        converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
        tflite_model = converter.convert()
        open('model_tflite.tflite', 'wb').write(tflite_model)
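# A minimal sketch of loading the exported model_tflite.tflite for inference.
# The zero tensor stands in for a real preprocessed frame; matching the
# training preprocessing is left as an assumption.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='model_tflite.tflite')
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]

dummy = np.zeros(inp['shape'], dtype=inp['dtype'])  # placeholder input batch
interpreter.set_tensor(inp['index'], dummy)
interpreter.invoke()
preds = interpreter.get_tensor(out['index'])
print(preds.shape)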
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    # Setup
    if FLAGS.multi_gpu:
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
        FLAGS.batch_size = BATCH_SIZE

        with strategy.scope():
            model, optimizer, loss, anchors, anchor_masks = setup_model()
    else:
        model, optimizer, loss, anchors, anchor_masks = setup_model()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        start_time = time.time()
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        end_time = time.time() - start_time
        print(f'Total Training Time: {end_time}')

        import mlflow
        mlflow.set_experiment("signal_detect")

        epoch = len(history.history['loss'])
        mlflow.log_param("dataset", FLAGS.dataset)
        mlflow.log_param("val_dataset", FLAGS.val_dataset)
        mlflow.log_param("epoch", FLAGS.epochs)
        mlflow.log_param("batch_size", FLAGS.batch_size)
        mlflow.log_param("learning_rate", FLAGS.learning_rate)
        mlflow.log_metric("loss", float(history.history['loss'][epoch - 1]))
        mlflow.log_metric("yolo_output_0_loss",
                          float(history.history['yolo_output_0_loss'][epoch - 1]))
        mlflow.log_metric("yolo_output_1_loss",
                          float(history.history['yolo_output_1_loss'][epoch - 1]))
        mlflow.log_metric("val_loss", float(history.history['val_loss'][epoch - 1]))
        mlflow.log_metric("val_yolo_output_0_loss",
                          float(history.history['val_yolo_output_0_loss'][epoch - 1]))
        mlflow.log_metric("val_yolo_output_1_loss",
                          float(history.history['val_yolo_output_1_loss'][epoch - 1]))
        mlflow.log_artifact("checkpoints/yolov3_train_" + str(epoch) + ".tf.data-00000-of-00001")
        mlflow.log_artifact("checkpoints/yolov3_train_" + str(epoch) + ".tf.index")
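# The logging above targets the implicit active run; wrapping it in an
# explicit run makes the lifecycle clearer. A sketch reusing the experiment
# name from above; the run name and placeholder values are assumptions.
import mlflow

mlflow.set_experiment("signal_detect")
with mlflow.start_run(run_name="yolov3-train"):
    mlflow.log_param("batch_size", 8)   # illustration only
    mlflow.log_metric("loss", 0.0)      # illustration only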
def main(_argv):
    img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    img = tf.expand_dims(img, 0)
    # keep the result: transform_images returns the preprocessed batch
    # (the original discarded it)
    img = transform_images(img, FLAGS.size)
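# transform_images is imported from elsewhere in these snippets. In the
# yolov3-tf2 codebase this preprocessing is a resize plus scaling to [0, 1];
# a sketch under that assumption:
import tensorflow as tf

def transform_images(x_train, size):
    x_train = tf.image.resize(x_train, (size, size))
    x_train = x_train / 255
    return x_train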
def main(_argv):
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset, FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # the original compared FLAGS.mode here, which never holds 'frozen';
        # the transfer flag is what carries that value
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True,
                                        classes=FLAGS.num_classes)
            else:
                init_model = YoloV3(FLAGS.size, training=True,
                                    classes=FLAGS.num_classes)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
def main(_argv):
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    tsteps = sum(1 for _ in train_dataset)
    train_dataset = train_dataset.shuffle(buffer_size=256,
                                          reshuffle_each_iteration=True)
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.repeat(FLAGS.epochs)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    vsteps = sum(1 for _ in val_dataset)
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.repeat(FLAGS.epochs)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes: reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True, period=10),
            TensorBoard(log_dir='logs')
        ]
        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  steps_per_epoch=int(tsteps / FLAGS.batch_size),
                  callbacks=callbacks,
                  validation_data=val_dataset,
                  validation_steps=int(vsteps / FLAGS.batch_size))
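# With .repeat(FLAGS.epochs) the dataset looks effectively unbounded to
# model.fit, so steps_per_epoch and validation_steps are what delimit an
# epoch. A tiny arithmetic sketch of the step counts used above; the
# concrete values are assumptions for illustration.
tsteps, vsteps, batch_size = 1000, 200, 8
steps_per_epoch = int(tsteps / batch_size)    # 125 train batches per epoch
validation_steps = int(vsteps / batch_size)   # 25 val batches per epoch
print(steps_per_epoch, validation_steps)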