def viz_voc_prep(batch_data, idx, input_height, input_width, val,
                 box_color=(0, 255, 0), thickness=1, txt_color=(255, 0, 0)):
    """Draw the ground-truth boxes of one preprocessed VOC image for visual inspection."""
    imgs, labels = prep_voc_data(batch_data, input_height, input_width, val=val)
    img = imgs[idx].numpy().copy()
    img = (img * 255).astype(np.uint8)  # de-normalize from [0, 1] back to uint8
    label = labels[idx].numpy()
    for pts in label:
        cx_rel, cy_rel, w_rel, h_rel, cls_idx = pts
        cls_name = VOC_CLS_MAP[cls_idx]
        # Convert relative center coordinates (cx, cy, w, h) to absolute corners
        xmin_rel, ymin_rel = cx_rel - (w_rel / 2), cy_rel - (h_rel / 2)
        xmax_rel, ymax_rel = cx_rel + (w_rel / 2), cy_rel + (h_rel / 2)
        xmin, ymin = round(xmin_rel * input_width), round(ymin_rel * input_height)
        xmax, ymax = round(xmax_rel * input_width), round(ymax_rel * input_height)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), box_color, thickness)
        cv2.putText(img, cls_name, (xmin, ymin), fontFace=cv2.FONT_HERSHEY_DUPLEX,
                    fontScale=0.5, color=txt_color)
    return img
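# ---------------------------------------------------------------------------
# Illustrative usage sketch for viz_voc_prep above (not part of the original
# script): render the ground-truth boxes of the first image in a sampled
# validation batch and save it to disk. The 448x448 size matches this
# project's cfg; the output filename and the assumption that prep_voc_data
# yields RGB images are hypothetical.
# ---------------------------------------------------------------------------
def _demo_viz_voc_prep():
    voc = GetVoc(batch_size=8)
    batch_data = next(iter(voc.get_val_ds(sample_ratio=0.1)))
    annotated = viz_voc_prep(batch_data, idx=0, input_height=448, input_width=448, val=True)
    # cv2.imwrite expects BGR, so flip the (assumed) RGB channel order
    cv2.imwrite('viz_voc_sample.png', cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))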
def main(_argv):
    yolo = tf.saved_model.load(export_dir=FLAGS.pb_dir, tags=None, options=None)
    voc = GetVoc(batch_size=FLAGS.batch_size)
    val_ds = voc.get_val_ds(sample_ratio=FLAGS.val_ds_sample_ratio)

    val_preds = list()
    val_labels_path = os.path.join(ProjectPath.DATASETS_DIR.value, 'voc_tfds', 'eval',
                                   'val_labels_448_full.pickle')
    if FLAGS.val_ds_sample_ratio == 1. and os.path.exists(val_labels_path):
        # Reuse the cached full-set labels instead of rebuilding them
        with open(val_labels_path, 'rb') as f:
            val_labels = pickle.load(f)
    else:
        val_labels = get_labels(ds=val_ds, input_height=cfg.input_height,
                                input_width=cfg.input_width, cls_map=VOC_CLS_MAP,
                                full_save=False)

    img_id = 0
    for _step, batch_data in tqdm.tqdm(enumerate(val_ds, 1), total=len(val_ds),
                                       desc='Validation'):
        batch_imgs, _batch_labels = prep_voc_data(batch_data, input_height=cfg.input_height,
                                                  input_width=cfg.input_width, val=True)
        yolo_output_raw = yolo(batch_imgs, training=False)
        yolo_boxes = yolo_output2boxes(yolo_output_raw, cfg.input_height, cfg.input_width,
                                       cfg.cell_size, cfg.boxes_per_cell)
        for i in range(len(yolo_boxes)):
            # conf_thr=0. keeps every NMS-surviving box for the AP sweep
            output_boxes = box_postp2use(yolo_boxes[i], cfg.nms_iou_thr, 0.)
            if output_boxes.size == 0:
                img_id += 1
                continue
            for output_box in output_boxes:
                *pts, conf, cls_idx = output_box
                cls_name = VOC_CLS_MAP[cls_idx]
                val_preds.append([*map(round, pts), conf, cls_name, img_id])
            img_id += 1

    voc_ap = CalcVOCmAP(labels=val_labels, preds=val_preds, iou_thr=0.5, conf_thr=0.0)
    ap_summary = voc_ap.get_summary()
    mAP = ap_summary.pop('mAP')

    APs_log = '\n====== mAP ======\n' + f'* mAP: {mAP:<8.4f}\n'
    for cls_name, ap in ap_summary.items():
        APs_log += f'- {cls_name}: {ap:<8.4f}\n'
    APs_log += '====== ====== ======\n'
    print(colored(APs_log, 'magenta'))
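# ---------------------------------------------------------------------------
# Hedged sketch of the record layouts fed to CalcVOCmAP, inferred from the
# loops in main() above (an observation about this code, not CalcVOCmAP's
# documented API). All coordinate and confidence values below are made up.
#   ground-truth label: [left, top, right, bottom, cls_name, img_id]
#   prediction:         [xmin, ymin, xmax, ymax, conf, cls_name, img_id]
# ---------------------------------------------------------------------------
def _demo_calc_voc_map_inputs():
    labels = [[48, 240, 195, 371, 'dog', 0]]       # one GT box in image 0
    preds = [[52, 238, 190, 365, 0.87, 'dog', 0]]  # one detection for image 0
    voc_ap = CalcVOCmAP(labels=labels, preds=preds, iou_thr=0.5, conf_thr=0.0)
    return voc_ap.get_summary()                    # dict of per-class APs plus 'mAP'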
def get_labels(ds, input_height, input_width, cls_map, full_save=False):
    """Convert the dataset's relative-center labels into the absolute-corner
    records consumed by CalcVOCmAP."""
    labels = list()
    img_id = 0

    def cls_idx2name(data_list):
        # Replace the class index in each row with its class name, in place
        for data in data_list:
            cls_idx = data[4]
            data[4] = cls_map[cls_idx]

    print('\n====== ====== Get validation labels for mAP Calculation ====== ======\n')
    for _step, batch_data in tqdm.tqdm(enumerate(ds, 1), total=len(ds),
                                       desc='Get val labels'):
        _, batch_labels = prep_voc_data(batch_data, input_height=input_height,
                                        input_width=input_width, val=True)
        for batch_label in batch_labels:
            batch_label = batch_label.numpy()
            img_id_arr = np.array([img_id] * len(batch_label), dtype=np.float32)
            cx_rel, cy_rel = batch_label[:, 0], batch_label[:, 1]
            w_rel, h_rel = batch_label[:, 2], batch_label[:, 3]
            cls_idx = batch_label[:, 4]
            # Relative (cx, cy, w, h) -> absolute (left, top, right, bottom)
            cx, cy = cx_rel * input_width, cy_rel * input_height
            w, h = w_rel * input_width, h_rel * input_height
            left, top = cx - (w / 2), cy - (h / 2)
            right, bottom = cx + (w / 2), cy + (h / 2)
            converted_data = np.array([left, top, right, bottom, cls_idx, img_id_arr],
                                      dtype=np.float32).T
            converted_data = np.around(converted_data).astype(np.int32).tolist()
            cls_idx2name(converted_data)
            labels.extend(converted_data)
            img_id += 1
    print('\n====== ====== Get validation labels for mAP Calculation (Completed) ====== ======\n')

    # Save as pickle file
    if full_save:
        voc2012_val_labels_path = os.path.join(ProjectPath.DATASETS_DIR.value, 'voc_tfds',
                                               'eval', 'val_labels_448_full.pickle')
        with open(voc2012_val_labels_path, 'wb') as f:
            pickle.dump(labels, f)
    return labels
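# ---------------------------------------------------------------------------
# Standalone numeric check of the relative-center -> absolute-corner
# conversion used in get_labels() above, for a 448x448 input (the size this
# project uses). The box values are made up for illustration.
# ---------------------------------------------------------------------------
def _demo_label_coord_conversion():
    input_height = input_width = 448
    cx_rel, cy_rel, w_rel, h_rel = 0.5, 0.5, 0.25, 0.5
    cx, cy = cx_rel * input_width, cy_rel * input_height  # (224.0, 224.0)
    w, h = w_rel * input_width, h_rel * input_height      # (112.0, 224.0)
    left, top = cx - w / 2, cy - h / 2                    # (168.0, 112.0)
    right, bottom = cx + w / 2, cy + h / 2                # (280.0, 336.0)
    return left, top, right, bottom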
def train():
    for epoch in range(1, FLAGS.epochs + 1):
        train_ds = voc.get_train_ds(shuffle=True, drop_remainder=True,
                                    sample_ratio=FLAGS.train_ds_sample_ratio)
        steps_per_epoch = len(train_ds)
        train_log_handler = TrainLogHandler(total_epochs=FLAGS.epochs,
                                            steps_per_epoch=steps_per_epoch,
                                            optimizer=optimizer, logger=logger)
        for step, batch_data in enumerate(train_ds, 1):
            batch_imgs, batch_labels = prep_voc_data(batch_data, input_height=cfg.input_height,
                                                     input_width=cfg.input_width, val=False)
            losses = train_step(yolo, optimizer, batch_imgs, batch_labels, cfg)
            train_log_handler.logging(epoch=epoch, step=step, losses=losses,
                                      tb_writer=tb_train_writer)
        if epoch % FLAGS.val_step == 0:
            validation(epoch=epoch)
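# ---------------------------------------------------------------------------
# Hedged sketch: train_step() is called in train() above but defined elsewhere
# in the repo. A typical TF2 implementation of such a step wraps forward pass,
# loss, and gradient application in a GradientTape, reusing the same per-image
# get_losses() helper seen in validation() below. The real train_step may
# differ in details (e.g. it may be wrapped in @tf.function).
# ---------------------------------------------------------------------------
def train_step_sketch(yolo, optimizer, batch_imgs, batch_labels, cfg):
    with tf.GradientTape() as tape:
        preds = yolo(batch_imgs, training=True)
        batch_loss = 0.
        for i in range(len(preds)):
            # Same per-image loss decomposition as in validation()
            batch_loss += get_losses(one_pred=preds[i], one_label=batch_labels[i],
                                     cfg=cfg)['total_loss']
        batch_loss /= len(preds)  # mean loss over the batch
    grads = tape.gradient(batch_loss, yolo.trainable_variables)
    optimizer.apply_gradients(zip(grads, yolo.trainable_variables))
    return {'total_loss': batch_loss}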
def validation(epoch):
    val_ds = voc.get_val_ds(sample_ratio=FLAGS.val_ds_sample_ratio)
    val_log_handler = ValLogHandler(total_epochs=FLAGS.epochs, logger=logger)
    val_losses_raw = {
        'total_loss': tf.keras.metrics.MeanTensor(),
        'coord_loss': tf.keras.metrics.MeanTensor(),
        'obj_loss': tf.keras.metrics.MeanTensor(),
        'noobj_loss': tf.keras.metrics.MeanTensor(),
        'class_loss': tf.keras.metrics.MeanTensor(),
    }

    img_id = 0
    val_preds = list()
    for step, batch_data in tqdm.tqdm(enumerate(val_ds, 1), total=len(val_ds),
                                      desc='Validation'):
        batch_imgs, batch_labels = prep_voc_data(batch_data, input_height=cfg.input_height,
                                                 input_width=cfg.input_width, val=True)
        yolo_output_raw = yolo(batch_imgs, training=False)

        # ====== ====== ====== Calc Losses ====== ====== ======
        batch_losses = {
            'total_loss': 0.,
            'coord_loss': 0.,
            'obj_loss': 0.,
            'noobj_loss': 0.,
            'class_loss': 0.,
        }
        for i in range(len(yolo_output_raw)):
            one_loss = get_losses(one_pred=yolo_output_raw[i],
                                  one_label=batch_labels[i], cfg=cfg)
            for loss_name in batch_losses:
                batch_losses[loss_name] += one_loss[loss_name]
        # Accumulate the per-image mean of each loss over the whole epoch
        for loss_name in val_losses_raw:
            val_losses_raw[loss_name].update_state(batch_losses[loss_name] / len(batch_imgs))

        # ====== ====== ====== mAP ====== ====== ======
        yolo_boxes = yolo_output2boxes(yolo_output_raw, cfg.input_height, cfg.input_width,
                                       cfg.cell_size, cfg.boxes_per_cell)
        for i in range(len(yolo_boxes)):
            # conf_thr=0. keeps every NMS-surviving box for the AP sweep
            output_boxes = box_postp2use(yolo_boxes[i], cfg.nms_iou_thr, 0.)
            if output_boxes.size == 0:
                img_id += 1
                continue
            for output_box in output_boxes:
                *pts, conf, cls_idx = output_box
                cls_name = VOC_CLS_MAP[cls_idx]
                val_preds.append([*map(round, pts), conf, cls_name, img_id])
            img_id += 1

    voc_ap = CalcVOCmAP(labels=val_labels, preds=val_preds, iou_thr=0.5, conf_thr=0.0)
    ap_summary = voc_ap.get_summary()

    val_losses = dict()
    for loss_name in val_losses_raw:
        val_losses[loss_name] = val_losses_raw[loss_name].result().numpy()
        val_losses_raw[loss_name].reset_states()
    val_log_handler.logging(epoch=epoch, losses=val_losses, APs=ap_summary,
                            tb_writer=tb_val_writer)

    # ========= Tensorboard Image: prediction output visualization =========
    # Training data output visualization
    sampled_voc_imgs, _ = prep_voc_data(train_viz_batch_data, input_height=cfg.input_height,
                                        input_width=cfg.input_width, val=True)
    sampled_voc_preds = yolo(sampled_voc_imgs)
    sampled_voc_output_boxes = yolo_output2boxes(sampled_voc_preds, cfg.input_height,
                                                 cfg.input_width, cfg.cell_size,
                                                 cfg.boxes_per_cell)
    sampled_imgs_num = min(FLAGS.tb_img_max_outputs, len(sampled_voc_imgs))
    pred_viz_imgs = np.empty([sampled_imgs_num, cfg.input_height, cfg.input_width, 3],
                             dtype=np.uint8)
    for idx in range(sampled_imgs_num):
        img = sampled_voc_imgs[idx].numpy()
        labels = box_postp2use(pred_boxes=sampled_voc_output_boxes[idx],
                               nms_iou_thr=cfg.nms_iou_thr, conf_thr=cfg.conf_thr)
        pred_viz_imgs[idx] = viz_pred(img=img, labels=labels, cls_map=VOC_CLS_MAP)
    tb_write_imgs(
        tb_train_writer,
        name=f'[Train] Prediction (confidence_thr: {cfg.conf_thr}, nms_iou_thr: {cfg.nms_iou_thr})',
        imgs=pred_viz_imgs,
        step=epoch,
        max_outputs=FLAGS.tb_img_max_outputs,
    )

    # Validation data output visualization
    sampled_voc_imgs, _ = prep_voc_data(val_viz_batch_data, input_height=cfg.input_height,
                                        input_width=cfg.input_width, val=True)
    sampled_voc_preds = yolo(sampled_voc_imgs)
    sampled_voc_output_boxes = yolo_output2boxes(sampled_voc_preds, cfg.input_height,
                                                 cfg.input_width, cfg.cell_size,
                                                 cfg.boxes_per_cell)
    sampled_imgs_num = min(FLAGS.tb_img_max_outputs, len(sampled_voc_imgs))
    pred_viz_imgs = np.empty([sampled_imgs_num, cfg.input_height, cfg.input_width, 3],
                             dtype=np.uint8)
    for idx in range(sampled_imgs_num):
        img = sampled_voc_imgs[idx].numpy()
        labels = box_postp2use(pred_boxes=sampled_voc_output_boxes[idx],
                               nms_iou_thr=cfg.nms_iou_thr, conf_thr=cfg.conf_thr)
        pred_viz_imgs[idx] = viz_pred(img=img, labels=labels, cls_map=VOC_CLS_MAP)
    tb_write_imgs(
        tb_val_writer,
        name=f'[Val] Prediction (confidence_thr: {cfg.conf_thr}, nms_iou_thr: {cfg.nms_iou_thr})',
        imgs=pred_viz_imgs,
        step=epoch,
        max_outputs=FLAGS.tb_img_max_outputs,
    )
    # ========= ================================================ =========

    # Save checkpoint and pb when validation mAP improves
    if ap_summary['mAP'] >= val_metrics['mAP_best']:
        ckpt_manager.save(checkpoint_number=ckpt.step)
        yolo.save(filepath=VOC_PB_DIR, save_format='tf')
        val_metrics['mAP_best'] = ap_summary['mAP']
        ckpt_log = '\n' + '=' * 100 + '\n'
        ckpt_log += f'* Save checkpoint file and pb file [{VOC_PB_DIR}]'
        ckpt_log += '\n' + '=' * 100 + '\n'
        logger.info(ckpt_log)
        print(colored(ckpt_log, 'green'))
    ckpt.step.assign_add(1)
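# ---------------------------------------------------------------------------
# Hedged sketch: ckpt, ckpt_manager, and val_metrics are used by validation()
# above but defined elsewhere in the training script. The following shows one
# conventional way such state is created in TF2; the checkpoint directory and
# max_to_keep value are assumptions for illustration.
# ---------------------------------------------------------------------------
def build_checkpoint_state_sketch(yolo, optimizer, ckpt_dir='./checkpoints'):
    ckpt = tf.train.Checkpoint(step=tf.Variable(1, dtype=tf.int64),
                               model=yolo, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, directory=ckpt_dir, max_to_keep=3)
    val_metrics = {'mAP_best': 0.0}  # best-so-far mAP, compared against each epoch
    return ckpt, ckpt_manager, val_metrics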