def get_start_num(ori_img, diff_map, temp_fname, darknet_model, frcnn_model):
    """Save the perturbed image to ``temp_fname`` and run both detectors on it.

    Returns a 4-tuple:
        (yolo box count, frcnn box count above 0.3 confidence,
         yolo detections, concatenated frcnn detections).
    """
    # Materialise the candidate image so both detectors read the same file.
    save_format_try_image(ori_img, diff_map, temp_fname)

    # YOLO pass: raw boxes -> post-processed detections.
    detections_yolo = post_process(darknet_model(get_yolo_image(temp_fname)))

    # Faster R-CNN pass. The meta dict mirrors mmdetection's expected
    # per-image metadata; the image is resized 500 -> 800 (scale 1.6).
    meta = {
        'filename': '',
        'ori_filename': '',
        'ori_shape': (500, 500, 3),
        'img_shape': (800, 800, 3),
        'pad_shape': (800, 800, 3),
        'scale_factor': np.array([1.6, 1.6, 1.6, 1.6]),
        'flip': False,
        'flip_direction': None,
        'img_norm_cfg': {
            'mean': np.array([123.675, 116.28, 103.53]),
            'std': np.array([58.395, 57.12, 57.375]),
            'to_rgb': True
        }
    }
    raw_frcnn, scores, _ = frcnn_model(img=[get_frcnn_image(temp_fname)],
                                       img_metas=[[meta]],
                                       return_loss=False,
                                       rescale=False)
    detections_frcnn = np.concatenate(raw_frcnn)
    # Column 4 holds the confidence score; count boxes above 0.3.
    num_frcnn = np.sum(detections_frcnn[:, 4] > 0.3)
    return len(detections_yolo), num_frcnn, detections_yolo, detections_frcnn
def predict(self, image):
    """Detect objects in one image.

    Returns (bboxes, scores, classes); when NMS is enabled and nothing
    survives, scores/classes fall back to the placeholder [0] values.
    """
    source = np.copy(image)
    src_h, src_w, _ = source.shape
    src_size = source.shape[:2]

    # Resize/pad to the network input, add the batch axis, and run the graph.
    net_in = utils.image_preporcess(image, [self.input_size, self.input_size])
    net_in = net_in[np.newaxis, ...]
    raw_dets = self.sess.run(self.det, feed_dict={self.input_data: net_in})
    raw_dets = utils.post_process(raw_dets, src_size,
                                  [cfg.input_image_h, cfg.input_image_w],
                                  cfg.down_ratio, cfg.score_threshold)

    bboxes, scores, classes = [], [0], [0]
    if not cfg.use_nms:
        # NMS disabled: return the post-processed detections as-is.
        return raw_dets[:, 0:4], raw_dets[:, 4], raw_dets[:, 5]

    # Per-class NMS: column 5 is the class id, column 4 the score.
    kept = []
    for label in set(raw_dets[:, 5]):
        same_cls = raw_dets[raw_dets[:, 5] == label]
        keep_idx = utils.py_nms(same_cls[:, :4], same_cls[:, 4],
                                max_boxes=50, iou_thresh=0.5)
        kept.extend(same_cls[keep_idx])
    kept = np.asarray(kept)
    if len(kept) != 0:
        bboxes, scores, classes = kept[:, 0:4], kept[:, 4], kept[:, 5]
    #bboxes_draw_on_img(original_image, classes, scores, bboxes, class_names)
    return bboxes, scores, classes
# NOTE(review): fragment — the first statements appear to be the tail of a
# per-batch prediction loop whose header is outside this chunk; indentation
# below is reconstructed. `ls` accumulates (id, pred) pairs across batches.
cnt += len(preds)
if cnt % opt.display_freq == 0:
    # Progress line: [processed/total] and wall time since last report.
    print('[' + str(cnt) + '/' + str(len(dataset)) + ']' +
          ' Time: %.2f' % (time.time() - iter_start_time))
for k in range(len(preds)):
    ls.append((ids[k], preds[k]))
iter_start_time = time.time()

# Group consecutive predictions by sample id — assumes `ls` is already
# ordered by id, as itertools.groupby only merges adjacent equal keys.
groups = groupby(ls, key=lambda x: x[0])
for id, group in groups:  # NOTE(review): `id` shadows the builtin
    arrayls = []
    for item in group:
        arrayls.append(item[1])
    # Serialize each id's stacked predictions into the output file.
    output.write(
        post_process(pred=np.array(arrayls, dtype='float32'), id=id, opt=opt))

# Compress the prediction file in place (produces pred<timestr>.txt.gz).
os.system('gzip ' + os.path.join(model.save_dir, "test",
                                 'pred' + timestr + '.txt'))

# Detach the memory: keep save_dir, then drop large objects so the GPU/RAM
# can be reclaimed before the interactive submit prompt.
save_dir = model.save_dir
model.cpu()
model = None
dataset = None
lr_loader = None
print('Prediction Saved in ' +
      os.path.join(save_dir, "test", 'pred' + timestr + '.txt.gz'))
print('>>Submit Now: (large/small/no)?')
def train():
    """Train single-GPU CenterNet (DLA-34 backbone) with TF1 static graphs.

    Builds train/test tf.data pipelines from line-oriented annotation files,
    constructs the model + loss graph, then runs the epoch loop: train,
    checkpoint on mean train loss, and run VOC-style evaluation every
    ``cfg.eval_epoch`` epochs.
    """
    # define dataset — dataset sizes are derived by counting annotation lines.
    num_train_imgs = len(open(cfg.train_data_file, 'r').readlines())
    num_train_batch = int(math.ceil(float(num_train_imgs) / cfg.batch_size))
    num_test_imgs = len(open(cfg.test_data_file, 'r').readlines())
    num_test_batch = int(math.ceil(float(num_test_imgs) / 1))  # test batch size is 1
    train_dataset = tf.data.TextLineDataset(cfg.train_data_file)
    train_dataset = train_dataset.shuffle(num_train_imgs)
    train_dataset = train_dataset.batch(cfg.batch_size)
    # get_data (py_func) parses a batch of annotation lines into tensors:
    # image, heatmap, width/height, offset, mask, indices, image size, id.
    train_dataset = train_dataset.map(lambda x: tf.py_func(get_data,
                                                           inp=[x, True],
                                                           Tout=[tf.float32, tf.float32, tf.float32, tf.float32,
                                                                 tf.float32, tf.float32, tf.int32, tf.int32]),
                                      num_parallel_calls=6)
    train_dataset = train_dataset.prefetch(3)
    test_dataset = tf.data.TextLineDataset(cfg.test_data_file)
    test_dataset = test_dataset.batch(1)
    test_dataset = test_dataset.map(lambda x: tf.py_func(get_data,
                                                         inp=[x, False],
                                                         Tout=[tf.float32, tf.float32, tf.float32, tf.float32,
                                                               tf.float32, tf.float32, tf.int32, tf.int32]),
                                    num_parallel_calls=1)
    test_dataset = test_dataset.prefetch(1)
    # Reinitializable iterator shared by both datasets; switching happens via
    # the two init ops below.
    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)
    trainset_init_op = iterator.make_initializer(train_dataset)
    testset_init_op = iterator.make_initializer(test_dataset)
    input_data, batch_hm, batch_wh, batch_reg, batch_reg_mask, batch_ind, batch_img_size, batch_id = iterator.get_next()
    # py_func loses static shape info — restore the known ranks.
    input_data.set_shape([None, None, None, 3])
    batch_hm.set_shape([None, None, None, None])
    batch_wh.set_shape([None, None, None])
    batch_reg.set_shape([None, None, None])
    batch_reg_mask.set_shape([None, None])
    batch_ind.set_shape([None, None])
    batch_img_size.set_shape([None, None])
    batch_id.set_shape([None])
    # training flag
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    # difine model and loss
    model = CenterNet(input_data, is_training, "dla_34")
    hm = model.pred_hm
    wh = model.pred_wh
    reg = model.pred_reg
    from utils.decode import decode
    det = decode(hm, wh, reg, K=cfg.max_objs)
    with tf.variable_scope('loss'):
        # hm_loss, wh_loss, reg_loss = model.compute_loss(batch_hm, batch_wh, batch_reg, batch_reg_mask, batch_ind)
        hm_loss, wh_loss, reg_loss = model.compute_loss_pcl(batch_hm, batch_wh, batch_reg, batch_reg_mask, batch_ind)
        total_loss = hm_loss + wh_loss + reg_loss
    # define train op
    if cfg.lr_type == "CosineAnnealing":
        # NOTE(review): the cosine/warmup schedule is commented out below, so
        # this branch currently trains at a constant 1e-4 regardless of name.
        learning_rate = 0.0001
        global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
        # warmup_steps = tf.constant(cfg.warm_up_epochs * num_train_batch, dtype=tf.float64, name='warmup_steps')
        # train_steps = tf.constant(cfg.epochs * num_train_batch, dtype=tf.float64, name='train_steps')
        # learning_rate = tf.cond(
        #     pred=global_step < warmup_steps,
        #     true_fn=lambda: global_step / warmup_steps * cfg.init_lr,
        #     false_fn=lambda: cfg.end_lr + 0.5 * (cfg.init_lr - cfg.end_lr) *
        #     (1 + tf.cos(
        #         (global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
        # )
        global_step_update = tf.assign_add(global_step, 1.0)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
        # Run batch-norm updates and the step counter together with the
        # optimizer; train_op is a no-op that just forces the dependencies.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([optimizer, global_step_update]):
                train_op = tf.no_op()
    else:
        global_step = tf.Variable(0, trainable=False)
        if cfg.lr_type == "exponential":
            # NOTE(review): decay schedule commented out — constant 1e-4.
            learning_rate = 0.0001
            # learning_rate = tf.train.exponential_decay(cfg.lr,
            #                                            global_step,
            #                                            cfg.lr_decay_steps,
            #                                            cfg.lr_decay_rate,
            #                                            staircase=True)
        elif cfg.lr_type == "piecewise":
            learning_rate = 0.0001
            # learning_rate = tf.train.piecewise_constant(global_step, cfg.lr_boundaries, cfg.lr_piecewise)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step=global_step)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
    with tf.Session() as sess:
        with tf.name_scope('summary'):
            tf.summary.scalar("learning_rate", learning_rate)
            tf.summary.scalar("hm_loss", hm_loss)
            tf.summary.scalar("wh_loss", wh_loss)
            tf.summary.scalar("reg_loss", reg_loss)
            tf.summary.scalar("total_loss", total_loss)
        # Fresh TensorBoard log dir per run.
        logdir = "./log_dla/"
        if os.path.exists(logdir):
            shutil.rmtree(logdir)
        os.mkdir(logdir)
        write_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)
        # train
        sess.run(tf.global_variables_initializer())
        # if cfg.dla_pretrain:
        #     saver.restore(sess, './checkpoint/centernet_test_loss=3.1386.ckpt-79')
        for epoch in range(1, 1 + cfg.epochs):
            pbar = tqdm(range(num_train_batch))
            train_epoch_loss, test_epoch_loss = [], []
            sess.run(trainset_init_op)
            for i in pbar:
                _, summary, train_step_loss, global_step_val, _hm_loss, _wh_loss, _reg_loss = sess.run(
                    [train_op, write_op, total_loss, global_step, hm_loss, wh_loss, reg_loss],
                    feed_dict={is_training: True})
                train_epoch_loss.append(train_step_loss)
                summary_writer.add_summary(summary, global_step_val)
                pbar.set_description("train loss: %.2f" % train_step_loss)
                if i % 20 == 0:
                    print("train loss: %.2f hm_loss: %.2f wh_loss:%2f reg_loss:%2f learning_rate:%f"
                          % (train_step_loss, _hm_loss, _wh_loss, _reg_loss, learning_rate))
            print("begining test")
            sess.run(testset_init_op)
            val_preds = []
            # for j in range(num_test_batch ):
            #     test_step_loss = sess.run(total_loss, feed_dict={is_training: False})
            #     test_epoch_loss.append(test_step_loss)
            train_epoch_loss = np.mean(train_epoch_loss)
            # train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss)
            ckpt_file = "./checkpoint/centernet_train_loss=%.4f.ckpt" % train_epoch_loss
            log_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            print("=> Epoch: %2d Time: %s Train loss: %.2f Saving %s ..."
                  % (epoch, log_time, train_epoch_loss, ckpt_file))
            saver.save(sess, ckpt_file, global_step=epoch)
            # Periodic VOC-style evaluation on the test split.
            if epoch % cfg.eval_epoch == 0 and epoch > 0:
                sess.run(testset_init_op)
                for j in range(num_test_batch):
                    detections, _batch_img_size, _batch_id = sess.run(
                        [det, batch_img_size, batch_id],
                        feed_dict={is_training: False})
                    # NOTE(review): index [0][1] taken as height and [0][0] as
                    # width — depends on how get_data packs img_size; confirm.
                    ori_h = _batch_img_size[0][1]
                    ori_w = _batch_img_size[0][0]
                    detect_post = post_process(detections, (ori_h, ori_w),
                                               [cfg.input_image_h, cfg.input_image_w],
                                               cfg.down_ratio, cfg.score_threshold)
                    id = _batch_id[0]
                    detect_per_img = get_preds_gpu(detect_post, id)
                    val_preds.extend(detect_per_img)
                rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
                info = ""
                gt_dict = parse_gt_rec(cfg.test_data_file,
                                       [cfg.input_image_h, cfg.input_image_w],
                                       cfg.letterbox_resize)
                for ii in range(cfg.num_classes):
                    from utils.utils import voc_eval
                    npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii,
                                                       iou_thres=cfg.score_threshold,
                                                       use_07_metric=cfg.use_voc_07_metric)
                    info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(ii, rec, prec, ap)
                    rec_total.update(rec, npos)
                    prec_total.update(prec, nd)
                    ap_total.update(ap, 1)
                mAP = ap_total.average
                info += 'EVAL: Recall: {:.4f}, Precison: {:.4f}, mAP: {:.4f}\n'.format(
                    rec_total.average, prec_total.average, mAP)
                print(info)
def run_seg(config_file_seg):
    """Run segmentation inference for one config and write a submission CSV.

    Loads the best checkpoint for the configured SMP model, predicts masks
    for the test set (optionally pre-filtered by a classifier via
    ``cls_preds.csv``), post-processes each class channel, and writes
    RLE-encoded masks to ``<work_dir>/submission.csv``.

    Parameters
    ----------
    config_file_seg : str
        Path to the segmentation YAML config.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # Fix: the two make_loader calls differed only in df_path/phase —
    # choose those once instead of duplicating the whole call.
    if os.path.exists('cls_preds.csv'):
        # A classifier stage already filtered likely-empty images.
        df_path, phase = 'cls_preds.csv', 'filtered_test'
    else:
        df_path, phase = config.data.sample_submission_path, 'test'
    testloader = make_loader(data_folder=config.data.test_dir,
                             df_path=df_path,
                             phase=phase,
                             batch_size=config.test.batch_size,
                             num_workers=config.num_workers,
                             transforms=get_transforms(config.transforms.test))

    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    # Prefer per-class min_size values found by validation threshold search.
    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            for fname, preds in zip(batch_fnames, batch_preds):
                # (channel index, submission label) pairs. With 4 classes the
                # channels map to labels 1..4; with 5 classes channel 0 is
                # skipped and channels 1..4 keep their own index as label.
                if config.data.num_classes == 4:
                    channels = [(cls, cls + 1) for cls in range(preds.shape[0])]
                else:  # == 5
                    channels = [(cls, cls) for cls in range(1, 5)]
                for cls, label in channels:
                    mask = preds[cls, :, :]
                    mask, num = post_process(mask, config.test.best_threshold,
                                             min_sizes[cls])
                    rle = mask2rle(mask)
                    name = fname + f"_{label}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions,
                      columns=['ImageId_ClassId', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
def ensemble():
    """Ensemble two model groups (320px and 384px inputs) into one submission.

    Each group is a MultiSegModels wrapper over 10 fold checkpoints. Both
    groups' predictions are resized to the submission resolution, averaged
    with WEIGHTS, gated by per-class image-level label thresholds, then
    post-processed and RLE-encoded.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # parmeters and configs
    # ------------------------------------------------------------------------------------------------------------
    config_paths320 = [
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold0.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold1.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold2.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold3.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold4.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold0.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold1.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold2.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold3.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold4.yml',
    ]
    config_paths384 = [
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold0.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold1.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold2.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold3.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold4.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold0.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold1.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold2.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold3.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold4.yml',
    ]
    # Per-class tuned thresholds: image-level presence, pixel threshold,
    # and minimum connected-component size.
    LABEL_THRESHOLDS = [0.68, 0.69, 0.69, 0.67]
    MASK_THRESHOLDS = [0.31, 0.36, 0.31, 0.34]
    MIN_SIZES = [7500, 10000, 7500, 7500]
    WEIGHTS = [0.5, 0.5]
    # ------------------------------------------------------------------------------------------------------------
    #
    # ------------------------------------------------------------------------------------------------------------
    config = load_config('config/base_config.yml')

    def get_model_and_loader(config_paths):
        """Build a MultiSegModels ensemble and its test loader from configs."""
        config = load_config(config_paths[0])
        models = []
        for c in config_paths:
            models.append(load_model(c))
        model = MultiSegModels(models)
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
        return model, testloader

    model320, loader320 = get_model_and_loader(config_paths320)
    model384, loader384 = get_model_and_loader(config_paths384)

    predictions = []
    with torch.no_grad():
        # The two loaders iterate the same test set in the same order, so the
        # 320 and 384 batches are assumed aligned image-for-image.
        for (batch_fnames320, batch_images320), (batch_fnames384, batch_images384) in tqdm(
                zip(loader320, loader384)):
            batch_images320 = batch_images320.to(config.device)
            batch_images384 = batch_images384.to(config.device)
            batch_preds320 = predict_batch(model320, batch_images320,
                                           tta=config.test.tta)
            batch_preds384 = predict_batch(model384, batch_images384,
                                           tta=config.test.tta)

            # Bring both groups to the submission resolution before mixing.
            batch_preds320 = resize_batch_images(batch_preds320, SUB_HEIGHT,
                                                 SUB_WIDTH)
            batch_preds384 = resize_batch_images(batch_preds384, SUB_HEIGHT,
                                                 SUB_WIDTH)
            batch_preds = batch_preds320 * \
                WEIGHTS[0] + batch_preds384 * WEIGHTS[1]

            # Image-level class presence scores: global max over each mask.
            batch_labels320 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds320)),
                1).view(batch_preds320.shape[0], -1)
            batch_labels384 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds384)),
                1).view(batch_preds384.shape[0], -1)
            batch_labels = batch_labels320 * \
                WEIGHTS[0] + batch_labels384 * WEIGHTS[1]

            for fname, preds, labels in zip(batch_fnames320, batch_preds,
                                            batch_labels):
                for cls in range(4):
                    if labels[cls] <= LABEL_THRESHOLDS[cls]:
                        # Class judged absent from the image — empty mask.
                        pred = np.zeros(preds[cls, :, :].shape)
                    else:
                        pred, _ = post_process(preds[cls, :, :],
                                               MASK_THRESHOLDS[cls],
                                               MIN_SIZES[cls],
                                               height=SUB_HEIGHT,
                                               width=SUB_WIDTH)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['Image_Label', 'EncodedPixels'])
    # Left-merge onto the sample submission so every expected row exists.
    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission, sub_df, on='Image_Label',
                         how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['Image_Label', 'EncodedPixels']]
    df_merged.to_csv("submission.csv", index=False)
    # Also save to the environment-specific work dir (Colab / Kaggle / local).
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/'
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/'
    else:
        config.work_dir = '.'
    df_merged.to_csv(config.work_dir + '/submission.csv', index=False)
def run_seg(config_file_seg):
    """Segmentation inference for one config, writing RLE masks to CSV.

    Predictions are post-processed at the transform resolution, resized to
    the submission resolution (SUB_WIDTH x SUB_HEIGHT), re-binarised, and
    RLE-encoded into ``<work_dir>/submission.csv``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    # Redirect work_dir when running inside Colab or Kaggle.
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    # Use classifier-filtered test images when the classifier stage ran.
    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    model = load_model(config_file_seg)

    # Per-class min_size: prefer values found by validation threshold search.
    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    # Threshold + small-component removal at model resolution.
                    pred, _ = post_process(
                        preds[cls, :, :],
                        config.test.best_threshold,
                        min_sizes[cls],
                        height=config.transforms.test.Resize.height,
                        width=config.transforms.test.Resize.width)
                    # Resize to submission size, then re-binarise because
                    # interpolation produces fractional values.
                    pred = cv2.resize(pred, (SUB_WIDTH, SUB_HEIGHT))
                    pred = (pred > 0.5).astype(int)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions, columns=['Image_Label', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
def train():
    """Multi-GPU CenterNet (DLA-34) training with TF1 tower parallelism.

    The input batch is sliced into per-GPU shards; each GPU builds its own
    model tower and loss, gradients are clipped per tower and summed, and a
    single apply_gradients step updates the shared variables.

    NOTE(review): original indentation was lost; the nesting below (notably
    where the optimizer/global_step are created inside the tower loop) is
    reconstructed from data flow and should be confirmed against the repo.
    """
    # define dataset
    num_train_imgs = len(open(cfg.train_data_file, 'r').readlines())
    num_train_batch = int(math.ceil(float(num_train_imgs) / cfg.batch_size))
    num_test_imgs = len(open(cfg.test_data_file, 'r').readlines())
    num_test_batch = int(math.ceil(float(num_test_imgs) / 1))
    train_dataset = tf.data.TextLineDataset(cfg.train_data_file)
    train_dataset = train_dataset.shuffle(num_train_imgs)
    train_dataset = train_dataset.batch(cfg.batch_size)
    train_dataset = train_dataset.map(lambda x: tf.py_func(
        get_data,
        inp=[x, True],
        Tout=[
            tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.
            float32, tf.int32, tf.int32
        ]),
        num_parallel_calls=6)
    train_dataset = train_dataset.prefetch(3)
    test_dataset = tf.data.TextLineDataset(cfg.test_data_file)
    test_dataset = test_dataset.batch(1)
    test_dataset = test_dataset.map(lambda x: tf.py_func(
        get_data,
        inp=[x, False],
        Tout=[
            tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.
            float32, tf.int32, tf.int32
        ]),
        num_parallel_calls=1)
    test_dataset = test_dataset.prefetch(1)
    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)
    trainset_init_op = iterator.make_initializer(train_dataset)
    testset_init_op = iterator.make_initializer(test_dataset)
    input_data, hm, wh, reg, reg_mask, ind, img_size, id = iterator.get_next()

    # Slice the global batch into one shard per GPU.
    batch_input_data = []
    batch_hm = []
    batch_wh = []
    batch_reg = []
    batch_ind = []
    batch_reg_mask = []
    batch_img_size = []
    batch_id = []
    for i in range(cfg.NUM_GPU):
        start = i * (cfg.batch_size // cfg.NUM_GPU)
        end = (i + 1) * (cfg.batch_size // cfg.NUM_GPU)
        single_input_data = input_data[start:end, :, :, :]
        single_hm = hm[start:end, :, :, :]
        single_wh = wh[start:end, :, :]
        single_reg = reg[start:end, :, :]
        single_reg_mask = reg_mask[start:end, :]
        single_ind = ind[start:end, :]
        single_img_size = img_size[start:end:, :]
        single_id = id[start:end, :]
        batch_input_data.append(single_input_data)
        batch_hm.append(single_hm)
        batch_wh.append(single_wh)
        batch_reg.append(single_reg)
        batch_reg_mask.append(single_reg_mask)
        batch_ind.append(single_ind)
        batch_img_size.append(single_img_size)
        batch_id.append(single_id)
        # py_func drops static shapes — restore known ranks per shard.
        batch_input_data[i].set_shape([None, None, None, 3])
        batch_hm[i].set_shape([None, None, None, None])
        batch_wh[i].set_shape([None, None, None])
        batch_reg[i].set_shape([None, None, None])
        batch_reg_mask[i].set_shape([None, None])
        batch_ind[i].set_shape([None, None])
        batch_img_size[i].set_shape([None, None])
        batch_id[i].set_shape([None])

    # difine model and loss
    with tf.device('/cpu:0'):
        tower_grads = []
        hm_loss = []
        wh_loss = []
        reg_loss = []
        total_loss = []
        hm_pred_list = []
        wh_pred_list = []
        reg_pred_list = []
        pred_det = []
        # training flag
        is_training = tf.placeholder(dtype=tf.bool, name='is_training')
        # with tf.variable_scope(tf.get_variable_scope()):
        # AUTO_REUSE makes every tower share one set of variables.
        with tf.variable_scope(tf.get_variable_scope(),
                               reuse=tf.AUTO_REUSE) as scope:
            for i in range(cfg.NUM_GPU):
                print("current gpu is", i)
                with tf.device('/gpu:%d' % i):
                    model = CenterNet(batch_input_data[i], is_training,
                                      "dla_34")
                    hm_pred = model.pred_hm
                    wh_pred = model.pred_wh
                    reg_pred = model.pred_reg
                    hm_pred_list.append(hm_pred)
                    wh_pred_list.append(wh_pred)
                    reg_pred_list.append(reg_pred)
                    det = decode(hm_pred_list[i], wh_pred_list[i],
                                 reg_pred_list[i], K=cfg.max_objs)
                    pred_det.append(det)
                    with tf.variable_scope('loss'):
                        l2_loss = tf.losses.get_regularization_loss()
                        # hm_loss[i], wh_loss[i], reg_loss[i] = model.compute_loss(batch_hm[i], batch_wh[i], batch_reg[i], batch_reg_mask[i], batch_ind[i])
                        hm_loss_single, wh_loss_single, reg_loss_single = model.compute_loss(
                            batch_hm[i], batch_wh[i], batch_reg[i],
                            batch_reg_mask[i], batch_ind[i])
                        hm_loss.append(hm_loss_single)
                        wh_loss.append(wh_loss_single)
                        reg_loss.append(reg_loss_single)
                        total_loss_single = hm_loss[i] + wh_loss[i] + reg_loss[
                            i] + l2_loss
                        total_loss.append(total_loss_single)
                    # define train op
                    global_step = tf.Variable(0, trainable=False)
                    if cfg.lr_type == "exponential":
                        learning_rate = tf.train.exponential_decay(
                            cfg.lr,
                            global_step,
                            cfg.lr_decay_steps,
                            cfg.lr_decay_rate,
                            staircase=True)
                    elif cfg.lr_type == "piecewise":
                        learning_rate = tf.train.piecewise_constant(
                            global_step, cfg.lr_boundaries, cfg.lr_piecewise)
                    optimizer = tf.train.AdamOptimizer(learning_rate)
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    with tf.control_dependencies(update_ops):
                        # Per-tower gradients, clipped by norm to 100.
                        grads = optimizer.compute_gradients(total_loss[i])
                        clip_grad_var = [
                            gv if gv[0] is None else
                            [tf.clip_by_norm(gv[0], 100.), gv[1]]
                            for gv in grads
                        ]
                        tower_grads.append(clip_grad_var)
        last_loss = tf.reduce_mean(total_loss)
        # Sum gradients across towers (no-op when only one GPU).
        if len(tower_grads) > 1:
            clip_grad_var = sum_gradients(tower_grads)
        else:
            clip_grad_var = tower_grads[0]
        train_op = optimizer.apply_gradients(clip_grad_var,
                                             global_step=global_step)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        with tf.name_scope('summary'):
            tf.summary.scalar("learning_rate", learning_rate)
            tf.summary.scalar("hm_loss", tf.reduce_mean(hm_loss))
            tf.summary.scalar("wh_loss", tf.reduce_mean(wh_loss))
            tf.summary.scalar("reg_loss", tf.reduce_mean(reg_loss))
            tf.summary.scalar("total_loss", tf.reduce_mean(total_loss))
        # Fresh TensorBoard log dir per run.
        logdir = "./log_dla/"
        if os.path.exists(logdir):
            shutil.rmtree(logdir)
        os.mkdir(logdir)
        write_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)
        sess.run(tf.global_variables_initializer())
        if cfg.pre_train:
            saver.restore(
                sess,
                './checkpoint/centernet_train_epoch_loss=313.7357.ckpt-6'
            )
        for epoch in range(1, 1 + cfg.epochs):
            pbar = tqdm(range(num_train_batch))
            train_epoch_loss, test_epoch_loss = [], []
            sess.run(trainset_init_op)
            for i in pbar:
                _, summary, train_step_loss, global_step_val = sess.run(
                    [train_op, write_op, last_loss, global_step],
                    feed_dict={is_training: True})
                train_epoch_loss.append(train_step_loss)
                summary_writer.add_summary(summary, global_step_val)
                pbar.set_description("train loss: %.2f" % train_step_loss)
            # sess.run(testset_init_op)
            # for j in range(num_test_batch ):
            #     test_step_loss = sess.run(last_loss, feed_dict={is_training: False})
            #     test_epoch_loss.append(test_step_loss)
            # train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss)
            train_epoch_loss = np.mean(train_epoch_loss)
            ckpt_file = "./checkpoint/centernet_train_epoch_loss=%.4f.ckpt" % train_epoch_loss
            log_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(time.time()))
            print(
                "=> Epoch: %2d Time: %s Train loss: %.2f Saving %s ..."
                % (epoch, log_time, train_epoch_loss, ckpt_file))
            saver.save(sess, ckpt_file, global_step=epoch)
            # Periodic VOC-style evaluation using tower 0's decoded output.
            if epoch % cfg.eval_epoch == 0 and epoch > 0:
                print("begining test")
                sess.run(testset_init_op)
                val_preds = []
                for j in tqdm(range(num_test_batch)):
                    detections, _batch_img_size, _batch_id = sess.run(
                        [pred_det[0], img_size, id],
                        feed_dict={is_training: False})
                    # print("detecttiion is", detections)
                    # print("_batch_img_size is", _batch_img_size)
                    # print("id is", _batch_id)
                    # NOTE(review): [0][1] read as height, [0][0] as width —
                    # depends on how get_data packs img_size; confirm.
                    ori_h = _batch_img_size[0][1]
                    ori_w = _batch_img_size[0][0]
                    detect_post = post_process(
                        detections, (ori_h, ori_w),
                        [cfg.input_image_h, cfg.input_image_w],
                        cfg.down_ratio, cfg.score_threshold)
                    id_test = _batch_id[0]
                    detect_per_img = get_preds_gpu(
                        detect_post, id_test)
                    val_preds.extend(detect_per_img)
                rec_total, prec_total, ap_total = AverageMeter(
                ), AverageMeter(), AverageMeter()
                info = ""
                gt_dict = parse_gt_rec(
                    cfg.test_data_file,
                    [cfg.input_image_h, cfg.input_image_w],
                    cfg.letterbox_resize)
                for ii in range(cfg.num_classes):
                    from utils.utils import voc_eval
                    npos, nd, rec, prec, ap = voc_eval(
                        gt_dict,
                        val_preds,
                        ii,
                        iou_thres=cfg.score_threshold,
                        use_07_metric=cfg.use_voc_07_metric)
                    info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(
                        ii, rec, prec, ap)
                    rec_total.update(rec, npos)
                    prec_total.update(prec, nd)
                    ap_total.update(ap, 1)
                mAP = ap_total.average
                info += 'EVAL: Recall: {:.4f}, Precison: {:.4f}, mAP: {:.4f}\n'.format(
                    rec_total.average, prec_total.average, mAP)
                print(info)
# Script fragment: run TTA inference through a catalyst SupervisedRunner and
# RLE-encode every class mask into `encoded_pixels` (indexed by image_id,
# 4 class channels per image — hence the `image_id % 4` parameter lookup).
runner = SupervisedRunner(
    model=tta_model, device=get_device())
for i, test_batch in enumerate(tqdm.tqdm(loaders['test'])):
    test_batch = test_batch[0].cuda()
    runner_out = runner.predict_batch(
        {"features": test_batch})['logits']
    gc.collect()
    # NOTE(review): this inner `i` shadows the outer batch counter above.
    for i, batch in enumerate(runner_out):
        for probability in batch:
            probability = probability.cpu().detach().numpy()
            # Ensure the submission resolution of 350x525 (H x W).
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(
                    525, 350), interpolation=cv2.INTER_LINEAR)
            # class_params maps class index (as a string) to
            # (threshold, min_size) found during validation.
            predict, num_predict = post_process(
                sigmoid(probability),
                class_params[f"{image_id % 4}"][0],
                class_params[f"{image_id % 4}"][1])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1
# Free the model before building the submission dataframe.
del model
gc.collect()
torch.cuda.empty_cache()
# 14792 = expected number of (image, class) rows in the submission.
assert len(encoded_pixels) == 14792
sub['EncodedPixels'] = encoded_pixels
def run_seg(config_file_seg):
    """Grid-search the post-processing ``min_size`` per class on validation.

    Runs the trained model over the validation fold, applies post_process
    with each candidate min_size, records dice per (min_size, class), and
    writes the averaged table to ``<work_dir>/threshold_search.json``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        batch_size=config.train.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )
    # create segmentation model with pre-trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )
    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    # all_dice[min_size][cls] accumulates per-sample dice scores.
    all_dice = {}
    min_sizes = [100, 300, 500, 750, 1000, 1500, 2000, 3000]
    for min_size in min_sizes:
        all_dice[min_size] = {}
        for cls in range(config.data.num_classes):
            all_dice[min_size][cls] = []

    with torch.no_grad():
        for i, (batch_images, batch_masks) in enumerate(tqdm(validloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            batch_masks = batch_masks.cpu().numpy()
            for masks, preds in zip(batch_masks, batch_preds):
                for cls in range(config.data.num_classes):
                    for min_size in min_sizes:
                        pred, _ = post_process(preds[cls, :, :],
                                               config.test.best_threshold,
                                               min_size)
                        mask = masks[cls, :, :]
                        all_dice[min_size][cls].append(dice_score(pred, mask))

    # Replace each score list with its mean.
    for cls in range(config.data.num_classes):
        for min_size in min_sizes:
            all_dice[min_size][cls] = sum(all_dice[min_size][cls]) / len(
                all_dice[min_size][cls])
    dict_to_json(all_dice, config.work_dir + '/threshold_search.json')
    # NOTE(review): `cls` and `min_size` here are leaked loop variables, so
    # this prints only the last (class, min_size) combination — likely this
    # block was meant to live inside the loops above; confirm intent.
    if config.data.num_classes == 4:
        defect_class = cls + 1
    else:
        defect_class = cls
    print('average dice score for class{} for min_size {}: {}'.format(
        defect_class, min_size, all_dice[min_size][cls]))
# Script fragment: load a trained CenterNet checkpoint, run it over a test
# image folder, and convert detections back to original-image coordinates.
# (Appears truncated — coordinate conversion continues beyond this chunk.)
model.load_state_dict(torch.load(cfg.load_model))
model.eval()
dataloader = DataLoader(ImageFolder(cfg.test_dir),
                        batch_size=1,
                        shuffle=False,
                        num_workers=1)
# Assign a distinct color to each class name (evenly spaced HSV hues).
hsv_tuples = [(x / len(cfg.class_name), 1., 1.)
              for x in range(len(cfg.class_name))]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(
    map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
        colors))
imgs = []           # image paths
img_detections = []  # detection results per image
for img_paths, input_imgs in dataloader:
    input_imgs = input_imgs.cuda()
    with torch.no_grad():
        a = time.time()
        hm, wh, offset = model(input_imgs)
        # Forward-pass latency in milliseconds.
        print("forward耗时: ", (time.time()-a)*1000)
        results = post_process(hm, wh, offset, 50)
    imgs.extend(img_paths)
    img_detections.extend(results)
for path, detections in zip(imgs, img_detections):
    img = Image.open(path)
    w, h = img.size
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=16)
    if detections is not None:
        # First map coordinates from network-input scale to the max(w, h)
        # square the image was letterboxed into.
        detections[:, :4] *= max(h, w) / cfg.input_size
        # If h < w the image is landscape: subtract the vertical padding
        # (w - h) / 2 from the y coordinates; otherwise subtract the
        # horizontal padding from the x coordinates.
        if h < w:
            detections[:, 1:4:2] -= (w - h) / 2
        else:
            detections[:, 0:3:2] -= (h - w) / 2
def validation(config_file_seg):
    """Grid-search post-processing hyper-parameters on the validation fold.

    For every class this sweeps ``label_thresholds`` (image-level classifier
    gate), ``mask_thresholds`` (pixel binarisation) and ``min_sizes``
    (minimum connected-component area), accumulating dice scores in
    ``all_dice[cls, i, j, k]``.  The argmax combination per class is written
    to ``<work_dir>/parameters.json`` and printed as a DataFrame.

    Args:
        config_file_seg: path to the segmentation experiment YAML config.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config = load_config(config_file_seg)

    # Rewrite the work dir when running inside Colab / Kaggle kernels.
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir

    validloader = make_loader(
        data_folder=config.data.train_dir,
        df_path=config.data.train_df_path,
        phase='valid',
        img_size=(config.data.height, config.data.width),
        batch_size=config.test.batch_size,
        num_workers=config.num_workers,
        idx_fold=config.data.params.idx_fold,
        transforms=get_transforms(config.transforms.test),
        num_classes=config.data.num_classes,
    )
    model = load_model(config_file_seg)

    min_sizes = np.arange(0, 20000, 5000)
    label_thresholds = [0.6, 0.7, 0.8]
    mask_thresholds = [0.2, 0.3, 0.4]
    all_dice = np.zeros(
        (4, len(label_thresholds), len(mask_thresholds), len(min_sizes)))
    count = 0

    with torch.no_grad():
        # BUGFIX: the batch index from enumerate() was unused and shadowed
        # by the inner `for i, label_th in enumerate(...)` loop below, so
        # the enumerate wrapper is dropped entirely.
        for batch_images, batch_masks in tqdm(validloader):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            # Image-level "label": global max of sigmoid(pred) per channel.
            batch_labels = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds)),
                1).view(batch_preds.shape[0], -1)

            batch_masks = batch_masks.cpu().numpy()
            batch_labels = batch_labels.cpu().numpy()
            # Score everything at submission resolution.
            batch_masks = resize_batch_images(batch_masks, SUB_HEIGHT,
                                              SUB_WIDTH)
            batch_preds = resize_batch_images(batch_preds, SUB_HEIGHT,
                                              SUB_WIDTH)

            for labels, masks, preds in zip(batch_labels, batch_masks,
                                            batch_preds):
                for cls in range(config.data.num_classes):
                    for i, label_th in enumerate(label_thresholds):
                        for j, mask_th in enumerate(mask_thresholds):
                            for k, min_size in enumerate(min_sizes):
                                if labels[cls] <= label_th:
                                    # classifier gate: predict an empty mask
                                    pred = np.zeros(preds[cls, :, :].shape)
                                else:
                                    pred, _ = post_process(preds[cls, :, :],
                                                           mask_th,
                                                           min_size,
                                                           height=SUB_HEIGHT,
                                                           width=SUB_WIDTH)
                                mask = masks[cls, :, :]
                                dice = dice_score(pred, mask)
                                all_dice[cls, i, j, k] += dice
                count += 1

    # Mean dice per (cls, label_th, mask_th, min_size) over all samples.
    all_dice = all_dice / count
    np.save('all_dice', all_dice)

    # Pick the argmax combination per class and report the mean CV score.
    parameters = {}
    parameters['label_thresholds'] = []
    parameters['mask_thresholds'] = []
    parameters['min_sizes'] = []
    parameters['dice'] = []
    cv_score = 0
    for cls in range(4):
        i, j, k = np.where((all_dice[cls] == all_dice[cls].max()))
        parameters['label_thresholds'].append(float(label_thresholds[i[0]]))
        parameters['mask_thresholds'].append(float(mask_thresholds[j[0]]))
        parameters['min_sizes'].append(int(min_sizes[k[0]]))
        parameters['dice'].append(float(all_dice[cls].max()))
        cv_score += all_dice[cls].max() / 4
    print('cv_score:', cv_score)
    dict_to_json(parameters, config.work_dir + '/parameters.json')
    print(pd.DataFrame(parameters))
def run_seg(config_dir): os.environ['CUDA_VISIBLE_DEVICES'] = '0' # ------------------------------------------------------------------------------------------------------------ # 2. segmentation inference # ------------------------------------------------------------------------------------------------------------ config_root = Path(config_dir) / 'seg' config_paths = [config_root / p for p in os.listdir(config_root)] base_config_paths = [ Path(config_dir) / p for p in os.listdir(config_dir) if 'yml' in p ] config = load_config(base_config_paths[0]) models = [] for c in config_paths: models.append(load_model(c)) model = MultiSegModels(models) if os.path.exists('cls_preds.csv'): testloader = make_loader(data_folder=config.data.test_dir, df_path='cls_preds.csv', phase='filtered_test', batch_size=config.test.batch_size, num_workers=config.num_workers, transforms=get_transforms( config.transforms.test)) else: testloader = make_loader(data_folder=config.data.test_dir, df_path=config.data.sample_submission_path, phase='test', batch_size=config.test.batch_size, num_workers=config.num_workers, transforms=get_transforms( config.transforms.test)) if os.path.exists(config.work_dir + '/threshold_search.json'): with open(config.work_dir + '/threshold_search.json') as json_file: data = json.load(json_file) df = pd.DataFrame(data) min_sizes = list(df.T.idxmax().values.astype(int)) print('load best threshold from validation:', min_sizes) else: min_sizes = config.test.min_size print('load default threshold:', min_sizes) predictions = [] with torch.no_grad(): for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)): batch_images = batch_images.to(config.device) batch_preds = predict_batch(model, batch_images, tta=config.test.tta) for fname, preds in zip(batch_fnames, batch_preds): for cls in range(preds.shape[0]): mask = preds[cls, :, :] mask, num = post_process(mask, config.test.best_threshold, min_sizes[cls]) rle = mask2rle(mask) name = fname + f"_{cls + 1}" 
predictions.append([name, rle]) # ------------------------------------------------------------------------------------------------------------ # submission # ------------------------------------------------------------------------------------------------------------ sub_df = pd.DataFrame(predictions, columns=['ImageId_ClassId', 'EncodedPixels']) sample_submission = pd.read_csv(config.data.sample_submission_path) df_merged = pd.merge(sample_submission, sub_df, on='ImageId_ClassId', how='left') df_merged.fillna('', inplace=True) df_merged['EncodedPixels'] = df_merged['EncodedPixels_y'] df_merged = df_merged[['ImageId_ClassId', 'EncodedPixels']] df_merged.to_csv("submission.csv", index=False) df_merged.to_csv(KAGGLE_WORK_DIR + "/submission.csv", index=False)
class_names = read_class_names(cfg.classes_file) img_names = os.listdir( '/home/pcl/tf_work/TF_CenterNet/VOC/test/VOCdevkit/VOC2007/JPEGImages') for img_name in img_names: img_path = '/home/pcl/tf_work/TF_CenterNet/VOC/test/VOCdevkit/VOC2007/JPEGImages/' + img_name print(img_path) original_image = cv2.imread(img_path) original_image_size = original_image.shape[:2] image_data = image_preporcess(np.copy(original_image), [cfg.input_image_h, cfg.input_image_w]) image_data = image_data[np.newaxis, ...] t0 = time.time() detections = sess.run(det, feed_dict={inputs: image_data}) detections = post_process(detections, original_image_size, [cfg.input_image_h, cfg.input_image_w], cfg.down_ratio, cfg.score_threshold) print('Inferencce took %.1f ms (%.2f fps)' % ((time.time() - t0) * 1000, 1 / (time.time() - t0))) if cfg.use_nms: cls_in_img = list(set(detections[:, 5])) results = [] for c in cls_in_img: cls_mask = (detections[:, 5] == c) classified_det = detections[cls_mask] classified_bboxes = classified_det[:, :4] classified_scores = classified_det[:, 4] inds = py_nms(classified_bboxes, classified_scores, max_boxes=50, iou_thresh=0.5)
def update_one_model(ori_img, diff_map, temp_fname, best_temp_fname,
                     darknet_model, frcnn_model, flag, start_yolo_num,
                     start_frcnn_num, dest_num, rate, gt_bboxes):
    """Run one optimisation epoch of an adversarial perturbation against the
    detector selected by ``flag`` ('yolo' or 'frcnn').

    Gradient steps are taken on ``diff_map`` (a 1x3x500x500 perturbation in
    image space) to suppress detections from both a YOLOv3-style darknet
    model and a Faster R-CNN model; the gradient of the non-selected model
    is mixed in at a lower weight.  The perturbation is confined to slightly
    enlarged ground-truth boxes, thresholded to its top-1% strongest pixels,
    and limited to at most 10 connected components.

    Returns:
        (no_improvement, best_diff_map): ``no_improvement`` is True when the
        best normalised detection counts of this epoch did not beat the
        epoch's starting criterion; ``best_diff_map`` is the perturbation at
        the best step (or None if no step improved the criterion).

    NOTE(review): ``fname`` used below is not a parameter — presumably a
    module-level global with the clean image path; confirm.
    NOTE(review): ``np.float`` is deprecated and removed in NumPy >= 1.24;
    TODO replace with plain ``float``.
    """
    print("Updating %s..." % flag)
    # generate bbox grad mask: gt boxes enlarged 10% per side; gradients
    # outside this region are discarded later
    grad_mask = np.zeros((500, 500, 3), dtype=np.float)
    for bbox in gt_bboxes:
        x1, y1, x2, y2 = bbox
        x1_ = max(0, int(x1 - (x2 - x1) * 0.1))
        x2_ = min(499, int(x2 + (x2 - x1) * 0.1))
        y1_ = max(0, int(y1 - (y2 - y1) * 0.1))
        y2_ = min(499, int(y2 + (y2 - y1) * 0.1))
        cv2.rectangle(grad_mask, (x1_, y1_), (x2_, y2_), (255, 255, 255), -1)
    # HWC -> 1x3x500x500 channel-first to match diff_map's layout
    grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape(
        (1, 3, 500, 500))

    step = 0
    max_steps_num = 200 if flag == 'frcnn' else 50
    best_yolo_num = start_yolo_num
    best_frcnn_num = start_frcnn_num
    min_yolo_loss = float('inf')
    min_frcnn_loss = float('inf')
    min_creterion = float('inf')
    best_diff_map = None
    gradient = np.zeros((1, 3, 500, 500), dtype=np.float)
    relu = torch.nn.ReLU()

    while (step < max_steps_num):
        # re-render the perturbed image and rebuild both model inputs
        save_format_try_image(ori_img, diff_map, temp_fname)
        yolo_input, frcnn_input = get_yolo_image(temp_fname), get_frcnn_image(
            temp_fname)

        # ---- YOLO forward: hinge on objectness over the three scales
        yolo_input.requires_grad = True
        list_boxes = darknet_model(yolo_input)
        yolo_results = post_process(list_boxes)
        yolo_num = len(yolo_results)
        boxes_0 = list_boxes[0].view(3, 85, -1)
        loss_0 = torch.sum(relu(boxes_0[:, 4, :]))
        boxes_1 = list_boxes[1].view(3, 85, -1)
        loss_1 = torch.sum(relu(boxes_1[:, 4, :]))
        boxes_2 = list_boxes[2].view(3, 85, -1)
        loss_2 = torch.sum(relu(boxes_2[:, 4, :]))
        yolo_loss = loss_0 + loss_1 + loss_2

        # ---- Faster R-CNN forward with fixed 500->800 meta information
        frcnn_input.requires_grad = True
        frcnn_results, scores, _ = frcnn_model(
            img=[frcnn_input],
            img_metas=[[{
                'filename': '',
                'ori_filename': '',
                'ori_shape': (500, 500, 3),
                'img_shape': (800, 800, 3),
                'pad_shape': (800, 800, 3),
                'scale_factor': np.array([1.6, 1.6, 1.6, 1.6]),
                'flip': False,
                'flip_direction': None,
                'img_norm_cfg': {
                    'mean': np.array([123.675, 116.28, 103.53]),
                    'std': np.array([58.395, 57.12, 57.375]),
                    'to_rgb': True
                }
            }]],
            return_loss=False,
            rescale=False)
        frcnn_results = np.concatenate(frcnn_results)
        # hinge on class scores, excluding the background column
        frcnn_loss = torch.sum(relu(scores[:, :-1] - 0.049))
        frcnn_num = np.sum(frcnn_results[:, 4] > 0.3)

        # # get gt bboxes
        # gt_bboxes = []
        # h = w = 500
        # for yolo_bbox in yolo_results:
        #     x1, y1, x2, y2 = yolo_bbox[:4]
        #     x1, x2 = int(x1*w), int(x2*w)
        #     y1, y2 = int(y1*h), int(y2*h)
        #     gt_bboxes.append([x1-x2//2, y1-y2//2, x1+x2//2, y1+y2//2])
        # for frcnn_bbox in frcnn_results:
        #     if(frcnn_bbox[-1] > 0.3):
        #         x1, y1, x2, y2 = [int(x/1.6) for x in frcnn_bbox[:4]]
        #         gt_bboxes.append([x1,y1,x2,y2])
        # # generate bbox grad mask
        # grad_mask = np.zeros((500,500,3), dtype=np.float)
        # for bbox in gt_bboxes:
        #     x1, y1, x2, y2 = bbox
        #     cv2.rectangle(grad_mask, (x1,y1), (x2,y2), (255,255,255), -1)
        # grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape((1,3,500,500))

        if (step == 0):
            # baseline criterion for this epoch: normalised detection counts
            epoch_creterion = float(yolo_num) / start_yolo_num + float(
                frcnn_num) / start_frcnn_num
        #creterion = yolo_num if flag == 'yolo' else frcnn_num
        # detection counts dominate (x10000); the selected model's loss
        # breaks ties
        creterion = 10000 * (min(1., float(yolo_num) / start_yolo_num) +
                             min(1., float(frcnn_num) / start_frcnn_num)) + (
                                 yolo_loss if flag == 'yolo' else frcnn_loss)
        if (creterion < min_creterion):
            min_creterion = creterion
            min_frcnn_loss = frcnn_loss
            min_yolo_loss = yolo_loss
            best_yolo_num = yolo_num
            best_frcnn_num = frcnn_num
            best_diff_map = diff_map.copy()
            copyfile(temp_fname, best_temp_fname)

        # check rate
        patch_number, area_rate = get_cd_score(fname, best_temp_fname)
        print(
            "%d @ [%d,%d, %d,%d --> %d] f_loss=%g y_loss=%g min_f_loss=%g min_y_loss=%g, best patch=%d rate=%g limit=%.2f"
            % (step, yolo_num, frcnn_num, best_yolo_num, best_frcnn_num,
               dest_num, frcnn_loss, yolo_loss, min_frcnn_loss, min_yolo_loss,
               patch_number, area_rate, 100. - rate))

        # stop early when the selected model is fully suppressed and the
        # patch satisfies the area/count constraints
        if (((yolo_num == 0 and flag == 'yolo') or
             (frcnn_num == 0 and flag == 'frcnn')) and area_rate < 0.02
                and patch_number <= 10):
            break

        # ---- backprop YOLO loss; resize the 608x608 input grad to 500x500
        darknet_model.zero_grad()
        yolo_loss.backward(retain_graph=False)
        yolo_d_grad = yolo_input.grad.data.cpu().numpy().reshape(
            (1, 3, 608, 608))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad[0], 0, 1), 1, 2)
        yolo_d_grad = mmcv.imresize(yolo_d_grad, (500, 500))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad, 1, 2),
                                  0, 1).reshape((1, 3, 500, 500))

        # ---- backprop FRCNN loss; undo per-channel normalisation, then
        # resize the 800x800 grad to 500x500
        frcnn_model.zero_grad()
        frcnn_loss.backward(retain_graph=False)
        frcnn_d_grad = frcnn_input.grad.data.cpu().numpy().reshape(
            (1, 3, 800, 800))
        frcnn_d_grad[:, 0, :, :] = frcnn_d_grad[:, 0, :, :] * (58.395 / 255.)
        frcnn_d_grad[:, 1, :, :] = frcnn_d_grad[:, 1, :, :] * (57.12 / 255.)
        frcnn_d_grad[:, 2, :, :] = frcnn_d_grad[:, 2, :, :] * (57.375 / 255.)
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad[0], 0, 1), 1, 2)
        frcnn_d_grad = mmcv.imresize(frcnn_d_grad, (500, 500))
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad, 1, 2),
                                   0, 1).reshape((1, 3, 500, 500))
        #frcnn_d_norm = np.linalg.norm(frcnn_d_grad, ord=2, axis=1).reshape(500,500)
        #frcnn_d_norm = (frcnn_d_norm - np.min(frcnn_d_norm)) / (np.max(frcnn_d_norm) - np.min(frcnn_d_norm))
        #frcnn_weight = np.repeat(frcnn_d_norm.reshape(1,1,500,500), 3, axis=1)
        #frcnn_d_grad = np.multiply(frcnn_weight, frcnn_d_grad)
        # L2-normalise per channel then rescale to a usable magnitude
        frcnn_d_grad = normalize(frcnn_d_grad.reshape(3, -1), axis=1).reshape(
            (1, 3, 500, 500))
        frcnn_d_grad = frcnn_d_grad * 10

        # mix gradients; the selected model dominates the direction
        if (flag == 'yolo'):
            alpha = 0.95
        else:
            alpha = 0.1
        gradient = (1. - alpha) * frcnn_d_grad + alpha * yolo_d_grad
        #if(flag == 'frcnn'):
        #    gradient = 0.9 * gradient + 0.1 * grad
        #else:
        #    gradient = grad

        # step-size schedule: fixed for yolo, loss-dependent for frcnn
        loss = yolo_loss if flag == 'yolo' else frcnn_loss
        if (flag == 'yolo'):
            step_size = 0.02
        else:
            if (loss > 10):
                step_size = 0.2  #0.1 + 0.3*(float(loss)-10.)/(start_loss-10.)
            elif (loss > 5):
                step_size = 0.2
            else:
                step_size = 0.01
        # linear decay of the step size over the epoch
        step_size = step_size * (1. - float(step) / max_steps_num)
        #get_loss_on_grad_and_stepsize(gradient, step_size, ori_img, diff_map, temp_fname, darknet_model, frcnn_model)
        gradient = step_size * gradient
        diff_map -= gradient

        # check area rate: keep perturbation inside gt-box region, then keep
        # only its strongest 1% of pixels
        diff_map[grad_mask == 0] = 0
        diff_map_change = np.sum(np.abs(diff_map), axis=1)
        high_thresh = np.percentile(diff_map_change, 99)
        gray_mask = ((diff_map_change > high_thresh) * 255.).astype(np.uint8)
        gray_mask = gray_mask.reshape(500, 500)
        diff_map[0, 0, :, :][gray_mask == 0] = 0
        diff_map[0, 1, :, :][gray_mask == 0] = 0
        diff_map[0, 2, :, :][gray_mask == 0] = 0

        # check connected parts' number: keep only the 10 largest components
        save_format_try_image(ori_img, diff_map, temp_fname)
        cd_map = get_cd_map(fname, temp_fname)
        labels = measure.label(cd_map, background=0, connectivity=2)
        label_num = np.max(labels)
        if (label_num > 10):
            areas = [np.sum(labels == i) for i in range(1, label_num + 1)]
            label_ids = list(range(1, label_num + 1))
            areas, label_ids = zip(*sorted(zip(areas, label_ids)))
            for i in label_ids[:-10]:
                #gray_mask[labels==i] = 0
                diff_map[0, 0, :, :][labels == i] = 0
                diff_map[0, 1, :, :][labels == i] = 0
                diff_map[0, 2, :, :][labels == i] = 0
        #kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3, 3))
        #gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_CLOSE, kernel)
        #gray_mask = gray_mask.reshape(500,500)
        #diff_map[0,0,:,:][gray_mask == 0] = 0
        #diff_map[0,1,:,:][gray_mask == 0] = 0
        #diff_map[0,2,:,:][gray_mask == 0] = 0
        #see = check_image(diff_map)
        #cv2.imwrite('check/%03d_region.jpg' % step, see)
        #cv2.imwrite('check/%03d_region_filter.jpg' % step, cv2.medianBlur(see, 3))
        step += 1

    return float(best_yolo_num) / start_yolo_num + float(
        best_frcnn_num) / start_frcnn_num >= epoch_creterion, best_diff_map
def sigmoid(x): return 1/(1+np.exp(-x)) class_params = {} for class_id in range(4): print(class_id) attempts = [] for t in range(0, 100, 5): t /= 100 for ms in tqdm.tqdm([0, 100, 1200, 5000, 10000], desc=f'{class_id+1}/4; {t}/100'): masks = [] for i in tqdm.tqdm(range(class_id, len(probabilities), 4)): probability = probabilities[i] predict, num_predict = post_process( sigmoid(probability), t, ms) masks.append(predict) gc.collect() d = [] for i, j in zip(masks, valid_masks[class_id::4]): if (i.sum() == 0) & (j.sum() == 0): d.append(1) else: d.append(dice(i, j)) gc.collect() attempts.append((t, ms, np.mean(d))) gc.collect() attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice']) attempts_df = attempts_df.sort_values('dice', ascending=False) print(attempts_df.head())
def ensemble():
    """Run a 10-fold ensemble of segmentation models over the test set and
    dump per-class predicted masks as JPEGs under
    ``results/masks/experiment1/``.

    Per-class gates: an image-level label (global-max sigmoid) must exceed
    ``LABEL_THRESHOLDS[cls]`` before a mask is produced via ``post_process``
    with ``MASK_THRESHOLDS[cls]`` / ``MIN_SIZES[cls]``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # parmeters and configs
    # ------------------------------------------------------------------------------------------------------------
    config_paths320 = [
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold0.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold1.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold2.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold3.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold4.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold0.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold1.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold2.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold3.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold4.yml',
    ]
    #see there use later on
    # LABEL_THRESHOLDS = [0.68, 0.69, 0.69, 0.67]
    # MASK_THRESHOLDS = [0.31, 0.36, 0.31, 0.34]
    LABEL_THRESHOLDS = [0.67, 0.67, 0.67, 0.67,0.67,0.67,0.67,0.50]
    MASK_THRESHOLDS = [0.31, 0.31, 0.31, 0.31,0.31,0.31,0.31,0.31]
    # MIN_SIZES = [7500, 7500, 7500, 7500,7500,7500,7500,7500]
    MIN_SIZES = [0,0,0,0,0,0,0,0]
    # NOTE(review): WEIGHTS is unused in the visible code — presumably meant
    # for blending multiple resolutions later; confirm.
    WEIGHTS = [0.5, 0.5]
    # ------------------------------------------------------------------------------------------------------------
    #
    # ------------------------------------------------------------------------------------------------------------
    config = load_config('config/base_config.yml')

    '''load the models for evaluation'''
    def get_model_and_loader(config_paths):
        # Build a MultiSegModels ensemble plus its matching test loader
        # from a list of config paths (first config defines the data).
        config = load_config(config_paths[0])
        models = []
        for c in config_paths:
            models.append(load_model(c))
        model = MultiSegModels(models)
        print(config.data.test_dir)
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test)
        )
        return model, testloader

    model320, loader320 = get_model_and_loader(config_paths320)
    predictions = []
    with torch.no_grad():
        for (batch_fnames320, batch_images320) in tqdm(loader320):
            batch_images320 = batch_images320.to(config.device)
            print(batch_images320.size())
            batch_preds320 = predict_batch(
                model320, batch_images320, tta=config.test.tta)
            # resize the images from multi resolution models
            batch_preds320 = resize_batch_images(
                batch_preds320, SUB_HEIGHT, SUB_WIDTH)
            batch_preds = batch_preds320
            # image-level labels: global max of sigmoid(pred) per channel
            batch_labels320 = torch.nn.functional.adaptive_max_pool2d(torch.sigmoid(
                torch.Tensor(batch_preds320)), 1).view(batch_preds320.shape[0], -1)
            #print(batch_labels320)
            # change batch_labels by weighing factor later on
            batch_labels = batch_labels320
            print("batch_preds", batch_preds.shape)
            print("batch_labels", batch_labels.size())

            for fname, preds, labels in zip(batch_fnames320, batch_preds,
                                            batch_labels):
                print("ad", labels.size())
                for cls in range(8):
                    if labels[cls] <= LABEL_THRESHOLDS[cls]:
                        # classifier gate failed: emit an empty mask
                        pred = np.zeros(preds[cls, :, :].shape)
                        print("setting 0", cls)
                    else:
                        if cls == 7:
                            print("ok")
                            #print("probability",preds[cls, :, :])
                        pred, _ = post_process(
                            preds[cls, :, :], MASK_THRESHOLDS[cls],
                            MIN_SIZES[cls],
                            height=SUB_HEIGHT, width=SUB_WIDTH)
                    cls_name = INV_CLASSES[cls]
                    print(fname)
                    dump_name = 'results/masks/experiment1/' + fname + 'class_' + str(cls) + '.jpg'
                    print(dump_name)
                    cv2.imwrite(dump_name, pred * 255)