out_img.save(os.path.join(output_folder, im_path.replace(".jpg", ".png"))) elapsed_time = (time.time() - start_time) eta = int((total - count) * (elapsed_time/count)) print "processed %s\t%d/%d\teta: %s" % (im_path, count, total, str(datetime.timedelta(seconds=eta))) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Parameters") parser.add_argument("--target", default= "initweb", help="The target image folder. \"initweb\" is for web images with init-sec. " "\"webweb\" is for web images with web-sec. " "\"initvoc\" is for voc images with init-sec. default: initweb") parser.add_argument("--gpu", default=0, type=int, help="Device indices. default: 0") args = parser.parse_args() misc.my_mkdir(config.CACHE_PATH) if args.target == "initweb": image_folder = config.WEB_IMAGE_FOLDER output_folder = os.path.join(config.CACHE_PATH, config.WEB_MASK_FOLDER_INITSEC) image_list = [] for im_path in os.listdir(image_folder): if im_path.endswith(".jpg") and not im_path.startswith("."): image_list.append(im_path) model_name = "SEC_%s" % config.BASE_NET exec ("import cores.symbols." + model_name + " as net_symbol") weight_file = os.path.join(config.SNAPSHOT_FOLDER, "%s_init-1.params"%(model_name)) infer_use_SEC(image_folder=image_folder, output_folder=output_folder, image_list=image_list, net_symbol=net_symbol, weight_file=weight_file, gpu=args.gpu, use_crf=False) elif args.target == "webweb":
def infer_use_SEC(image_folder, output_folder, image_list, net_symbol, weight_file,
                  multi_label_file=None, gpu=0, use_crf=False):
    """Predict a segmentation mask for every listed image and save it as a PNG.

    Images are visited in random order. An image whose output PNG already
    exists in ``output_folder`` is skipped, so an interrupted run can be
    resumed.

    Args:
        image_folder: Directory that contains the input images.
        output_folder: Directory that receives the palette PNG masks; it is
            created through ``misc.my_mkdir``.
        image_list: Image file names relative to ``image_folder``. The output
            name is the input name with ".jpg" replaced by ".png". The list
            is copied before shuffling, so the caller's list is left as is.
        net_symbol: Object exposing ``create_infer(class_num, workspace)``.
        weight_file: Checkpoint handed to ``misc.load_checkpoint``.
        multi_label_file: Optional pickle file holding a mapping from the
            image name without ".jpg" to the indices of the classes present
            in that image. When given, score channels of classes that are
            not listed are zeroed before the arg-max.
        gpu: Index of the GPU the network runs on.
        use_crf: When true, the scores are refined by the CRF filter before
            the arg-max is taken.
    """
    misc.my_mkdir(output_folder)
    cmap = voc_cmap.get_cmap()
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    # Reshaped to (1, 1, 3) so it broadcasts over a height x width x 3 image.
    mean_rgb = mx.nd.array(config.MEAN_RGB, ctx=mx.cpu()).reshape((1, 1, 3))
    crf_filter = CRF(scale_factor=1.0)

    data_dict = None
    if multi_label_file is not None:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at label files produced by this project.
        with open(multi_label_file, 'rb') as f:
            data_dict = pickle.load(f)

    seg_net = net_symbol.create_infer(config.CLASS_NUM, config.WORKSPACE)
    arg_dict, aux_dict, _ = misc.load_checkpoint(weight_file)
    mod = mx.mod.Module(seg_net, data_names=["data"], label_names=[], context=mx.gpu(gpu))
    mod.bind(data_shapes=[("data", (1, 3, config.INPUT_SIZE_SEC, config.INPUT_SIZE_SEC))],
             for_training=False, grad_req="null")
    initializer = mx.init.Normal()
    initializer.set_verbosity(True)
    mod.init_params(initializer=initializer, arg_params=arg_dict, aux_params=aux_dict,
                    allow_missing=True)

    # Shuffle a copy: shuffling image_list itself would reorder the caller's list.
    file_list = list(image_list)
    random.shuffle(file_list)
    total = len(file_list)
    start_time = time.time()

    for count, im_path in enumerate(file_list, 1):
        out_path = os.path.join(output_folder, im_path.replace(".jpg", ".png"))
        if os.path.exists(out_path):
            print("skipped %s because it already exists." % im_path)
            continue

        # get mask
        # Read the encoded bytes inside a context manager so the file handle
        # is released right away instead of waiting for garbage collection.
        with open(os.path.join(image_folder, im_path), 'rb') as im_file:
            raw_bytes = im_file.read()
        buf = mx.nd.array(np.frombuffer(raw_bytes, dtype=np.uint8), dtype=np.uint8, ctx=mx.cpu())
        im = mx.image.imdecode(buf)
        h, w = im.shape[:2]

        resized_im = mx.image.imresize(im, config.INPUT_SIZE_SEC, config.INPUT_SIZE_SEC, interp=1)
        resized_im = resized_im.astype(np.float32)
        resized_im -= mean_rgb
        # Channels first plus a leading batch axis, matching the bound
        # (1, 3, size, size) data shape.
        resized_im = mx.nd.transpose(resized_im, [2, 0, 1])
        resized_im = mx.nd.expand_dims(resized_im, 0)

        mod.forward(mx.io.DataBatch(data=[resized_im]))
        # Move the channel axis last and drop the batch axis so the score map
        # can be resized like an image, then bring the channel axis back to
        # the front at the original image size.
        score = mx.nd.transpose(mod.get_outputs()[0].copyto(mx.cpu()), [0, 2, 3, 1])
        score = mx.nd.reshape(score, (score.shape[1], score.shape[2], score.shape[3]))
        up_score = mx.nd.transpose(mx.image.imresize(score, w, h, interp=1), [2, 0, 1])

        if multi_label_file is not None:
            tmp_label = data_dict[im_path.replace(".jpg", "")]
            image_level_labels = np.zeros((config.CLASS_NUM - 1))
            image_level_labels[tmp_label] = 1
            # Channel 0 is always kept (presumably background - TODO confirm).
            image_level_labels = np.insert(image_level_labels, 0, 1)
            image_level_labels = image_level_labels.reshape((config.CLASS_NUM, 1, 1))
            image_level_labels = mx.nd.array(image_level_labels, ctx=mx.cpu())
            up_score *= image_level_labels

        up_score = up_score.asnumpy()
        # Clamp tiny and zeroed scores so np.log in the CRF branch stays finite.
        up_score[up_score < 0.00001] = 0.00001

        if use_crf:
            mask = np.argmax(crf_filter.inference(im.asnumpy(), np.log(up_score)), axis=0)
        else:
            mask = np.argmax(up_score, axis=0)

        out_img = Image.fromarray(np.uint8(mask))
        out_img.putpalette(cmap)
        out_img.save(out_path)

        # The ETA uses the average time per visited entry, skipped ones included.
        elapsed_time = (time.time() - start_time)
        eta = int((total - count) * (elapsed_time / count))
        print("processed %s\t%d/%d\teta: %s" % (im_path, count, total,
                                                 str(datetime.timedelta(seconds=eta))))
def get_voc_mask(image_folder, output_folder, net_symbol, class_num, flist_path, epoch, model_prefix, ctx,
                 workspace, max_dim, rgb_mean, scale_list, multi_label_file=None, min_pixel=None,
                 sec_mask_folder=None):
    """Predict masks for the images in a file list and save them as PNGs.

    For every sample the scores of all inputs returned by the data producer
    are resized to the label size and summed; the arg-max of the sum is the
    predicted mask. Optionally the scores are restricted to the image-level
    labels, and the prediction is merged with previously generated masks.

    Args:
        image_folder: Image root handed to ``InferenceDataProducer``.
        output_folder: Directory that receives ``<file_name>.png``; created
            through ``misc.my_mkdir``.
        net_symbol: Object exposing ``create_infer(class_num, workspace)``.
        class_num: Number of score channels.
        flist_path: File list handed to ``InferenceDataProducer``.
        epoch: Checkpoint epoch handed to ``misc.load_checkpoint``.
        model_prefix: Checkpoint prefix handed to ``misc.load_checkpoint``.
        ctx: MXNet context the module is created on.
        workspace: Workspace argument for ``create_infer``.
        max_dim: Side length used for the initial bind; the module is
            reshaped to each input's own shape before the forward pass.
        rgb_mean: Mean handed to ``InferenceDataProducer``.
        scale_list: Scales handed to ``InferenceDataProducer``.
        multi_label_file: Optional pickle file holding a mapping from file
            name to the indices of the classes present. Required when
            ``sec_mask_folder`` is given.
        min_pixel: A label predicted on at most this many pixels takes over
            the whole region the earlier mask assigns to it. ``None``
            disables this fallback.
        sec_mask_folder: Only tested against ``None`` to enable merging.
            NOTE(review): the masks are always read from
            ``config.CACHE_PATH/config.VOC_MASK_FOLDER_INITSEC``, not from
            this path - confirm whether that is intended.

    Raises:
        ValueError: If ``sec_mask_folder`` is given without
            ``multi_label_file``.
    """
    # Checked once up front instead of asserting inside the loop: an assert
    # is stripped under -O and would leave data_dict undefined further down.
    if sec_mask_folder is not None and multi_label_file is None:
        raise ValueError("multi_label_file is required when sec_mask_folder is given")

    misc.my_mkdir(output_folder)
    cmap = voc_cmap.get_cmap()
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"

    seg_net = net_symbol.create_infer(class_num, workspace)
    arg_dict, aux_dict, _ = misc.load_checkpoint(model_prefix, epoch)

    data_dict = None
    if multi_label_file is not None:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at label files produced by this project.
        with open(multi_label_file, 'rb') as f:
            data_dict = pickle.load(f)

    mod = mx.mod.Module(seg_net, label_names=[], context=ctx)
    mod.bind(data_shapes=[("data", (1, 3, max_dim, max_dim))], for_training=False, grad_req="null")
    initializer = mx.init.Normal()
    initializer.set_verbosity(True)
    mod.init_params(initializer=initializer, arg_params=arg_dict, aux_params=aux_dict,
                    allow_missing=True)

    data_producer = InferenceDataProducer(im_root=image_folder, mask_root="", flist_path=flist_path,
                                          rgb_mean=rgb_mean, scale_list=scale_list)

    nbatch = 0
    while True:
        data = data_producer.get_data()
        if data is None:
            break
        im_list = data[0]
        # The label is only used for its height and width here.
        label = data[1].squeeze()
        file_name = data[2]

        final_scoremaps = mx.nd.zeros((class_num, label.shape[0], label.shape[1]))
        # Presumably one input per entry of scale_list - TODO confirm.
        for im in im_list:
            mod.reshape(data_shapes=[("data", im.shape)])
            mod.forward(mx.io.DataBatch(data=[mx.nd.array(im)]))
            # Channel axis last and batch axis dropped so the scores can be
            # resized like an image, then channel axis back to the front.
            score = mx.nd.transpose(mod.get_outputs()[0].copyto(mx.cpu()), [0, 2, 3, 1])
            score = mx.nd.reshape(score, (score.shape[1], score.shape[2], score.shape[3]))
            up_score = mx.nd.transpose(
                mx.image.imresize(score, label.shape[1], label.shape[0], interp=1), [2, 0, 1])
            final_scoremaps += up_score
        final_scoremaps = final_scoremaps.asnumpy()

        if multi_label_file is not None:
            tmp_label = data_dict[file_name]
            image_level_labels = np.zeros((class_num - 1))
            image_level_labels[tmp_label] = 1
            # Channel 0 is always kept (presumably background - TODO confirm).
            image_level_labels = np.insert(image_level_labels, 0, 1)
            image_level_labels = image_level_labels.reshape((class_num, 1, 1))
            final_scoremaps *= image_level_labels

        pred_label = final_scoremaps.argmax(0)

        if sec_mask_folder is not None:
            sec_mask = Image.open(
                os.path.join(config.CACHE_PATH, config.VOC_MASK_FOLDER_INITSEC, file_name + ".png"))
            sec_mask = np.array(sec_mask)
            # Stored labels index the non-zero classes, hence the shift by one
            # to get mask values.
            image_label = data_dict[file_name] + 1
            # Snapshot of the non-zero pixels taken before any label is merged.
            fg_index = pred_label > 0
            for cls_id in image_label:
                if min_pixel is not None and np.sum(pred_label == cls_id) <= min_pixel:
                    pred_label[sec_mask == cls_id] = cls_id
                # NOTE(review): applied for every label, not only in the
                # small-region case above - confirm against the original layout.
                inter_index = ((sec_mask == cls_id) & fg_index)
                pred_label[inter_index] = cls_id

        out_img = Image.fromarray(np.uint8(pred_label))
        out_img.putpalette(cmap)
        out_img.save(os.path.join(output_folder, file_name + ".png"))

        nbatch += 1
        if nbatch % 10 == 0:
            print("processed %dth batch" % nbatch)
import os from cores.config import conf import scipy.io as sio import numpy as np import cores.utils.misc as misc import shutil from PIL import Image import cPickle as pickle #convert SBD data and VOC12 data to our format. if __name__ == "__main__": misc.my_mkdir(conf.DATASET_PATH) misc.my_mkdir(os.path.join(conf.DATASET_PATH, conf.VOC_TRAIN_IM_FOLDER)) misc.my_mkdir(os.path.join(conf.DATASET_PATH, conf.VOC_VAL_IM_FOLDER)) misc.my_mkdir(os.path.join(conf.DATASET_PATH, conf.VOC_VAL_MASK_FOLDER)) # process SBD sbd_list = [] with open(os.path.join(conf.SBD_PATH, "train.txt")) as f: sbd_list += [i.strip() for i in f.readlines()] with open(os.path.join(conf.SBD_PATH, "val.txt")) as f: sbd_list += [i.strip() for i in f.readlines()] with open( os.path.join(conf.VOCDEVKIT_PATH, "ImageSets", "Segmentation", "train.txt")) as f: voc_train_list = [i.strip() for i in f.readlines()] with open( os.path.join(conf.VOCDEVKIT_PATH, "ImageSets", "Segmentation", "val.txt")) as f:
import argparse import mxnet as mx import cores.utils.misc as misc import os from cores.config import conf import logging from cores.train_multi_wrapper import train_multi_wrapper from cores.generate_fg_cues import generate_fg_cues if __name__ == "__main__": parser = argparse.ArgumentParser(description="Training parameters") parser.add_argument("--gpus", default="0", help="Device indices.") args = parser.parse_args() os.environ["MXNET_CPU_WORKER_NTHREADS"] = str(conf.CPU_WORKER_NUM) misc.my_mkdir(conf.LOG_FOLDER) misc.my_mkdir(conf.SNAPSHOT_FOLDER) misc.my_mkdir(conf.CACHE_PATH) log_file_name = os.path.join(conf.LOG_FOLDER, "train_fg_cue_net.log") if os.path.exists(log_file_name): os.remove(log_file_name) logging.basicConfig(filename=log_file_name, level=logging.INFO) console = logging.StreamHandler() logging.getLogger().addHandler(console) im_folder = os.path.join(conf.DATASET_PATH, conf.VOC_TRAIN_IM_FOLDER) multi_label_file = os.path.join(conf.DATASET_PATH, conf.VOC_TRAIN_MULTI_FILE) cue_file = os.path.join(conf.CACHE_PATH, conf.FG_CUE_FILE) snapshot_prefix = os.path.join(conf.SNAPSHOT_FOLDER, "fg_cue_net")
import cores.utils.misc as misc import numpy as np import cores.utils.callbacks as callbacks if __name__ == "__main__": parser = argparse.ArgumentParser(description="Training parameters") parser.add_argument("--gpus", default="0", help="Device indices.") parser.add_argument("--epoch", default=0, type=int, help="Starting epoch.") parser.add_argument("--lr", default=-1, type=float, help="Learning rate.") args = parser.parse_args() misc.my_mkdir(conf.SNAPSHOT_FOLDER) os.environ["MXNET_CPU_WORKER_NTHREADS"] = str(conf.CPU_WORKER_NUM) ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] log_file_name = os.path.join(conf.LOG_FOLDER, "train_SEC_model.log") if os.path.exists(log_file_name) and args.epoch==0: os.remove(log_file_name) logging.basicConfig(filename=log_file_name, level=logging.INFO) console = logging.StreamHandler() logging.getLogger().addHandler(console) bg_cue_file = os.path.join(conf.CACHE_PATH, conf.BG_CUE_FILE) fg_cue_file = os.path.join(conf.CACHE_PATH, conf.FG_CUE_FILE) multi_lable_file = os.path.join(conf.DATASET_PATH, conf.VOC_TRAIN_MULTI_FILE) output_cue_file = os.path.join(conf.CACHE_PATH, conf.CUE_FILE_INITSEC)
mask[new_bottom:, :] = 0 mask[:, 0:new_left] = 0 mask[:, new_right:] = 0 cv2.grabCut(im[:, :, ::-1], mask, None, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_MASK) mask[mask == 2] = 0 mask[mask > 0] = 1 final_mask += mask val_count += 1 if val_count >= max_sample: print "enough samples, break." break if val_count < max_sample: print "bad result" else: output_im = Image.fromarray(final_mask) output_im.save(os.path.join(output_folder, l_name)) print "processed %s" % f if __name__ == "__main__": mask_folder = os.path.join(config.CACHE_PATH, config.WEB_MASK_FOLDER_WEBSEC) tmp_output_folder = os.path.join(config.CACHE_PATH, config.TMP_GC_RESULTS_FOLDER) label_file = os.path.join(config.CACHE_PATH, config.WEB_IMAGE_LABEL_FILE) misc.my_mkdir(tmp_output_folder) #refine masks using grabcut gc_refine(im_folder=config.WEB_IMAGE_FOLDER, mask_folder=mask_folder,output_folder=tmp_output_folder, label_file=label_file, max_sample=config.MAX_SAMPLE_GC, max_trial=config.MAX_TRIAL_GC, offset=config.OFFSET_GC, min_dim_th=config.MIN_DIM_TH_GC, margin=config.MARGIN_GC)
f.write("%s\n" % i) print "done!" if __name__ == "__main__": parser = argparse.ArgumentParser(description="Training parameters") parser.add_argument("--nogc", help="Do not include Grabcut refinement results.", action="store_true") args = parser.parse_args() mask_folder = os.path.join(config.CACHE_PATH, config.WEB_MASK_FOLDER_WEBSEC) final_output_folder = os.path.join(config.CACHE_PATH, config.FINAL_WEB_MASK_FOLDER) misc.my_mkdir(final_output_folder) flist_path = os.path.join(config.CACHE_PATH, config.WEB_IMAGE_FLIST) if args.nogc: #no refinement. Simply copy the files and create a list. count = 0 total = len(os.listdir(mask_folder)) flist = [] for i in os.listdir(mask_folder): if i.endswith(".png") and not i.startswith("."): flist.append(i.replace(".png", "")) shutil.copyfile(os.path.join(mask_folder, i), os.path.join(final_output_folder, i)) count += 1 print "processed %s\t%d/%d" % (i, count, total) with open(flist_path, "w") as f:
parser.add_argument("--gpu", default="0", help="Device index.") parser.add_argument("--epoch", default=0, type=int, help="snapshot name for evaluation") parser.add_argument("--savemask", help="whether save the prediction masks.", action="store_true") parser.add_argument("--savescoremap", help="whether save the prediction scoremaps.", action="store_true") parser.add_argument("--model", default="web", help="evaluate the web model or final model. either \"web\" or \"final\"") parser.add_argument("--crf", help="whether use crf for post processing.", action="store_true") args = parser.parse_args() misc.my_mkdir(config.OUTPUT_FOLDER) log_file_name = os.path.join(config.LOG_FOLDER, "eval_model.log") logging.basicConfig(filename=log_file_name, level=logging.INFO) console = logging.StreamHandler() logging.getLogger().addHandler(console) assert args.model == "web" or args.model == "final" if args.model == "web": model_name = "web_fcn_%s" % config.BASE_NET elif args.model == "final": model_name = "final_fcn_%s" % config.BASE_NET if args.epoch == 0: while True: untested_list = get_untested_list(config.SNAPSHOT_FOLDER, config.OUTPUT_FOLDER, model_name=model_name)
def main():
    """Evaluate one SEC snapshot on the validation set and log the metrics.

    Reads the module-level ``args`` namespace, copies its options onto
    ``conf``, loads the snapshot of epoch ``args.epoch``, runs inference on
    every sample yielded by ``InferenceDataProducer`` (optionally averaged
    with a horizontally flipped pass and refined by the CRF), updates the IOU
    metric, and optionally writes masks and score maps below
    ``conf.OUTPUT_FOLDER``.
    """
    # Local import keeps this block self-contained.
    import importlib

    conf.epoch = args.epoch
    conf.gpu = args.gpu
    conf.savescoremap = args.savescoremap
    conf.savemask = args.savemask
    conf.crf = args.crf
    conf.flip = args.flip
    logging.info(conf)

    crf = CRF(pos_xy_std=conf.CRF_POS_XY_STD, pos_w=conf.CRF_POS_W, bi_xy_std=conf.CRF_BI_XY_STD,
              bi_rgb_std=conf.CRF_BI_RGB_STD, bi_w=conf.CRF_BI_W)

    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_CPU_WORKER_NTHREADS"] = str(conf.CPU_WORKER_NUM)

    model_name = "SEC_%s" % conf.BASE_NET
    # import_module replaces exec("import ... as net"): it returns the same
    # module without running a generated statement, and it binds the local
    # name reliably inside a function.
    net = importlib.import_module("cores.symbols." + model_name)

    epoch_str = str(args.epoch)
    output_path = os.path.join(conf.OUTPUT_FOLDER, model_name + "_epoch" + epoch_str)
    misc.my_mkdir(output_path)
    if conf.savescoremap:
        misc.my_mkdir(os.path.join(output_path, "scoremaps"))
    if conf.savemask:
        misc.my_mkdir(os.path.join(output_path, "masks"))

    ctx = mx.gpu(int(args.gpu))
    cmap = get_cmap()

    seg_net = net.create_infer(conf.CLASS_NUM, conf.WORKSPACE)
    seg_net_prefix = os.path.join(conf.SNAPSHOT_FOLDER, model_name)
    arg_dict, aux_dict, _ = misc.load_checkpoint(seg_net_prefix, args.epoch)

    mod = mx.mod.Module(seg_net, data_names=["data"], label_names=[], context=ctx)
    mod.bind(data_shapes=[("data", (1, 3, conf.INPUT_SIZE_SEC, conf.INPUT_SIZE_SEC))],
             for_training=False, grad_req="null")
    initializer = mx.init.Normal()
    initializer.set_verbosity(True)
    mod.init_params(initializer=initializer, arg_params=arg_dict, aux_params=aux_dict,
                    allow_missing=True)

    data_producer = InferenceDataProducer(
        im_root=os.path.join(conf.DATASET_PATH, conf.VOC_VAL_IM_FOLDER),
        mask_root=os.path.join(conf.DATASET_PATH, conf.VOC_VAL_MASK_FOLDER),
        flist_path=os.path.join(conf.DATASET_PATH, conf.VOC_VAL_LIST),
        rgb_mean=conf.MEAN_RGB,
        input_size=conf.INPUT_SIZE_SEC)

    nbatch = 0
    eval_metrics = [metrics.IOU(conf.CLASS_NUM, get_classnames())]
    logging.info("In evaluation...")

    while True:
        data = data_producer.get_data()
        if data is None:
            break
        im = data[0]
        label = data[1].squeeze()
        im_name = data[2]
        ori_im = data[3]

        mod.forward(mx.io.DataBatch(data=[im]))
        # Channel axis last and batch axis dropped so the scores can be
        # resized like an image to the label size, then channel axis first.
        score = mx.nd.transpose(mod.get_outputs()[0].copyto(mx.cpu()), [0, 2, 3, 1])
        score = mx.nd.reshape(score, (score.shape[1], score.shape[2], score.shape[3]))
        up_score = mx.nd.transpose(
            mx.image.imresize(score, label.shape[1], label.shape[0], interp=1), [2, 0, 1])

        if conf.flip:
            # Second pass on the input reversed along its last axis; the
            # result is flipped back before averaging with the first pass.
            flip_im = im[:, :, :, ::-1]
            mod.forward(mx.io.DataBatch(data=[flip_im]))
            flip_score = mx.nd.transpose(mod.get_outputs()[0].copyto(mx.cpu()), [0, 2, 3, 1])
            flip_score = mx.nd.reshape(
                flip_score, (flip_score.shape[1], flip_score.shape[2], flip_score.shape[3]))
            flip_up_score = mx.nd.transpose(
                mx.image.imresize(flip_score, label.shape[1], label.shape[0], interp=1), [2, 0, 1])
            up_score += mx.nd.flip(flip_up_score, axis=2)
            up_score /= 2

        if conf.crf:
            final_scoremaps = mx.nd.log(up_score).asnumpy()
            final_scoremaps = crf.inference(ori_im.asnumpy(), final_scoremaps)
        else:
            final_scoremaps = up_score.asnumpy()

        pred_label = final_scoremaps.argmax(0)

        # Loop variable renamed from "eval" so the builtin is not shadowed.
        for metric in eval_metrics:
            metric.update(label, pred_label)

        # splitext leaves a name without an extension untouched, whereas
        # slicing at rfind(".") == -1 would drop its last character.
        output_stem = os.path.splitext(im_name)[0]
        if conf.savemask:
            out_img = Image.fromarray(np.uint8(pred_label))
            out_img.putpalette(cmap)
            out_img.save(os.path.join(output_path, "masks", output_stem + ".png"))
        if conf.savescoremap:
            np.save(os.path.join(output_path, "scoremaps", output_stem), final_scoremaps)

        nbatch += 1
        if nbatch % 10 == 0:
            print("processed %dth batch" % nbatch)

    logging.info("Epoch [%d]: " % args.epoch)
    for m in eval_metrics:
        overall = m.get()
        logging.info("[overall] [%s: %.4f]" % (overall[0], overall[1]))
        class_values = m.get_class_values()
        if class_values is not None:
            scores = "[perclass] [" + "".join(
                "%s: %.4f\t" % (v[0], v[1]) for v in class_values) + "]"
            logging.info(scores)
if __name__ == "__main__":
    # Command-line entry point: parse the options, make sure the output and
    # log folders exist, send logging to both a file and the console, then
    # run the evaluation.
    parser = argparse.ArgumentParser(description="Training parameters")
    parser.add_argument("--gpu", default="0", help="Device index.")
    parser.add_argument("--epoch", default=0, type=int, help="snapshot name for evaluation")
    # The remaining options are plain on/off switches, registered in the same
    # order as before so the --help output is unchanged.
    for switch, switch_help in (
            ("--savemask", "whether save the prediction masks."),
            ("--savescoremap", "whether save the prediction scoremaps."),
            ("--crf", "whether use crf for post processing."),
            ("--flip", "whether use flip."),
    ):
        parser.add_argument(switch, help=switch_help, action="store_true")

    for folder in (conf.OUTPUT_FOLDER, conf.LOG_FOLDER):
        misc.my_mkdir(folder)

    # main() reads this module-level namespace.
    args = parser.parse_args()

    log_path = os.path.join(conf.LOG_FOLDER, "evaluation_log.log")
    logging.basicConfig(filename=log_path, level=logging.INFO)
    # Mirror every log record to the console as well.
    console = logging.StreamHandler()
    logging.getLogger().addHandler(console)

    main()