def __init__(self, root=args.path4Image, graph_type="AFF", transofrms=None, train=True, test=False, skip_untill=-1, start_idx=0, end_idx=None, device=None): self.label_list = load_img_name_list(args.path4train_images) self.seg_label_dict = dict() self.test = test self.graph_type = graph_type # AFF|RW|GT self.train_file = load_img_name_list(args.path4train_images) self.skip_untill = skip_untill self.start_idx = start_idx if end_idx is None: self.end_idx = len(self.label_list) else: self.end_idx = end_idx self.device = device print("self.device: ", self.device) # self.ignore_list = [ # f.split(".")[0] for f in os.listdir( # "/home/u7577591/pygcn/data/GCN_prediction/label/2020_7_9_17h" # )] self.ignore_list = []
def gen_dataset(predict_root=None,
                destination="data_RES(2020)",
                destination4visulization="RES38_PSEUDO_LABEL",
                img_list_path=None):
    """
    Generate all data for a dataset
    ===
    - save data in `./data`, including:
        - rgbxy: the simplest feature, np.array
        - feature: the affinity feature; each node feature has 400+ dimensions, np.array
        - train_idx: np.array, value: bool, shape: [H*W]
        - test_idx: np.array, value: bool, shape: [H*W]
        - label: one-hot label, np.array, value: int, shape: [H*W]
    """
    t_start = time.time()
    print("")
    if predict_root is None:
        predict_root = os.path.join("..", "psa", "RES_CAM__")
    img_list = load_img_name_list(img_list_path)
    len_img_list = len(img_list)
    print("len_img_list ", len_img_list)
    for idx, name in enumerate(img_list, start=1):
        print("===========\n[{}/{}]: {} generate data...".format(
            idx, len_img_list, name))
        gen_one_data(img_name=name,
                     predict_root=predict_root,
                     destination=destination,
                     destination4visulization=destination4visulization)
    show_timing(time_start=t_start, time_end=time.time())
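# Usage sketch (hedged): the call below assumes the default PSA-style CAM folder
# "../psa/RES_CAM__" exists and that args.path4train_images points at the
# train_aug image-name list; adjust both to your own layout.
# gen_dataset(predict_root=os.path.join("..", "psa", "RES_CAM__"),
#             destination="data_RES(2020)",
#             destination4visulization="RES38_PSEUDO_LABEL",
#             img_list_path=args.path4train_images)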
def train(n_split=1, process_id=1, GPU_id=0, use_lap=True):
    """
    Train the whole dataset: split it into `n_split` chunks and run
    gcn_train() on the chunk assigned to this `process_id`.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU_id)
    time_now = datetime.datetime.today()
    time_now = "{}_{}_{}_{}h".format(time_now.year, time_now.month,
                                     time_now.day, time_now.hour)
    descript = "dataset: {}, graph: {}, feature: {}, partial label: {}".format(
        os.path.basename(args.path4Data),
        os.path.basename(args.path4AffGraph),
        os.path.basename(args.path4node_feat),
        os.path.basename(args.path4partial_label_label))
    print("descript ", descript)
    # args.path4GCN_label = os.path.join(args.path4GCN_label, time_now)
    # args.path4GCN_logit = os.path.join(args.path4GCN_logit, time_now)

    # Split the dataset; this process handles the chunk indexed by process_id
    len_dataset = len(load_img_name_list(args.path4train_images))
    chunk = int(np.ceil(len_dataset / n_split))
    start_idx = chunk * (int(process_id) - 1)
    end_idx = start_idx + chunk if (start_idx + chunk) < len_dataset else len_dataset

    # Train a separate GCN for each image
    gcn_train(descript=descript,
              start_index=start_idx,
              end_index=end_idx,
              GPU=GPU_id)
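# Example (sketch, hypothetical GPU ids): launch four processes, one per GPU,
# each training the GCNs for one quarter of the image list. With a 10,582-image
# train_aug list and n_split=4, chunk = ceil(10582 / 4) = 2646, so process 1
# covers images [0, 2646) and process 4 covers [7938, 10582).
# for pid, gpu in zip([1, 2, 3, 4], [0, 1, 2, 3]):
#     train(n_split=4, process_id=pid, GPU_id=gpu)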
def __init__(self, root=args.path4Image, graph_type="AFF", start_idx=0, end_idx=None, device=None): self.label_list = load_img_name_list(args.path4train_images) self.seg_label_dict = dict() # AFF self.graph_type = graph_type self.train_file = load_img_name_list(args.path4train_images) self.start_idx = start_idx self.end_idx = len(self.label_list) if end_idx is None else end_idx self.device = device print("self.device: ", self.device) self.ignore_list = []
def apply(**kwargs):
    parameter_dict = dict()
    t_start = time.time()
    time_now = datetime.datetime.today()
    time_now = "{}_{}_{}_{}h{}m".format(time_now.year, time_now.month,
                                        time_now.day, time_now.hour,
                                        time_now.minute)
    descript = ""
    parameter_dict["num_cpu"] = os.cpu_count() // 2
    parameter_dict["CRF_parameter"] = args.CRF
    parameter_dict["path4saveCRF_label"] = args.path4Complete_label_label
    parameter_dict["path4saveCRF_logit"] = args.path4Complete_label_logit
    if "pred_root" not in kwargs.keys():
        parameter_dict["pred_root"] = args.path4GCN_logit
    else:
        parameter_dict["pred_root"] = kwargs["pred_root"]
    parameter_dict["f_list"] = args.path4train_images
    evaluate_folder = parameter_dict["path4saveCRF_label"]
    img_list = load_img_name_list(parameter_dict["f_list"])
    # === load caller-supplied parameters (override the defaults above)
    for k, v in kwargs.items():
        if k in parameter_dict.keys():
            if "CRF_parameter" == k:
                parameter_dict[k] = eval(v)
            else:
                parameter_dict[k] = v
            print("{}: {}".format(k, parameter_dict[k]))
    print("path4saveCRF_label: ", parameter_dict["path4saveCRF_label"])
    print("pred_root: ", parameter_dict["pred_root"])
    p = Pool(parameter_dict["num_cpu"])
    crfP = partial(crf,
                   prediction_root=parameter_dict["pred_root"],
                   save_path_label=parameter_dict["path4saveCRF_label"],
                   save_path_logit=parameter_dict["path4saveCRF_logit"],
                   CRF_parameter=parameter_dict["CRF_parameter"])
    # run CRF with multiprocessing
    for _ in tqdm.tqdm(p.imap_unordered(crfP, img_list),
                       total=len(img_list)):
        pass
    p.close()
    p.join()
    evaluate_dataset_IoU(file_list=parameter_dict["f_list"],
                         predicted_folder=evaluate_folder,
                         descript=descript,
                         path4GT=args.path4VOC_class_aug)
    show_timing(time_start=t_start, time_end=time.time())
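# Usage sketch: refine a different logit folder while keeping the default CRF
# parameters. Note that a caller-supplied "CRF_parameter" goes through eval(),
# so it must be passed as a string (e.g. the repr of a parameter dict).
# apply(pred_root=args.path4GCN_logit,
#       path4saveCRF_label=args.path4Complete_label_label)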
def PPL_generate(num_cpu=1):
    """
    Generate partial pseudo labels in parallel
    """
    topredion_rate = args.confident_ratio
    pred_folder = args.path4CAM
    save_folder = args.partial_label_label
    save_folder_logit = args.partial_label_logit
    folder_list = ['_DN_UP', '_DN', '_UP']
    for f_ in folder_list:
        if not os.path.exists(save_folder + f_):
            os.makedirs(save_folder + f_)
    torch.multiprocessing.set_start_method('spawn')
    device = torch.device("cuda")
    img_list = load_img_name_list(args.path4train_images)
    print("type(img_list)", type(img_list))
    p = Pool(num_cpu)
    gen_partial_label_with_ratioP = partial(
        gen_partial_label_with_ratio,
        predict_root=pred_folder,
        destination4visulization=save_folder,
        destination4logit=save_folder_logit,
        confident_region=topredion_rate,
        device=device)
    for _ in tqdm.tqdm(p.imap_unordered(gen_partial_label_with_ratioP,
                                        img_list),
                       total=len(img_list)):
        pass
    p.close()
    p.join()
    evaluate_dataset_IoU(predicted_folder=save_folder + "_DN_UP")
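# Usage sketch: because PPL_generate() selects the 'spawn' start method, the
# entry point should be guarded. Spawned workers re-import the main module, and
# an unguarded top-level call would try to build the Pool again while the child
# is still bootstrapping (multiprocessing raises a RuntimeError).
# if __name__ == "__main__":
#     PPL_generate(num_cpu=os.cpu_count() // 2)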
def gen_label_from_data(destination,
                        root=os.path.join("..", "..", "..", "work", "User",
                                          "pygcn", "data")):
    """
    Visualize the pseudo label used to train the GCN (IoU = 54.63)
    ===
    - the files (.ally and .x) in work/UserName/pygcn/data/ are required
    - the training data is assumed to be saved with pickle,
      e.g. xxx.allx | xxx.ally | xxx.x | xxx.y | ...
    - .ally is assumed to be the one-hot prediction label,
      type: np.array, shape: [H*W, 21],
      where H = np.ceil(H_original / 8) and W is computed the same way
    - .x is assumed to be train_idx, type: list, shape: [< H*W], element: int (not bool)
    """
    import pickle as pkl
    import sys
    t_start = time.time()
    file_name_list = load_img_name_list(args.path4train_images)
    print("file_name_list: ", len(file_name_list))
    for file_name in file_name_list:
        print("Read... {}".format(file_name))
        """
        x: train_idx. [list]
        y: test_idx. [list]
        allx: features for train and test. [sparse matrix]
        ally: labels, pseudo for train_idx, ground truth for test_idx. [np.array]
        rgbxy: another feature for comparison. [np.array]
        """
        seg_label = None
        extension_list = ["ally", "x"]
        objects = []
        for ext in extension_list:
            with open(
                    "{}.{}.{}".format(os.path.join(args.path4data, "ind"),
                                      file_name, ext), 'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pkl.load(f, encoding='latin1'))
                else:
                    objects.append(pkl.load(f))
        seg_label, train_idx = tuple(objects)
        print("seg_label.shape ", seg_label.shape)
        print("type(train_idx[0]): ", type(train_idx[0]))
        """
        find H, W
        =============================================
        """
        GT_seg = Image.open(os.path.join(args.path4Class, file_name + '.png'))
        print("np.array(GT_seg).shape:", np.array(GT_seg).shape)
        H_origin, W_origin = np.array(GT_seg).shape
        H, W = int(np.ceil(H_origin / 8)), int(np.ceil(W_origin / 8))
        seg_label_HW = np.argmax(seg_label, axis=1)
        """
        reshape the seg_label from [H*W, 21] to [H, W]
        =============================================
        """
        seg_label_HW = seg_label_HW.reshape(H, W)
        train_idx_bool = np.zeros_like(seg_label_HW.reshape(-1))
        print("train_idx_bool.shape ", train_idx_bool.shape)
        train_idx_bool[:] = False
        train_idx_bool[train_idx] = True
        train_idx_bool = train_idx_bool.reshape(H, W)
        print("np.array(train_idx).shape: ", np.array(train_idx).shape)
        """
        upsample the label and save it in a visualizable way
        =============================================
        """
        seg_label_HW_UP = F.interpolate(
            torch.FloatTensor(seg_label_HW[np.newaxis, np.newaxis]),
            size=(H_origin, W_origin),
            mode='nearest').squeeze().cpu().numpy()
        train_idx_bool_UP = F.interpolate(
            torch.FloatTensor(train_idx_bool[np.newaxis, np.newaxis]),
            size=(H_origin, W_origin),
            mode='nearest').squeeze().cpu().numpy()
        train_idx_bool_UP = np.asarray(train_idx_bool_UP, dtype=np.bool)
        """
        save it!
        =============================================
        """
        folder_list = [
            "VGG_CAM_PSEUDO_LABEL(54p63)HW",
            "VGG_CAM_PSEUDO_LABEL(54p63)HW_UP"
        ]
        for folder in folder_list:
            if not os.path.exists(folder):
                os.mkdir(folder)
        # save the upsampled label
        print("save upsampled label!!!")
        scipy.misc.toimage(np.where(train_idx_bool_UP, seg_label_HW_UP, 255),
                           cmin=0,
                           cmax=255,
                           pal=colors_map,
                           mode='P').save(
                               os.path.join("VGG_CAM_PSEUDO_LABEL(54p63)HW_UP",
                                            file_name + '.png'))
        # save the downsampled label (used to train the GCN)
        print("save original label!!!")
        scipy.misc.toimage(np.where(train_idx_bool, seg_label_HW, 255),
                           cmin=0,
                           cmax=255,
                           pal=colors_map,
                           mode='P').save(
                               os.path.join("VGG_CAM_PSEUDO_LABEL(54p63)HW",
                                            file_name + '.png'))
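# Worked example of the stride-8 relation assumed above: a 366x500 VOC image
# gives H, W = ceil(366/8), ceil(500/8) = 46, 63, so .ally has shape
# [46*63, 21] = [2898, 21], and argmax + reshape yields a 46x63 label map.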
    scipy.misc.toimage(new_PPL_dn,
                       cmin=0,
                       cmax=255,
                       pal=colors_map,
                       mode='P').save(
                           os.path.join(destination4visulization,
                                        "{}.png".format(img_name)))


if __name__ == "__main__":
    from os.path import join as opj
    """
    - data_RES(UP_CRF_DN): apply CRF to the upsampled (original-size) CAM,
      then save the label & score at the downsampled size
    - data_RES(2020): apply CRF to the downsampled CAM,
      then save the label & score at the upsampled size
    """
    # generate partial labels >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    img_list = load_img_name_list(args.path4train_images)
    t_start = time.time()
    topredion_rate = .3
    cam_mode = "cam"
    if cam_mode == "irn":
        pred_folder = "{}@PIL_near@confident_ratio_{}_UP".format(
            opj(args.path4partial_label_label,
                "RES_CAM_TRAIN_AUG_PARTIAL_PSEUDO_LABEL"), topredion_rate)
        cam_folder = "PPL_mix_Bmap"
    elif cam_mode == "rw":
        pred_folder = "../psa/RES_RW_np"
        cam_folder = "RES_RW_np"
    else:
        pred_folder = "../psa/RES_CAM_TRAIN_AUG"
        cam_folder = "RES_CAM_TRAIN_AUG_PARTIAL_PSEUDO_LABEL"
def train2020(n_split=1, process_id=1, GPU_id=0, debug=False, use_lap=True):
    """
    Train the whole dataset by calling train()
    ---
    """
    # os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, [0, 1, 2, 3]))
    os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU_id)
    # mIoU / occupancy of the partial pseudo labels at each confident ratio
    IOU_dic = {
        .1: 78.07,
        .2: 81.12,
        .3: 81.65,
        .4: 81.38,
        .5: 80.98,
        .7: 79.97,
        .9: 78.83,
        1.: 78.15
    }
    Occupy_dic = {
        .1: 32.1,
        .2: 34.53,
        .3: 36.58,
        .4: 38.32,
        .5: 39.7,
        .7: 41.65,
        .9: 43.30,
        1.: 44.49
    }
    time_now = datetime.datetime.today()
    time_now = "{}_{}_{}_{}h".format(time_now.year, time_now.month,
                                     time_now.day, time_now.hour)
    # ====== specify the paths and arguments ======
    args.hid_unit = 40
    args.max_epoch = 250
    args.drop_rate = .3
    args.confident_ratio = 0.3
    # args.path4train_images | args.path4val_images
    args.path4train_images = args.path4train_images
    if getpass.getuser() == "u7577591":
        args.path4AffGraph = os.path.join("/work/u7577591/irn",
                                          "AFF_MAT_normalize_IRNet")
        args.path4partial_label_label = os.path.join(
            args.path4partial_label_label,
            "RES_CAM_TRAIN_AUG_PARTIAL_PSEUDO_LABEL@PIL_near@confident_ratio_{}_cam_DN_johnney"
            .format(args.confident_ratio))
        args.path4node_feat = os.path.join(
            "../../../work/" + getpass.getuser() + "/irn",
            "AFF_FEATURE_res50_W")
    args.use_LP = False
    args.use_lap = use_lap
    descript = "dataset: {}, graph: {}, feature: {}, partial label: {}".format(
        os.path.basename(args.path4data),
        os.path.basename(args.path4AffGraph),
        os.path.basename(args.path4node_feat),
        os.path.basename(args.path4partial_label_label))
    descript = "GCN prediction@IRNet@KNN laplacian @loss_ent@ PPL confident ratio={} & IOU@{} occupy@{}".format(
        args.confident_ratio, IOU_dic[args.confident_ratio],
        Occupy_dic[args.confident_ratio])
    print("descript ", descript)
    print("here is branch `debug` !!")
    args.path4GCN_label = os.path.join(args.path4GCN_label, time_now)
    args.path4GCN_logit = os.path.join(args.path4GCN_logit, time_now)
    # ====== split the dataset for this process ======
    len_dataset = len(load_img_name_list(args.path4train_images))
    chunk = int(np.ceil(len_dataset / n_split))
    start_idx = chunk * (int(process_id) - 1)
    end_idx = start_idx + chunk if (start_idx + chunk) < len_dataset else len_dataset
    # ====== training ======
    train(use_crf=False,
          descript=descript,
          start_index=start_idx,
          end_index=end_idx,
          GPU=GPU_id,
          debug=debug)
    # ====== plot IoU on tensorboard and evaluate the mean IoU ======
    if len(os.listdir(args.path4GCN_label)) == len(
            load_img_name_list(args.path4train_images)):
        plot_iou_tersorboard(predicted_folder=args.path4GCN_label)
        evaluate_dataset_IoU(predicted_folder=args.path4GCN_label,
                             file_list=args.path4train_images)
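# Usage sketch (hypothetical values): train the second of five splits on GPU 1
# with the debug branch enabled; the confident ratio fixed inside train2020()
# (0.3) corresponds to IOU_dic[.3] = 81.65 and Occupy_dic[.3] = 36.58.
# train2020(n_split=5, process_id=2, GPU_id=1, debug=True)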