def save_pseudo_label(seg_score, seg_label, destination, img_name=None, save_npy=True):
    """Save a pseudo-label mask as a palette PNG and, optionally, its scores.

    The scores are stored as a dict ``{class_id: score_map}`` holding the
    background map (key 0) plus one map per class flagged in the image-level
    label.

    Args:
        seg_score: numpy array, shape [num_class, H, W].
        seg_label: numpy array, shape [H, W].
        destination: folder that receives ``<img_name>.png``; created when
            missing.
        img_name: file name only, without extension or path. The ``None``
            default is a placeholder; a real name has to be passed because it
            is used to build the output paths.
        save_npy: when true, also write the score dict with ``np.save`` into
            the module-level ``destination4logit`` folder.
    """
    os.makedirs(destination, exist_ok=True)

    img_label = load_image_label_from_xml(img_name=img_name,
                                          voc12_root=args.path4VOC_root)

    # Key 0 is background; VOC segmentation ids range over 0~20.
    pseudo_label_dict = {0: seg_score[0]}
    # NOTE(review): the other functions in this file read img_label as a
    # per-class 0/1 vector (position i <-> segmentation id i + 1), so the
    # class id must come from the position, not from the stored value.
    # Confirm against load_image_label_from_xml.
    for idx, present in enumerate(img_label):
        if int(present) > 0:
            pseudo_label_dict[idx + 1] = seg_score[idx + 1]

    # save score
    if save_npy:
        os.makedirs(destination4logit, exist_ok=True)
        np.save(os.path.join(destination4logit, img_name), pseudo_label_dict)

    # save mask
    misc_old.toimage(seg_label, cmin=0, cmax=255, pal=colors_map,
                     mode='P').save(
                         os.path.join(destination, "{}.png".format(img_name)))
def postprocess_image_save(model_output, img_name, save_prediction_np=False):
    """Turn the per-node model output into a full-resolution mask on disk.

    Steps:
        1. upsample the prediction scores to the original image size
        2. optionally save the prediction scores (``save_prediction_np``)
        3. save the prediction mask as a palette PNG
    """
    # The source image is only read to recover its original resolution.
    image = imread(os.path.join(args.path4Image, img_name + ".jpg"))
    full_h, full_w, _ = image.shape
    node_h = int(np.ceil(full_h / args.output_rate))
    node_w = int(np.ceil(full_w / args.output_rate))

    # [H*W, num_class] -> [num_class, H, W] -> [1, num_class, H, W]
    num_channel = model_output.size()[-1]
    score_map = model_output.reshape(node_h, node_w, num_channel)
    score_map = score_map.permute(2, 0, 1).unsqueeze(dim=0)

    # 1. bilinear upsampling back to [num_class, H_original, W_original]
    upsampler = torch.nn.Upsample(size=(full_h, full_w),
                                  mode='bilinear',
                                  align_corners=True)
    up_predict = upsampler(score_map).squeeze(dim=0)
    # per-pixel winning class: [H_original, W_original]
    up_predict_mask = torch.argmax(up_predict, dim=0)

    # 2. save the prediction scores
    if save_prediction_np:
        logit_dir = args.path4GCN_logit
        if not os.path.exists(logit_dir):
            os.makedirs(logit_dir)
            print("GCN prediction save path:", logit_dir)

        # the scores are treated as log-probabilities: p = exp{log(p)}
        up_predict_np = torch.exp(up_predict.clone()).cpu().numpy()
        img_label = load_image_label_from_xml(img_name=img_name,
                                              voc12_root=args.path4VOC_root)

        # background is always kept; other classes only when flagged
        predict_dict = {0: up_predict_np[0]}
        for class_id, flag in enumerate(img_label, start=1):
            if int(flag) > 0:
                print("key:{} ID:{}".format(class_id,
                                            SEG_ID_TO_NAME[class_id]))
                predict_dict[class_id] = up_predict_np[class_id]
        np.save(os.path.join(logit_dir, img_name + ".npy"), predict_dict)

    # 3. save the prediction as label
    path4save = args.path4GCN_label
    if not os.path.isdir(path4save):
        os.makedirs(path4save)
    mask_image = misc_old.toimage(up_predict_mask.cpu().numpy(),
                                  cmin=0,
                                  cmax=255,
                                  pal=colors_map,
                                  mode='P')
    mask_image.save(os.path.join(path4save, img_name + '.png'))
    print("Postprocessing image:{} save in {}".format(img_name, path4save))
def save_pseudo_label(seg_score, seg_label, destination, img_name="2007_000032", save_npy=True):
    """Save a label mask as ``.png`` and its label scores as a dictionary.

    The scores are stored as a dict ``{class_id: score_map}`` holding the
    background map (key 0) plus one map per class flagged in the image-level
    label.

    Args:
        seg_score: numpy array, shape [num_class, H, W].
        seg_label: numpy array, shape [H, W].
        destination: folder that receives ``<img_name>.png``; created when
            missing.
        img_name: file name only, without extension or path.
        save_npy: when true, also write the score dict with ``np.save`` into
            the module-level ``destination4logit`` folder.
    """
    os.makedirs(destination, exist_ok=True)

    img_label = load_image_label_from_xml(img_name=img_name,
                                          voc12_root=args.path4VOC_root)

    # Key 0 is background; keys range over 0~20 for the VOC dataset.
    pseudo_label_dict = {0: seg_score[0]}
    # image-label index + 1 = segmentation label.
    # NOTE(review): the other functions in this file read img_label as a
    # per-class 0/1 vector, so the class id must come from the position of
    # each flag rather than from its value. Confirm against
    # load_image_label_from_xml.
    for idx, present in enumerate(img_label):
        if int(present) > 0:
            pseudo_label_dict[idx + 1] = seg_score[idx + 1]

    # save score
    if save_npy:
        os.makedirs(destination4logit, exist_ok=True)
        np.save(os.path.join(destination4logit, img_name), pseudo_label_dict)

    # Save label mask.
    # NOTE: scipy.misc.toimage was removed in SciPy 1.2; this call needs an
    # older SciPy or a vendored replacement.
    scipy.misc.toimage(seg_label, cmin=0, cmax=255, pal=colors_map,
                       mode='P').save(
                           os.path.join(destination,
                                        "{}.png".format(img_name)))
def postprocess_image_save(epoch, model_output, img_name="2007_009788", save_prediction_np=False, save_CRF=True, rgbxy_t=None):
    """Upsample the model output to image size and save the predicted mask.

    Steps:
        1. reshape ``model_output`` from [H*W, num_class] to [num_class, H, W]
           and upsample it bilinearly to the original image size
        2. optionally (``args.use_LP``) refine the scores with label
           propagation and save that mask in ``args.path4save_LP``
        3. optionally (``save_prediction_np``) save ``exp`` of the scores as a
           dict ``{class_id: map}`` in ``args.path4GCN_logit``
        4. save the argmax mask as a palette PNG in ``args.path4GCN_label``

    Args:
        epoch: only used in the final log message.
        model_output: torch tensor whose last dimension is the class axis.
        img_name: file name only, without extension or path.
        save_prediction_np: enable step 3.
        save_CRF: unused; kept so existing callers keep working.
        rgbxy_t: unused; kept so existing callers keep working.
    """
    # The image is read here only to recover the original resolution.
    img = imread(os.path.join(args.path4img, img_name + ".jpg"))
    H_original, W_original, C = img.shape
    H = int(np.ceil(H_original / args.output_rate))
    W = int(np.ceil(W_original / args.output_rate))

    # === transfer shape (H*W, num_class) -> (num_class, H, W)
    model_output = model_output.reshape(H, W,
                                        model_output.size()[-1]).permute(
                                            2, 0, 1)

    # === use bilinear interpolation to upsample the predicted scores
    upsampling = torch.nn.Upsample(size=(H_original, W_original),
                                   mode='bilinear',
                                   align_corners=True)
    # [1, C, H, W] -> [C, H_original, W_original]
    up_predict = upsampling(model_output.unsqueeze(dim=0)).squeeze(dim=0)

    # >>>>>>>>>> label propagation
    if args.use_LP:
        # [H_original, W_original, 3] -> [H_original * W_original, 3]
        rgb_img = np.array(
            Image.open(os.path.join(args.path4Image, img_name + '.jpg')))
        rgb = torch.Tensor(rgb_img / 255.).reshape(H_original * W_original, 3)

        # Pairwise affinity, [H_ori*W_ori, H_ori*W_ori]. Kept under its own
        # name so it no longer shadows the downsampled width ``W``.
        affinity = gaussian_propagator(features=rgb)
        # This used to be a blocking ``input(...)`` debug prompt, which
        # stalled the run on stdin for every image.
        print("W.shape {}".format(affinity.shape))

        # Keep every operand on the device of the scores instead of
        # hard-coding ``.cuda()``.
        device = up_predict.device
        affinity = affinity.to(device)
        n_pixel = affinity.shape[0]

        # D^{-1/2} taken directly from the degree vector; the result is the
        # same diagonal matrix as inverting the dense diag(D) and taking the
        # element-wise square root, without the dense inverse.
        d_inv_sqrt = torch.diag(torch.sum(affinity, dim=1).pow(-0.5))
        S = d_inv_sqrt.matmul(affinity.matmul(d_inv_sqrt))
        identity = torch.eye(n_pixel, dtype=S.dtype, device=device)

        # [C, H, W] -> [C, H*W] -> [H_ori*W_ori, C]
        pred_LP = torch.matmul(
            torch.inverse(identity - args.alpha4LP * S),
            up_predict.reshape(-1, n_pixel).permute(1, 0))
        pred_LP = pred_LP.argmax(dim=1)  # [H_ori*W_ori]
        pred_LP = pred_LP.reshape(H_original, W_original)  # [H_ori, W_ori]

        # ============= save LP =================
        os.makedirs(args.path4save_LP, exist_ok=True)
        scipy.misc.toimage(pred_LP.cpu().numpy(),
                           cmin=0,
                           cmax=255,
                           pal=colors_map,
                           mode='P').save(
                               os.path.join(args.path4save_LP,
                                            img_name + '.png'))
    # >>>>>>>>>>

    up_predict_mask = torch.argmax(up_predict, dim=0)

    # === save the prediction scores as a dictionary in args.path4GCN_logit
    if save_prediction_np:
        path = args.path4GCN_logit
        if not os.path.exists(path):
            os.makedirs(path)
            print("GCN prediction save path:", path)

        # === note that the model prediction is log(p) -> p = exp{log(p)}
        up_predict_np = torch.exp(up_predict.clone()).cpu().numpy()
        img_label = load_image_label_from_xml(img_name=img_name,
                                              voc12_root=args.path4VOC_root)
        predict_dict = {0: up_predict_np[0]}
        for idx, cls_ in enumerate(img_label):
            if int(cls_) > 0:
                print("key:{} ID:{}".format(idx + 1,
                                            SEG_ID_TO_NAME[idx + 1]))
                predict_dict[idx + 1] = up_predict_np[idx + 1]
        np.save(os.path.join(path, img_name + ".npy"), predict_dict)

    # === save the prediction as label in path4save/<img_name>.png
    path4save = args.path4GCN_label
    os.makedirs(path4save, exist_ok=True)
    scipy.misc.toimage(up_predict_mask.cpu().numpy(),
                       cmin=0,
                       cmax=255,
                       pal=colors_map,
                       mode='P').save(
                           os.path.join(path4save, img_name + '.png'))
    print("image:{} save in {}!\n".format(img_name, epoch))
def _find_neighbors(card_x, card_y, H, W, radius=2):
    """Return ``(flat_idx, x, y)`` for each in-bounds pixel within ``radius``.

    The window is clamped to ``[0, H) x [0, W)`` so that no negative index can
    wrap around to the opposite border; the centre pixel itself is included.
    """
    neighbors = []
    for idx_x in range(max(card_x - radius, 0), min(card_x + radius + 1, H)):
        for idx_y in range(max(card_y - radius, 0),
                           min(card_y + radius + 1, W)):
            neighbors.append((idx_x * W + idx_y, idx_x, idx_y))
    return neighbors


def _build_grid_laplacian(rgbxy, rgb_std, xy_std, radius=1):
    """Build the dense Laplacian ``D - A`` of the pixel grid of ``rgbxy``.

    ``rgbxy`` is indexed as ``[x, y, channel]``; channels ``:3`` are used as
    colour and channels ``3:`` as position. Each affinity is the sum of two
    Gaussian-style kernels, one per channel group.
    """
    H, W, _ = rgbxy.shape
    A = torch.zeros([H * W, H * W], dtype=torch.float64)
    for node_idx in range(H * W):
        card_x, card_y = divmod(node_idx, W)
        for nei_idx, nei_x, nei_y in _find_neighbors(card_x, card_y, H, W,
                                                     radius=radius):
            diff_rgb = rgbxy[card_x, card_y, :3] - rgbxy[nei_x, nei_y, :3]
            diff_xy = rgbxy[card_x, card_y, 3:] - rgbxy[nei_x, nei_y, 3:]
            A[node_idx, nei_idx] = torch.exp(
                -torch.pow(torch.norm(diff_rgb), 2) /
                (2. * rgb_std)) + torch.exp(
                    -torch.pow(torch.norm(diff_xy), 2) / (2. * xy_std))
    return torch.diag(A.sum(dim=1)) - A


def train(**kwargs):
    """
    GCN training
    ---
    Trains a separate GCN on the graph of each image and evaluates it on the
    same graph at the last epoch.

    - the folder you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    - these folder would be created:
        - data/GCN_prediction/label
        - data/GCN_prediction/logit

    Keyword arguments read here: ``GPU``, ``debug``, ``start_index`` and
    ``end_index``; all of them are also forwarded to ``args.parse``.
    """
    t_start = time.time()
    # update the configuration from the given keyword arguments
    args.parse(**kwargs)
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # write the overridden parameters into the tensorboard run name
    writer = None
    if kwargs["debug"] is False:
        comment_init = ''.join('|{} '.format(v) for v in kwargs.values())
        writer = SummaryWriter(comment=comment_init)

    # === set evaluate object for evaluate later
    IoU = IOUMetric(args.num_class)
    IoU_CRF = IOUMetric(args.num_class)

    # === dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # === for each image, do training and testing in the same graph
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue

        t_be = time.time()
        if args.use_lap:
            # NOTE: the Laplacian regulariser is still under construction.
            # The function-level ``t_start`` is deliberately left untouched
            # here so that the total elapsed time printed at the end stays
            # correct.
            H, W, C = data["rgbxy_t"].shape
            L_mat = _build_grid_laplacian(
                data["rgbxy_t"],
                rgb_std=args.CRF_deeplab["bi_rgb_std"],
                xy_std=args.CRF_deeplab["bi_xy_std"],
                radius=1)
            print("time for Laplacian {:3f} s".format(time.time() - t_be))

        # === Model and optimizer
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        # Currently unused: kept for switching ``nclass`` to an adaptive
        # per-image class count (image labels do not include background).
        num_class = np.max(img_class) + 1
        model = GCN(
            nfeat=data["features_t"].shape[1],
            nhid=args.num_hid_unit,
            nclass=args.num_class,  # args.num_class | num_class
            dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # ==== moving tensor to GPU
        if args.cuda:
            model.to(device)
            data["features_t"] = data["features_t"].to(device)
            data["adj_t"] = data["adj_t"].to(device)
            data["labels_t"] = data["labels_t"].to(device)
            data["label_fg_t"] = data["label_fg_t"].to(device)
            data["label_bg_t"] = data["label_bg_t"].to(device)

        # === save the prediction before training
        if args.save_mask_before_train:
            model.eval()
            postprocess_image_save(img_name=data["img_name"],
                                   model_output=model(data["features_t"],
                                                      data["adj_t"]).detach(),
                                   epoch=0)

        # ==== Train model
        criterion_ent = HLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # === separate FG/BG label
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output,
                                            data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                # trace(output^T L output), normalised by the node count
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat.type_as(output),
                                      output))) / (H * W)
                gamma = 1e-2
                loss += gamma * loss_lap

            # NOTE(review): ``loss`` is a sum of tensors and is never None,
            # so this guard cannot fire as written.
            if loss is None:
                print("skip this image: ", data["img_name"])
                break

            # === compute gradients & update parameters
            # The loss already lives on the device of the model output, so
            # no extra ``.cuda()`` hop to the default GPU is needed.
            loss.backward()
            optimizer.step()

            # === save predicted mask at max epoch & IoU of img
            if (epoch + 1) % args.max_epoch == 0 and args.save_mask:
                t_now = time.time()
                if not kwargs["debug"]:
                    evaluate_IoU(model=model,
                                 features=data["features_t"],
                                 adj=data["adj_t"],
                                 img_name=data["img_name"],
                                 epoch=args.max_epoch,
                                 img_idx=ii + 1,
                                 writer=writer,
                                 IoU=IoU,
                                 IoU_CRF=IoU_CRF,
                                 use_CRF=False,
                                 save_prediction_np=True)
                print("[{}/{}] time: {:.4f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
        # end for epoch
    # end for dataloader

    if writer is not None:
        writer.close()
    print("training was Finished!")
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        (time.time() - t_start) // 3600, (time.time() - t_start) / 60 % 60,
        (time.time() - t_start) % 60))
def gcn_train(**kwargs):
    """
    GCN training
    ---
    Trains a separate GCN on the graph of each image and evaluates it on the
    same graph at the last epoch.

    - the folder you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    - these folder would be created:
        - data/GCN4DeepLab/Label
        - data/GCN4DeepLab/Logit

    Keyword arguments read here: ``GPU``, ``start_index`` and ``end_index``;
    all of them are also forwarded to ``args.parse``.
    """
    t_start = time.time()
    # update config
    args.parse(**kwargs)
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # tensorboard
    # ``writer`` is bound up front: it is passed to evaluate_IoU and tested
    # against None at the end even when args.use_TB is off.
    writer = None
    if args.use_TB:
        time_now = datetime.datetime.today()
        # runs are bucketed per half hour (minute // 30)
        time_now = "{}-{}-{}|{}-{}".format(time_now.year, time_now.month,
                                           time_now.day, time_now.hour,
                                           time_now.minute // 30)
        keys_ignore = ("start_index", "GPU")
        comment_init = ''.join('|{} '.format(v) for k, v in kwargs.items()
                               if k not in keys_ignore)
        writer = SummaryWriter(
            logdir='runs/{}/{}'.format(time_now, comment_init))

    # initial IoUMetric object for evaluation
    IoU = IOUMetric(args.num_class)

    # initial dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # train a separate GCN for each image
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue

        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        # Currently unused: the model is built with args.num_class.
        num_class = np.max(img_class) + 1
        model = GCN(nfeat=data["features_t"].shape[1],
                    nhid=args.num_hid_unit,
                    nclass=args.num_class,
                    dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # put data into GPU
        if args.cuda:
            model.to(device)
            data["features_t"] = data["features_t"].to(device)
            data["adj_t"] = data["adj_t"].to(device)
            data["labels_t"] = data["labels_t"].to(device)
            data["label_fg_t"] = data["label_fg_t"].to(device)
            data["label_bg_t"] = data["label_bg_t"].to(device)

        t_be = time.time()
        H, W, C = data["rgbxy_t"].shape
        N = H * W

        # laplacian
        if args.use_lap:
            L_mat = compute_lap_test(data, device, radius=2).to(device)
            print("Time for laplacian {:3.1f} s".format(time.time() - t_be))

        criterion_ent = HLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # foreground and background loss
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output,
                                            data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                # trace(output^T L output), normalised by the node count
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat.type_as(output), output))) / N
                gamma = 1e-2
                loss += gamma * loss_lap

            # NOTE(review): ``loss`` is a sum of tensors and is never None,
            # so this guard cannot fire as written.
            if loss is None:
                print("skip this image: ", data["img_name"])
                break

            # The loss already lives on the device of the model output, so
            # no extra ``.cuda()`` hop to the default GPU is needed.
            loss.backward()
            optimizer.step()

            # save predicted mask and IoU at max epoch
            if (epoch + 1) % args.max_epoch == 0 and args.save_mask:
                t_now = time.time()
                evaluate_IoU(model=model,
                             features=data["features_t"],
                             adj=data["adj_t"],
                             img_name=data["img_name"],
                             img_idx=ii + 1,
                             writer=writer,
                             IoU=IoU,
                             save_prediction_np=True)
                print("evaluate time: {:3.1f} s".format(time.time() - t_now))
                print("[{}/{}] time: {:.1f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
        print("======================================")

    if writer is not None:
        writer.close()
    print("training was Finished!")
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        (time.time() - t_start) // 3600, (time.time() - t_start) / 60 % 60,
        (time.time() - t_start) % 60))