示例#1
0
 def save_pseudo_label(seg_score,
                       seg_label,
                       destination,
                       img_name=None,
                       save_npy=True):
     """
     Save a pseudo label mask as a palette PNG and, optionally, its scores.

     - seg_score: numpy array, shape: [num_class,H,W]
     - seg_label: numpy array, shape: [H,W]
     - destination: folder that receives `<img_name>.png`; created if missing
     - img_name: str, only file name, not include extension or path
     - save_npy: bool, when True the score dictionary is also written to the
       folder named by the module-level `destination4logit`
     """
     # Create the output folder up front; makedirs also handles nested paths
     # and an already existing folder.
     os.makedirs(destination, exist_ok=True)

     img_label = load_image_label_from_xml(img_name=img_name,
                                           voc12_root=args.path4VOC_root)
     # channel 0 of the scores is always kept
     pseudo_label_dict = {0: seg_score[0]}
     # VOC dataset: key range 0~20
     # NOTE(review): this loop treats every element of `img_label` as a class
     # index (element + 1 is used as the score channel). Other code in this
     # project reads `img_label` as per-class 0/1 flags via enumerate() --
     # confirm which format load_image_label_from_xml returns here.
     for key in img_label:
         pseudo_label_dict[int(key + 1)] = seg_score[int(key + 1)]

     # save score
     if save_npy:
         destination_np = destination4logit
         os.makedirs(destination_np, exist_ok=True)
         # np.save appends the ".npy" extension to the file name
         np.save(os.path.join(destination_np, img_name), pseudo_label_dict)

     # save mask
     mask_path = os.path.join(destination, "{}.png".format(img_name))
     misc_old.toimage(seg_label, cmin=0, cmax=255, pal=colors_map,
                      mode='P').save(mask_path)
示例#2
0
文件: train.py 项目: Xavier-Pan/WSGCN
def postprocess_image_save(model_output, img_name, save_prediction_np=False):
    """
    Upsample one image's prediction to full resolution and write it to disk.

    1. reshape the flat scores and bilinearly upsample them to the size of
       the source JPEG
    2. optionally save the per-class scores as a dictionary (`.npy`)
    3. save the arg-max mask as a palette PNG

    - `model_output`: tensor whose last axis is the class axis; it is
      reshaped here to [H, W, num_class] with
      H, W = ceil(original size / args.output_rate)
    - `img_name`: str, file name without extension or path
    - `save_prediction_np`: bool, also store the scores when True
    """
    # the source image is only read to learn the target resolution
    image = imread(os.path.join(args.path4Image, img_name + ".jpg"))
    full_h, full_w, _ = image.shape
    rate = args.output_rate
    small_h = int(np.ceil(full_h / rate))
    small_w = int(np.ceil(full_w / rate))

    # [H*W,num_class] -> [H,W,num_class] -> [num_class,H,W]
    # -> [1,num_class,H,W]
    num_channel = model_output.size()[-1]
    scores = model_output.reshape(small_h, small_w, num_channel)
    scores = scores.permute(2, 0, 1).unsqueeze(dim=0)

    # 1. bilinear upsampling: [1,C,H,W] -> [C,H_original,W_original]
    resize = torch.nn.Upsample(size=(full_h, full_w),
                               mode='bilinear',
                               align_corners=True)
    up_predict = resize(scores).squeeze(dim=0)

    # arg-max over the class axis -> [H_original,W_original]
    up_predict_mask = torch.argmax(up_predict, dim=0)

    # 2. save the prediction score
    if save_prediction_np:
        logit_dir = args.path4GCN_logit
        if not os.path.exists(logit_dir):
            os.makedirs(logit_dir)
            print("GCN prediction save path:", logit_dir)
        # per the original note, the scores are log(p): p = exp{log(p)}
        prob_np = up_predict.exp().cpu().numpy()
        img_label = load_image_label_from_xml(img_name=img_name,
                                              voc12_root=args.path4VOC_root)
        # channel 0 is always stored; channel k only when flag k-1 is set
        predict_dict = {0: prob_np[0]}
        for seg_id, flag in enumerate(img_label, start=1):
            if int(flag) <= 0:
                continue
            print("key:{} ID:{}".format(seg_id, SEG_ID_TO_NAME[seg_id]))
            predict_dict[seg_id] = prob_np[seg_id]
        np.save(os.path.join(logit_dir, img_name + ".npy"), predict_dict)

    # 3. save the prediction as label
    label_dir = args.path4GCN_label
    if not os.path.isdir(label_dir):
        os.makedirs(label_dir)
    mask_png = misc_old.toimage(up_predict_mask.cpu().numpy(),
                                cmin=0,
                                cmax=255,
                                pal=colors_map,
                                mode='P')
    mask_png.save(os.path.join(label_dir, img_name + '.png'))
    print("Postprocessing image:{} save in {}".format(img_name, label_dir))
示例#3
0
    def save_pseudo_label(seg_score,
                          seg_label,
                          destination,
                          img_name="2007_000032",
                          save_npy=True):
        """
        Save Label and Label Score to `.png` and dictionary
        ===
        - `seg_score`: numpy array, shape: [num_class,H,W]
        - `seg_label`: numpy array, shape: [H,W]
        - `destination`: folder that receives `<img_name>.png`; created if
          missing
        - `img_name`: str, only file name, not include extension or path
        - `save_npy`: bool, when True the score dictionary is also written to
          the folder named by `destination4logit` (defined outside this
          function)
        """
        # makedirs copes with nested paths and with a folder that appears
        # between the check and the creation (os.mkdir does neither)
        os.makedirs(destination, exist_ok=True)

        img_label = load_image_label_from_xml(img_name=img_name,
                                              voc12_root=args.path4VOC_root)
        # channel 0 of the scores is always kept
        pseudo_label_dict = {0: seg_score[0]}
        # key range from 0~20 if you use VOC dataset
        # NOTE(review): the original author was unsure whether the key should
        # be int(key) or int(key + 1). The loop assumes each element of
        # `img_label` is a class index and that index + 1 is the segmentation
        # label -- confirm against load_image_label_from_xml.
        for key in img_label:
            pseudo_label_dict[int(key + 1)] = seg_score[int(key + 1)]

        # save score
        if save_npy:
            destination_np = destination4logit
            os.makedirs(destination_np, exist_ok=True)
            # np.save appends the ".npy" extension to the file name
            np.save(os.path.join(destination_np, img_name), pseudo_label_dict)

        # Save label mask
        # NOTE(review): scipy.misc.toimage is not available in recent SciPy
        # releases -- confirm the pinned SciPy version still provides it.
        mask_path = os.path.join(destination, "{}.png".format(img_name))
        scipy.misc.toimage(seg_label,
                           cmin=0,
                           cmax=255,
                           pal=colors_map,
                           mode='P').save(mask_path)
示例#4
0
def postprocess_image_save(epoch,
                           model_output,
                           img_name="2007_009788",
                           save_prediction_np=False,
                           save_CRF=True,
                           rgbxy_t=None):
    """
    Upsample a prediction, optionally refine/save its scores, save the mask.

    1. upsample prediction scores to the resolution of the source image
    2. (option, `args.use_LP`) label propagation on the upsampled scores;
       the propagated mask is saved in `args.path4save_LP`
    3. (option) save prediction scores as a dictionary in
       `args.path4GCN_logit`
    4. save the arg-max prediction mask in `args.path4GCN_label`

    - `epoch`: only used in the final log message
    - `model_output`: tensor whose last axis is the class axis; it is
      reshaped here to [H, W, num_class] with
      H, W = ceil(original size / args.output_rate)
    - `img_name`: str, file name without extension or path
    - `save_prediction_np`: bool, save the scores when True
    - `save_CRF`, `rgbxy_t`: accepted for compatibility, not used here
    """
    # load image as nd_array (only its shape is needed here)
    # NOTE(review): this reads `args.path4img` while the label-propagation
    # branch below reads `args.path4Image` -- confirm both options exist.
    img = imread(os.path.join(args.path4img, img_name + ".jpg"))
    H_original, W_original, _ = img.shape
    H = int(np.ceil(H_original / args.output_rate))
    W = int(np.ceil(W_original / args.output_rate))

    # === transfer shape(H*W,num_class) -> shape(num_class,H,W)
    model_output = model_output.reshape(H, W,
                                        model_output.size()[-1]).permute(
                                            2, 0, 1)
    # === use bilinear to upsample the predicted mask
    upsampling = torch.nn.Upsample(size=(H_original, W_original),
                                   mode='bilinear',
                                   align_corners=True)
    model_output = model_output.unsqueeze(dim=0)  # [1,C,H,W]
    up_predict = upsampling(model_output).squeeze(
        dim=0)  # [C,H_original,W_original]

    # >>>>>>>>>> label propagation
    if args.use_LP:
        # [H_original,W_original,3]
        img = np.array(
            Image.open(os.path.join(args.path4Image, img_name + '.jpg')))
        rgb = torch.Tensor(img / 255.)
        rgb = rgb.reshape(H_original * W_original,
                          3)  # [H_original*W_original,3]
        # pairwise affinity, [H_ori*W_ori,H_ori*W_ori]; kept under its own
        # name so the width `W` above is not shadowed
        affinity = gaussian_propagator(features=rgb)
        # The original paused on input() here (debug leftover), which blocks
        # unattended runs; the shape is only logged now.
        print("W.shape {}".format(affinity.shape))
        # keep every operand on the device of the prediction instead of
        # mixing a CUDA identity with tensors that were built on the CPU
        affinity = affinity.to(up_predict.device)
        num_node = affinity.shape[0]

        # S = D^-1/2 * W * D^-1/2; D is diagonal, so scaling rows and
        # columns avoids inverting and multiplying dense N x N matrices
        inv_sqrt_degree = torch.pow(torch.sum(affinity, dim=1), -0.5)
        S = (inv_sqrt_degree.unsqueeze(1) * affinity *
             inv_sqrt_degree.unsqueeze(0))
        identity = torch.eye(n=num_node, dtype=S.dtype, device=S.device)

        # [C,H,W] -> [C,H*W] -> [H_ori*W_ori,C]
        flat_predict = up_predict.reshape(-1, num_node).permute(1, 0)
        # closed form (I - alpha*S)^-1 * Y -> [H_ori*W_ori,C]
        pred_LP = torch.matmul(
            torch.inverse(identity - args.alpha4LP * S), flat_predict)
        pred_LP = pred_LP.argmax(dim=1)  # [H_ori*W_ori]
        pred_LP = pred_LP.reshape(H_original, W_original)  # [H_ori,W_ori]

        # ============= save LP =================
        if not os.path.isdir(args.path4save_LP):
            os.makedirs(args.path4save_LP)
        # NOTE(review): scipy.misc.toimage is not available in recent SciPy
        # releases -- confirm the pinned SciPy version still provides it.
        scipy.misc.toimage(pred_LP.cpu().numpy(),
                           cmin=0,
                           cmax=255,
                           pal=colors_map,
                           mode='P').save(
                               os.path.join(args.path4save_LP,
                                            img_name + '.png'))

    # >>>>>>>>>>
    # arg-max over the class axis -> [H_original,W_original]
    up_predict_mask = torch.argmax(up_predict, dim=0)

    # === save the prediction score in dictionary
    if save_prediction_np:
        path = args.path4GCN_logit
        if not os.path.exists(path):
            os.makedirs(path)
            print("GCN prediction save path:", path)
        # === note that model prediction is log(p) -> p = exp{log(p)}
        up_predict_np = torch.exp(up_predict).cpu().numpy()
        img_label = load_image_label_from_xml(img_name=img_name,
                                              voc12_root=args.path4VOC_root)
        # channel 0 is always stored; channel idx+1 only when flag idx is set
        predict_dict = {0: up_predict_np[0]}
        for idx, cls_ in enumerate(img_label):
            if int(cls_) > 0:
                print("key:{} ID:{}".format(idx + 1, SEG_ID_TO_NAME[idx + 1]))
                predict_dict[idx + 1] = up_predict_np[idx + 1]
        np.save(os.path.join(path, img_name + ".npy"), predict_dict)

    # === save the prediction as label
    # === in os.path.join(path4save, img_name + '.png')
    path4save = args.path4GCN_label
    if not os.path.isdir(path4save):
        os.makedirs(path4save)
    scipy.misc.toimage(up_predict_mask.cpu().numpy(),
                       cmin=0,
                       cmax=255,
                       pal=colors_map,
                       mode='P').save(
                           os.path.join(path4save, img_name + '.png'))
    # the original message printed the epoch where the save location belongs
    print("image:{} save in {} (epoch {})!\n".format(img_name, path4save,
                                                     epoch))
示例#5
0
def train(**kwargs):
    """
    GCN training
    ---
    - the folder you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    - these folder would be created:
        - data/GCN_prediction/label
        - data/GCN_prediction/logit

    A fresh GCN and Adam optimizer are created for every item yielded by
    `graph_voc`; the model is trained for `args.max_epoch` epochs on that
    item and evaluated once after the last epoch.

    - `kwargs`: forwarded to `args.parse`; the keys "GPU", "debug",
      "start_index" and "end_index" are also read directly here. When
      "debug" is truthy no tensorboard writer is created and the
      evaluation step is skipped.
    """
    t_start = time.time()
    # update the configuration from the command line arguments
    args.parse(**kwargs)
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # One flag drives writer creation, evaluation and writer.close(), so the
    # writer can never be referenced without having been created.
    debug = bool(kwargs["debug"])
    writer = None
    if not debug:
        # write the changed parameters into the tensorboard run name
        comment_init = ''.join('|{} '.format(v) for v in kwargs.values())
        writer = SummaryWriter(comment=comment_init)

    # === set evaluate object for evaluate later
    IoU = IOUMetric(args.num_class)
    IoU_CRF = IOUMetric(args.num_class)

    # === dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # === for each image, do training and testing in the same graph
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue

        # === graph Laplacian for the smoothness loss (option)
        L_mat = None
        num_node = None
        if args.use_lap:
            t_be = time.time()
            L_mat = _build_laplacian(data["rgbxy_t"], radius=1)
            num_node = L_mat.shape[0]  # H * W
            print("time for Laplacian {:3f} s".format(time.time() - t_be))

        # === Model and optimizer
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        # Only needed for the adaptive `nclass` alternative noted below; the
        # guard keeps an image without any flagged class from crashing.
        num_class = max(img_class) + 1 if img_class else 1
        model = GCN(
            nfeat=data["features_t"].shape[1],
            nhid=args.num_hid_unit,
            # image label don't have BG
            # adaptive num_class should have better performance
            nclass=args.num_class,  # args.num_class| num_class
            dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # ==== moving tensor to GPU
        if args.cuda:
            model.to(device)
            for name in ("features_t", "adj_t", "labels_t", "label_fg_t",
                         "label_bg_t"):
                data[name] = data[name].to(device)

        # === save the prediction before training
        if args.save_mask_before_train:
            model.eval()
            postprocess_image_save(img_name=data["img_name"],
                                   model_output=model(data["features_t"],
                                                      data["adj_t"]).detach(),
                                   epoch=0)

        # ==== Train model
        criterion_ent = HLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # === separate FG/BG label
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output,
                                            data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                # Match dtype/device of the output once; later epochs reuse
                # the converted matrix (`.to` returns self when nothing has
                # to change) instead of re-copying N x N values per epoch.
                L_mat = L_mat.to(output)
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat, output))) / num_node
                gamma = 1e-2
                loss += gamma * loss_lap

            # === calculus loss & updated parameters
            # backward on the loss where it lives; `.cuda()` would first copy
            # it to the default CUDA device
            loss.backward()
            optimizer.step()

            # === save predict mask at max epoch & IoU of img
            is_last_epoch = epoch + 1 == args.max_epoch
            if is_last_epoch and args.save_mask:
                t_now = time.time()
                if not debug:
                    evaluate_IoU(model=model,
                                 features=data["features_t"],
                                 adj=data["adj_t"],
                                 img_name=data["img_name"],
                                 epoch=args.max_epoch,
                                 img_idx=ii + 1,
                                 writer=writer,
                                 IoU=IoU,
                                 IoU_CRF=IoU_CRF,
                                 use_CRF=False,
                                 save_prediction_np=True)
                print("[{}/{}] time: {:.4f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
        # end for epoch
    # end for dataloader
    if writer is not None:
        writer.close()
    print("training was Finished!")
    # whole seconds first, so rounding can never print "60 m" or "60 s"
    elapsed = int(time.time() - t_start)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        hours, minutes, seconds))


def _find_neighbors(card_x, card_y, H, W, radius=2):
    """Return (flat_idx, x, y) for each in-bounds pixel within `radius`."""
    neighbors = []
    # clamp the window to the grid so no negative index can wrap around
    for idx_x in range(max(card_x - radius, 0), min(card_x + radius + 1, H)):
        for idx_y in range(max(card_y - radius, 0),
                           min(card_y + radius + 1, W)):
            neighbors.append((idx_x * W + idx_y, idx_x, idx_y))
    return neighbors


def _build_laplacian(rgbxy, radius=1):
    """Return the dense Laplacian D - A of the pixel grid in `rgbxy`."""
    # rgbxy is unpacked as [H, W, C]; channels [:3] and [3:] are compared
    # separately (presumably colour and position -- verify against caller)
    H, W, _ = rgbxy.shape
    rgb_scale = 2. * args.CRF_deeplab["bi_rgb_std"]
    xy_scale = 2. * args.CRF_deeplab["bi_xy_std"]
    A = torch.zeros([H * W, H * W], dtype=torch.float64)
    for node_idx in range(H * W):
        card_x, card_y = divmod(node_idx, W)
        center = rgbxy[card_x, card_y]
        for nei_idx, nei_x, nei_y in _find_neighbors(card_x,
                                                     card_y,
                                                     H,
                                                     W,
                                                     radius=radius):
            diff = center - rgbxy[nei_x, nei_y]
            # sum of two Gaussian-shaped kernels on the two channel groups
            A[node_idx, nei_idx] = torch.exp(
                -torch.pow(torch.norm(diff[:3]), 2) /
                rgb_scale) + torch.exp(
                    -torch.pow(torch.norm(diff[3:]), 2) / xy_scale)
    return torch.diag(A.sum(dim=1)) - A
示例#6
0
文件: train.py 项目: Xavier-Pan/WSGCN
def gcn_train(**kwargs):
    """
    GCN training
    ---
    - the folder you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    - these folder would be created:
        - data/GCN4DeepLab/Label
        - data/GCN4DeepLab/Logit

    A separate GCN and Adam optimizer are created for every item yielded by
    `graph_voc`; the model is trained for `args.max_epoch` epochs on that
    item and evaluated once after the last epoch.

    - `kwargs`: forwarded to `args.parse`; the keys "GPU", "start_index"
      and "end_index" are also read directly here.
    """
    t_start = time.time()
    # update config
    args.parse(**kwargs)
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # tensorboard
    # `writer` always exists: previously it was only bound when
    # args.use_TB was set, so the evaluate_IoU call and the final
    # `writer is not None` check raised NameError otherwise.
    # NOTE(review): evaluate_IoU now receives writer=None when tensorboard
    # is disabled -- confirm it tolerates that.
    writer = None
    if args.use_TB:
        time_now = datetime.datetime.today()
        # runs are grouped into half-hour buckets (minute // 30)
        time_now = "{}-{}-{}|{}-{}".format(time_now.year, time_now.month,
                                           time_now.day, time_now.hour,
                                           time_now.minute // 30)

        keys_ignore = ("start_index", "GPU")
        comment_init = ''.join('|{} '.format(v) for k, v in kwargs.items()
                               if k not in keys_ignore)
        writer = SummaryWriter(
            logdir='runs/{}/{}'.format(time_now, comment_init))

    # initial IoUMetric object for evaluation
    IoU = IOUMetric(args.num_class)

    # initial dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # train a separate GCN for each image
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        # Not used below (the model is built with args.num_class); the guard
        # keeps an image without any flagged class from crashing.
        num_class = max(img_class) + 1 if img_class else 1
        model = GCN(nfeat=data["features_t"].shape[1],
                    nhid=args.num_hid_unit,
                    nclass=args.num_class,
                    dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # put data into GPU
        if args.cuda:
            model.to(device)
            for name in ("features_t", "adj_t", "labels_t", "label_fg_t",
                         "label_bg_t"):
                data[name] = data[name].to(device)

        t_be = time.time()

        H, W, _ = data["rgbxy_t"].shape
        N = H * W
        # laplacian
        L_mat = None
        if args.use_lap:
            L_mat = compute_lap_test(data, device, radius=2).to(device)
            print("Time for laplacian {:3.1f} s".format(time.time() - t_be))

        criterion_ent = HLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # foreground and background loss
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output,
                                            data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                # Match dtype/device of the output once; `.to` returns self
                # when nothing has to change, so later epochs skip the copy.
                L_mat = L_mat.to(output)
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat, output))) / N

                gamma = 1e-2
                loss += gamma * loss_lap

            # backward on the loss where it lives; `.cuda()` would first copy
            # it to the default CUDA device
            loss.backward()
            optimizer.step()

            # save predicted mask and IoU at max epoch
            is_last_epoch = epoch + 1 == args.max_epoch
            if is_last_epoch and args.save_mask:
                t_now = time.time()
                evaluate_IoU(model=model,
                             features=data["features_t"],
                             adj=data["adj_t"],
                             img_name=data["img_name"],
                             img_idx=ii + 1,
                             writer=writer,
                             IoU=IoU,
                             save_prediction_np=True)
                print("evaluate time: {:3.1f} s".format(time.time() - t_now))
                print("[{}/{}] time: {:.1f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
                print("======================================")

    if writer is not None:
        writer.close()
    print("training was Finished!")
    # whole seconds first, so rounding can never print "60 m" or "60 s"
    elapsed = int(time.time() - t_start)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        hours, minutes, seconds))