Example #1
def get_trace(config_path, weights_path, out_path):
    """
    Gets the pose estimation network traced script,
    for C++ environments deployment.
    Arguments:
    config_path  : path to the network configuration file.
    weights_path : path to the network pretrained weights file.
    out_path     : path to the output traced script file.
    """
    # parse config data and load network
    data_options = read_data_cfg(config_path)
    model = SegPoseNet(data_options, False)
    print('Building network graph ... Done!')

    # print network and load weights
    model.load_weights(weights_path)
    print('Loading weights from %s... Done!' % (weights_path))

    # get model traced script
    input_sample = torch.rand(1, model.channels, model.height, model.width)
    traced_script_module = torch.jit.trace(model, input_sample)
    print('Getting network traced script ... Done!')

    # save model traced script
    traced_script_module.save(out_path)
    print('Saving network traced script ... Done!')
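A hedged usage sketch for get_trace: the config/weights paths below are hypothetical placeholders (borrowed from the configure_network defaults later in this listing), and the reload check simply re-runs the traced module on a random input of an assumed shape.

import torch

# all paths here are illustrative, not confirmed repository layout
get_trace('./configs/data-YCB.cfg', './models/ckpt_final.pth',
          './models/segpose_traced.pt')

# the saved TorchScript module can be reloaded (e.g. to sanity-check it
# before shipping to a C++ runtime) with torch.jit.load
traced = torch.jit.load('./models/segpose_traced.pt')
sample = torch.rand(1, 3, 480, 640)  # assumed (C,H,W); use model.channels/height/width
_ = traced(sample)
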
def evaluate(data_cfg,
             weightfile,
             listfile,
             outdir,
             object_names,
             intrinsics,
             vertex,
             bestCnt,
             conf_thresh,
             linemod_index=False,
             use_gpu=False,
             gpu_id='0'):
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    data_options = read_data_cfg(data_cfg)
    m = SegPoseNet(data_options)

    m.print_network()
    m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    if use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id
        m.cuda()

    with open(listfile, 'r') as file:
        imglines = file.readlines()

    for idx in range(len(imglines)):
        imgfile = imglines[idx].rstrip()
        img = cv2.imread(imgfile)

        dirname, filename = os.path.split(imgfile)
        baseName, _ = os.path.splitext(filename)
        if linemod_index:
            outFileName = baseName[-4:]
        else:
            dirname = os.path.splitext(dirname[dirname.rfind('/') + 1:])[0]
            outFileName = dirname + '_' + baseName

        start = time.time()
        predPose = do_detect(m, img, intrinsics, bestCnt, conf_thresh, use_gpu)
        finish = time.time()

        arch = 'CPU'
        if use_gpu:
            arch = 'GPU'
        print('%s: Predict %d objects in %f seconds (on %s).' %
              (imgfile, len(predPose), (finish - start), arch))
        print("Prediction saved!", outFileName, predPose, outdir)
        save_predictions(outFileName, predPose, object_names, outdir)

        # visualize predictions
        vis_start = time.time()
        visImg = visualize_predictions(predPose, img, vertex, intrinsics)
        cv2.imwrite(outdir + '/' + outFileName + '.jpg', visImg)
        vis_finish = time.time()
        print('%s: Visualization in %f seconds.' % (imgfile,
                                                    (vis_finish - vis_start)))
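
A hedged sketch of how this evaluate driver might be invoked. The intrinsics values, file paths, and object list are placeholder assumptions; bestCnt=10 and conf_thresh=0.3 follow the defaults noted in Example #3's docstring below.

import numpy as np

intrinsics = np.array([[1066.8, 0.0, 312.99],
                       [0.0, 1067.5, 241.31],
                       [0.0, 0.0, 1.0]])  # placeholder 3x3 camera matrix

evaluate(data_cfg='./data/data-YCB.cfg',           # hypothetical paths throughout
         weightfile='./models/ckpt_final.pth',
         listfile='./data/test_images.txt',
         outdir='./eval_out',
         object_names=['obj_01', 'obj_02'],        # placeholder class names
         intrinsics=intrinsics,
         vertex=np.load('./data/YCB_vertex.npy'),  # hypothetical vertex file
         bestCnt=10,
         conf_thresh=0.3,
         use_gpu=True)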
Example #3
File: api.py  Project: pawanw17/grasping
def configure_network(cfg_file='./configs/data-YCB.cfg',
                      weights_file='./models/ckpt_final.pth',
                      use_gpu=True):
    """
    API function to configure the pose estimation network.
    Arguments:
    cfg_file     : path to config file.
    weights_file : path to pretrained weights file.
    use_gpu      : whether to use GPU or not.
    """
    # parse config data and load network
    data_options = read_data_cfg(cfg_file)
    model = SegPoseNet(data_options, False)
    print('Building network graph ... Done!')

    # print network and load weights
    model.load_weights(weights_file)
    print('Loading weights from %s... Done!' % (weights_file))

    # device selection
    device = torch.device('cuda' if (
        torch.cuda.is_available() and use_gpu) else 'cpu')
    model.to(device)
    print(f'Performing Inference on {device}')

    # return model object
    return model
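
A possible follow-up call, assuming the do_detect helper used in the surrounding examples; the image path and intrinsics are placeholders, and the 10/0.3 arguments reuse the bestCnt/conf_thresh defaults mentioned elsewhere in this listing.

import cv2
import numpy as np
import torch

model = configure_network()               # default cfg/weights from the signature
img = cv2.imread('./samples/frame0.jpg')  # hypothetical test image
intrinsics = np.array([[1066.8, 0.0, 312.99],
                       [0.0, 1067.5, 241.31],
                       [0.0, 0.0, 1.0]])  # placeholder 3x3 camera matrix
pred_pose = do_detect(model, img, intrinsics, 10, 0.3,
                      torch.cuda.is_available())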
def evaluate(data_cfg,
             weightfile,
             listfile,
             outdir,
             object_names,
             intrinsics,
             vertex,
             bestCnt,
             conf_thresh,
             linemod_index=False,
             use_gpu=False,
             gpu_id='0'):
    '''

    :param data_cfg: dataset config, './data/data-LINEMOD.cfg'
    :param weightfile: network pre-trained weight
    :param listfile: image list
    :param outdir: output dir
    :param object_names: classes
    :param intrinsics: camera intrinsic
    :param vertex: './data/Occluded-LINEMOD/LINEMOD_vertex.npy'
    :param bestCnt: default=10, the number of points chosen for one corner
    :param conf_thresh: default=0.3
    :param linemod_index: default=False
    :param use_gpu:
    :param gpu_id:
    :return:
    '''
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    data_options = read_data_cfg(data_cfg)
    m = SegPoseNet(data_options)
    m.print_network()
    m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    if use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id
        m.cuda()

    with open(listfile, 'r') as file:
        imglines = file.readlines()

    for idx in range(len(imglines)):
        imgfile = imglines[idx].rstrip()  # image path
        img = cv2.imread(imgfile)

        dirname, filename = os.path.split(imgfile)
        baseName, _ = os.path.splitext(filename)  # file name without extension
        if linemod_index:
            outFileName = baseName[-4:]  # keep only the 4-digit LINEMOD index
        else:
            dirname = os.path.splitext(dirname[dirname.rfind('/') + 1:])[0]
            outFileName = dirname + '_' + baseName

        start = time.time()
        predPose = do_detect(m, img, intrinsics, bestCnt, conf_thresh, use_gpu)
        finish = time.time()

        arch = 'CPU'
        if use_gpu:
            arch = 'GPU'
        print('%s: Predict %d objects in %f seconds (on %s).' %
              (imgfile, len(predPose), (finish - start), arch))
        save_predictions(outFileName, predPose, object_names, outdir)

        # visualize predictions
        vis_start = time.time()
        bbx8 = get_bbox8_3d(vertex)
        img2 = img.copy()
        visImg = visualize_bbox(predPose, img, bbx8, intrinsics)
        visImg2 = visualize_predictions(predPose, img2, vertex, intrinsics)
        cv2.imwrite(outdir + '/' + outFileName + '_box.jpg', visImg)
        cv2.imwrite(outdir + '/' + outFileName + '.jpg', visImg2)
        vis_finish = time.time()
        print('%s: Visualization in %f seconds.' % (imgfile,
                                                    (vis_finish - vis_start)))
Example #5
def train(cfg_path):
    # network initialization
    data_options = read_data_cfg(cfg_path)
    model = SegPoseNet(data_options, is_train=True)

    # load pretrained weights
    if pretrained_weights_path is not None:
        model.load_weights(pretrained_weights_path)
        print('Darknet weights loaded from ', pretrained_weights_path)

    # get input/output dimensions
    img_h = model.height
    img_w = model.width
    out_h = model.output_h
    out_w = model.output_w

    # print network graph
    model.print_network()

    model.train()

    bias_acc = meters()

    # optimizer initialization
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=initial_lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        [int(0.5 * num_epoch),
         int(0.75 * num_epoch),
         int(0.9 * num_epoch)],
        gamma=0.1)

    # device selection
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # dataset initialization
    train_dataset = YCBDataset(ycb_data_path,
                               imageset_path,
                               syn_data_path=syn_data_path,
                               target_h=out_h,
                               target_w=out_w,
                               use_real_img=use_real_img,
                               bg_path=bg_path,
                               num_syn_images=num_syn_img,
                               data_cfg=data_cfg,
                               kp_path=kp_path)
    if not os.path.isfile("data/balancing_weight.pkl"):
        train_dataset.gen_balancing_weight()
    train_dataset.set_balancing_weight()
    train_dataset.gen_kp_gt()
    median_balancing_weight = train_dataset.weight_cross_entropy.to(device)

    print('training on %d images' % len(train_dataset))
    if gen_kp_gt:
        train_dataset.gen_kp_gt()

    # loss configurations
    seg_loss = FocalLoss(alpha=1.0,
                         gamma=2.0,
                         weights=median_balancing_weight,
                         reduce=True)
    pos_loss = nn.L1Loss()
    pos_loss_factor = 1.5
    conf_loss = nn.L1Loss()
    conf_loss_factor = 1.0

    # train/val split
    train_db, val_db = torch.utils.data.random_split(
        train_dataset, [len(train_dataset) - 2000, 2000])

    train_loader = torch.utils.data.DataLoader(dataset=train_db,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_db,
                                             batch_size=batch_size,
                                             num_workers=num_workers,
                                             shuffle=True)
    # train model
    total_step = len(train_loader)
    # loop over number of epochs
    for epoch in range(num_epoch):
        i = 0
        for images, seg_label, kp_gt_x, kp_gt_y, mask_front in tqdm(
                train_loader):
            i += 1
            # data to device
            images = images.to(device)
            seg_label = seg_label.to(device)
            kp_gt_x = kp_gt_x.to(device)
            kp_gt_y = kp_gt_y.to(device)
            mask_front = mask_front.to(device)

            # forward pass
            output = model(images)

            # segmentation
            pred_seg = output[0]  # (B*OH*OW, C)
            seg_label = seg_label.view(-1)

            l_seg = seg_loss(pred_seg, seg_label)

            # regression
            mask_front = mask_front.repeat(number_point, 1, 1, 1).permute(
                1, 2, 3, 0).contiguous()  # (B,OH,OW,NV)
            pred_x = output[1][0] * mask_front  # (B,OH,OW,NV)
            pred_y = output[1][1] * mask_front
            kp_gt_x = kp_gt_x.float() * mask_front
            kp_gt_y = kp_gt_y.float() * mask_front
            l_pos = pos_loss(pred_x, kp_gt_x) + pos_loss(pred_y, kp_gt_y)

            # confidence
            conf = output[1][2] * mask_front  # (B,OH,OW,NV)
            bias = torch.sqrt((pred_y - kp_gt_y)**2 + (pred_x - kp_gt_x)**2)
            conf_target = torch.exp(-modulating_factor * bias) * mask_front
            conf_target = conf_target.detach()
            l_conf = conf_loss(conf, conf_target)

            # combine all losses
            all_loss = l_seg + l_pos * pos_loss_factor + l_conf * conf_loss_factor
            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                # compute pixel-wise bias to measure training accuracy
                bias_acc.update(
                    abs(pnz((pred_x - kp_gt_x).cpu()).mean() * img_w))
                # write losses to tensorboard writer
                writer.add_scalar('seg_loss', l_seg.item(),
                                  epoch * total_step + i)
                writer.add_scalar('pos loss', l_pos.item(),
                                  epoch * total_step + i)
                writer.add_scalar('conf_loss', l_conf.item(),
                                  epoch * total_step + i)
                writer.add_scalar('pixel_wise bias', bias_acc.value,
                                  epoch * total_step + i)

        # reset pixel_wise bias meter
        bias_acc._reset()
        # LR scheduler step
        scheduler.step()

        # model validation
        with torch.no_grad():
            total_seg_loss = 0
            total_pos_loss = 0
            total_conf_loss = 0
            total_loss = 0
            viz_imgs = []
            j = 0
            for images, seg_label, kp_gt_x, kp_gt_y, mask_front in tqdm(
                    val_loader):
                j += 1
                # data to device
                images = images.to(device)
                seg_label = seg_label.to(device)
                kp_gt_x = kp_gt_x.to(device)
                kp_gt_y = kp_gt_y.to(device)
                mask_front = mask_front.to(device)
                # forward pass
                output = model(images)
                # segmentation
                pred_seg = output[0]
                seg_label = seg_label.view(-1)
                l_seg = seg_loss(pred_seg, seg_label)
                # regression
                mask_front = mask_front.repeat(number_point, 1, 1,
                                               1).permute(1, 2, 3,
                                                          0).contiguous()
                pred_x = output[1][0] * mask_front
                pred_y = output[1][1] * mask_front
                kp_gt_x = kp_gt_x.float() * mask_front
                kp_gt_y = kp_gt_y.float() * mask_front
                l_pos = pos_loss(pred_x, kp_gt_x) + pos_loss(pred_y, kp_gt_y)
                # confidence
                conf = output[1][2] * mask_front
                bias = torch.sqrt((pred_y - kp_gt_y)**2 +
                                  (pred_x - kp_gt_x)**2)
                conf_target = torch.exp(-modulating_factor * bias) * mask_front
                conf_target = conf_target.detach()
                l_conf = conf_loss(conf, conf_target)
                # combine all losses
                all_loss = l_seg + l_pos * pos_loss_factor + l_conf * conf_loss_factor
                total_seg_loss += l_seg.item()
                total_pos_loss += l_pos.item()
                total_conf_loss += l_conf.item()
                total_loss += all_loss.item()
                # data visualization
                if (j + 1) % 100 == 0:
                    model.eval()  # change network to eval mode
                    output = model(images)  # perform inference
                    pred_pose = fusion(output, img_width, img_height,
                                       intrinsics, conf_thresh, batch_idx,
                                       best_cnt)  # output fusion
                    image = np.uint8(
                        convert2cpu(
                            images[batch_idx]).detach().numpy().transpose(
                                1, 2, 0) * 255.0)  # get image
                    image = resize(image,
                                   (img_height, img_width))  # resize image
                    viz_img = visualize_predictions(pred_pose, image, vertices,
                                                    intrinsics).transpose(
                                                        2, 0,
                                                        1)  # visualize poses
                    viz_imgs.append(viz_img)  # append to visualizations
                    model.train()  # change network to train mode
            # print total validation losses
            print(
                'Epoch [{}/{}], Validation Loss: \n seg loss: {:.4f}, pos loss: {:.4f}, conf loss: {:.4f}, total loss: {:.4f}'
                .format(epoch + 1, num_epoch, total_seg_loss, total_pos_loss,
                        total_conf_loss, total_loss))
            # write visualizations to tensorboard writer
            viz_data = np.stack(viz_imgs, axis=0)
            writer.add_images('pose_viz',
                              torch.from_numpy(viz_data),
                              global_step=epoch + 1)

        # save model checkpoint per epoch
        model.save_weights(os.path.join(checkpoints_dir, f'ckpt_{epoch}.pth'))

    # save final model checkpoint
    model.save_weights(os.path.join(checkpoints_dir, 'ckpt_final.pth'))
    writer.close()
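
The confidence branch in this training loop regresses toward exp(-modulating_factor * bias), zeroed outside the foreground mask. A self-contained sketch of that target computation, with illustrative shapes and an assumed modulating factor:

import torch

modulating_factor = 10.0            # illustrative; the real value is a module-level config
pred_x = torch.rand(2, 76, 76, 8)   # (B, OH, OW, NV) predicted keypoint x maps
pred_y = torch.rand(2, 76, 76, 8)
kp_gt_x = torch.rand(2, 76, 76, 8)  # ground-truth keypoint maps
kp_gt_y = torch.rand(2, 76, 76, 8)
mask_front = torch.randint(0, 2, (2, 76, 76, 8)).float()  # foreground mask

# per-cell distance between predicted and ground-truth keypoints
bias = torch.sqrt((pred_y - kp_gt_y) ** 2 + (pred_x - kp_gt_x) ** 2)
# the confidence target decays exponentially with the bias and is detached
conf_target = (torch.exp(-modulating_factor * bias) * mask_front).detach()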
Example #6
def evaluate(data_cfg,
             weightfile,
             listfile,
             outdir,
             object_names,
             intrinsics,
             vertex,
             bestCnt,
             conf_thresh,
             use_gpu=False,
             gpu_id='0'):
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    data_options = read_data_cfg(data_cfg)
    m = SegPoseNet(data_options)

    m.print_network()
    m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    for name, param in m.named_parameters():
        if 'gate' in name:
            print("debug test.py param:", name, param)

    if use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id
        m.cuda()

    with open(listfile, 'r') as file:
        imglines = file.readlines()

    euclidian_errors = []
    n_present_detected = {i: 0 for i in range(22)}
    n_present_undetected = {i: 0 for i in range(22)}
    n_absent_detected = {i: 0 for i in range(22)}
    n_absent_undetected = {i: 0 for i in range(22)}
    total = 0
    total_with_class = {i: 0 for i in range(22)}
    points = []

    for idx in range(len(imglines)):
        total += 1

        # optionally skip 7/8 of the testing data (debug)
        # if total % 8 != 0:
        #     continue

        imgfile = imglines[idx].rstrip()
        img = cv2.imread(imgfile)

        dirname, filename = os.path.split(imgfile)
        baseName, _ = os.path.splitext(filename)

        dirname = os.path.splitext(dirname[dirname.rfind('/') + 1:])[0]
        outFileName = dirname + '_' + baseName

        # domain=1 for real images
        domains = torch.ones(1).long()

        # generate kp gt map of (nH, nW, nV)
        prefix = imgfile[:-10]
        meta = loadmat(prefix + '-meta.mat')
        class_ids = meta['cls_indexes']
        print("debug test.py class_ids:", class_ids)
        label_img = cv2.imread(prefix + "-label.png")[:, :, 0]
        label_img = cv2.resize(label_img, (76, 76),
                               interpolation=cv2.INTER_NEAREST)

        start = time.time()
        predPose, repro_dict = do_detect(m,
                                         img,
                                         intrinsics,
                                         bestCnt,
                                         conf_thresh,
                                         use_gpu,
                                         domains=domains,
                                         seg_save_path=outdir + "/seg-" +
                                         str(idx) + ".jpg")
        finish = time.time()

        in_pkl = prefix + '-bb8_2d.pkl'
        with open(in_pkl, 'rb') as f:
            bb8_2d = pickle.load(f)

        kps_dict = {}
        err_dict = [0] * 22

        # compute keypoints ground truth in pixel
        for i, cid in enumerate(class_ids):
            kp_gt_x = bb8_2d[:, :, 0][i] * 640
            kp_gt_y = bb8_2d[:, :, 1][i] * 480
            kps_dict[cid[0]] = np.stack((kp_gt_x, kp_gt_y), axis=1)

        # compute Euclidean error (and counts of true/false positives/negatives)
        for i, cid in enumerate(class_ids):
            c = int(cid[0])
            if c in label_img:
                total_with_class[c] += 1
                if c in repro_dict:
                    n_present_detected[c] += 1
                else:
                    n_present_undetected[c] += 1
            else:
                if c in repro_dict:
                    n_absent_detected[c] += 1
                else:
                    n_absent_undetected[c] += 1

            if c in kps_dict and c in repro_dict:
                err_dict[c] = np.mean(
                    np.sqrt(
                        np.square(kps_dict[c] - repro_dict[c]).sum(axis=1)))
                points += [kps_dict, repro_dict]
        euclidian_errors.append(err_dict)

        arch = 'CPU'
        if use_gpu:
            arch = 'GPU'
        print('%s: Predict %d objects in %f seconds (on %s).' %
              (imgfile, len(predPose), (finish - start), arch))
        print("Prediction saved!", outFileName, predPose, outdir)
        save_predictions(outFileName, predPose, object_names, outdir)

        # visualize predictions
        vis_start = time.time()

        try:
            visImg = visualize_predictions(predPose, img, vertex, intrinsics)
            cv2.imwrite(outdir + '/' + outFileName + '.jpg', visImg)
        except Exception:
            # visualization may fail (e.g. when nothing is detected); skip this image
            pass
        vis_finish = time.time()
        print('%s: Visualization in %f seconds.' % (imgfile,
                                                    (vis_finish - vis_start)))

    # save Euclidean errors of predictions
    np.save("./euclidian_errors", np.array(euclidian_errors))

    # save results (n false positive, etc...)
    results_scores = [
        n_present_detected, n_present_undetected, n_absent_detected,
        n_absent_undetected, total, total_with_class
    ]
    np.save("./various-results", np.array(results_scores))

    # save points (detected points in 2D after reprojection)
    np.save("./points", np.array(points))
Example #7
def train(data_cfg):
    data_options = read_data_cfg(data_cfg)
    m = SegPoseNet(data_options)

    if load_weight_from_path is not None:
        m.load_weights(load_weight_from_path)
        print("Load weights from ", load_weight_from_path)
    i_h = m.height
    i_w = m.width
    o_h = m.output_h
    o_w = m.output_w
    m.print_network()
    m.train()
    bias_acc = meters()
    optimizer = torch.optim.SGD(m.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [int(0.5*num_epoch), int(0.75*num_epoch),
                                                                 int(0.9*num_epoch)], gamma=0.1)
    if use_gpu:
        # os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible
        m = torch.nn.DataParallel(m, device_ids=gpu_id)
        m.cuda()

    one_syn_per_batch = False
    syn_min_rate = None

    if batch_size > 1 and ngpu > 1 and adapt:
        one_syn_per_batch = True
        syn_min_rate = batch_size // ngpu
        assert syn_min_rate > 1, "For DA (adapt=True), the batch size must be at least double the number of GPUs"

    train_dataset = YCB_Dataset(ycb_data_path, imageset_path, syn_data_path=syn_data_path,
                                target_h=o_h, target_w=o_w, use_real_img=use_real_img,
                                bg_path=bg_path, syn_range=syn_range, num_syn_images=num_syn_img,
                                data_cfg="data/data-YCB.cfg", kp_path=kp_path, use_bg_img=use_bg_img,
                                one_syn_per_batch=one_syn_per_batch, batch_size=syn_min_rate)
    median_balancing_weight = train_dataset.weight_cross_entropy.cuda() if use_gpu \
        else train_dataset.weight_cross_entropy

    print('training on %d images' % len(train_dataset))

    # for multiflow, need to keep track of the training progress
    m.module.coreModel.total_training_samples = seen + num_epoch * len(train_dataset)
    print('total training samples:', m.module.coreModel.total_training_samples)
    m.module.coreModel.seen = seen


    if gen_kp_gt:
        train_dataset.gen_kp_gt(for_syn=True, for_real=False)

    # Loss configurations

    # balancing weights for cross-entropy (used in Hu et al.'s Segmentation-driven pose; not used here)
    # seg_loss = nn.CrossEntropyLoss(weight=median_balancing_weight)

    seg_loss = nn.CrossEntropyLoss()
    seg_loss_factor = 1

    pos_loss = nn.L1Loss()
    pos_loss_factor = 2.6

    conf_loss = nn.L1Loss()
    conf_loss_factor = 0.8

    disc_loss = nn.CrossEntropyLoss()
    disc_loss_factor = 1

    seg_disc_loss = nn.CrossEntropyLoss()
    seg_disc_loss_factor = 1

    pos_disc_loss = nn.CrossEntropyLoss()
    pos_disc_loss_factor = 1

    # split into train and val
    train_db, val_db = torch.utils.data.random_split(train_dataset, [len(train_dataset) - 2000, 2000])

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,  # validation split not used for training yet
                                               batch_size=batch_size, num_workers=num_workers,
                                               shuffle=True, drop_last=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_db,
                                             batch_size=batch_size, num_workers=num_workers,
                                             shuffle=True)
    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epoch):
        i = -1
        for images, seg_label, kp_gt_x, kp_gt_y, mask_front, domains in tqdm(train_loader):
            i += 1

            if use_gpu:
                images = images.cuda()
                seg_label = seg_label.cuda()
                kp_gt_x = kp_gt_x.cuda()
                kp_gt_y = kp_gt_y.cuda()
                mask_front = mask_front.cuda()
                domains = domains.cuda()


            d = domains[:, 0, 0].view(-1)
            zero_source = d.bool().all()
            domains = domains.view(-1)


            # if adapt=True, skip the batch if it contains zero source (synthetic) samples
            if adapt and zero_source:
                continue

            # forward pass
            output = m(images, adapt=adapt, domains=d)

            # discriminator
            pred_domains = output[2]
            seg_pred_domains = output[3]
            pos_pred_domains = output[4]
            l_disc = disc_loss(pred_domains, domains)

            l_seg_disc = seg_disc_loss(seg_pred_domains, d)
            l_pos_disc = pos_disc_loss(pos_pred_domains, d)



            if adapt:
                seg_label = source_only(seg_label, d)

            # segmentation
            pred_seg = output[0]  # (B*OH*OW, C)
            seg_label = seg_label.view(-1)
            l_seg = seg_loss(pred_seg, seg_label)

            # regression
            mask_front = mask_front.repeat(number_point, 1, 1, 1).permute(1, 2, 3, 0).contiguous()  # (B,OH,OW,NV)
            if adapt:
                mask_front = source_only(mask_front, d)
                kp_gt_x = source_only(kp_gt_x, d)
                kp_gt_y = source_only(kp_gt_y, d)
            pred_x = output[1][0] * mask_front  # (B,OH,OW,NV)
            pred_y = output[1][1] * mask_front
            kp_gt_x = kp_gt_x.float() * mask_front
            kp_gt_y = kp_gt_y.float() * mask_front
            l_pos = pos_loss(pred_x, kp_gt_x) + pos_loss(pred_y, kp_gt_y)

            # confidence
            conf = output[1][2] * mask_front # (B,OH,OW,NV)
            bias = torch.sqrt((pred_y-kp_gt_y)**2 + (pred_x-kp_gt_x)**2)
            conf_target = torch.exp(-modulating_factor * bias) * mask_front
            conf_target = conf_target.detach()
            l_conf = conf_loss(conf, conf_target)

            # combine all losses
            all_loss = l_seg * seg_loss_factor + l_pos * pos_loss_factor + l_conf * conf_loss_factor
            if adapt:
                all_loss += l_disc * disc_loss_factor + l_seg_disc * seg_disc_loss_factor + l_pos_disc * pos_disc_loss_factor

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

            # gradient debug
            avggrad, avgdata = network_grad_ratio(m)
            print('avg gradient ratio: %f, %f, %f' % (avggrad, avgdata, avggrad / avgdata))


            _, binary_domains = torch.max(pred_domains, 1)
            n_target_pred = binary_domains.float().sum()/(76*76)
            correct = (binary_domains == domains).float().sum()
            total = domains.size(0)
            acc = correct/total * 100

            _, seg_binary_domains = torch.max(seg_pred_domains, 1)
            correct = (seg_binary_domains == d).float().sum()
            total = d.size(0)
            seg_disc_acc = correct/total * 100


            _, pos_binary_domains = torch.max(pos_pred_domains, 1)
            correct = (pos_binary_domains == d).float().sum()
            total = d.size(0)
            pos_disc_acc = correct/total * 100

            # helper to (un)freeze the discriminator layers (defined here but not invoked in this snippet)
            def set_disc(require_grad=True, first_disc_layer=126, last_disc_layer=139):
                for name, param in m.named_parameters():
                    for layer_i in range(first_disc_layer, last_disc_layer+1):
                        if "model." + str(layer_i) in name:
                            param.requires_grad = require_grad

            if (i + 1) % 20 == 0 and not zero_source:
                # compute pixel-wise bias to measure training accuracy
                bias_acc.update(abs(pnz((pred_x - kp_gt_x).cpu()).mean()*i_w))

                print('Epoch [{}/{}], Step [{}/{}]: \n seg loss: {:.4f}, pos loss: {:.4f}, conf loss: {:.4f}, pixel-wise bias:{:.4f} '
                      'disc loss: {:.4f}, disc acc: {:.4f} '
                      'disc seg loss: {:.4f}, disc seg acc: {:.4f} '
                      'disc pos loss: {:.4f}, disc pos acc: {:.4f} '
                      .format(epoch + 1, num_epoch, i + 1, total_step, l_seg.item(), l_pos.item(), l_conf.item(), bias_acc.value,
                             l_disc.item(), acc.item(),
                             l_seg_disc.item(), seg_disc_acc.item(),
                             l_pos_disc.item(), pos_disc_acc.item(),
                     ))

                writer.add_scalar('seg_loss', l_seg.item(), epoch*total_step+i)
                writer.add_scalar('pos loss', l_pos.item(), epoch*total_step+i)
                writer.add_scalar('conf_loss', l_conf.item(), epoch*total_step+i)
                writer.add_scalar('pixel_wise bias', bias_acc.value, epoch*total_step+i)

                writer.add_scalar('disc_loss', l_disc.item(), epoch*total_step+i)
                writer.add_scalar('disc_acc', acc.item(), epoch*total_step+i)

                writer.add_scalar('seg_disc_loss', l_seg_disc.item(), epoch*total_step+i)
                writer.add_scalar('seg_disc_acc', seg_disc_acc.item(), epoch*total_step+i)

                writer.add_scalar('pos_disc_loss', l_pos_disc.item(), epoch*total_step+i)
                writer.add_scalar('pos_disc_acc', pos_disc_acc.item(), epoch*total_step+i)

        bias_acc._reset()
        scheduler.step()

        # save weights
        if (epoch+1) % save_interval == 0:
            print("save weights to: ", weight_path(epoch))
            m.module.save_weights(weight_path(epoch))

    m.module.save_weights(weight_path(epoch))
    writer.close()
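
Note the m.module.save_weights(...) calls: once the network is wrapped in torch.nn.DataParallel, the original model lives under the .module attribute. A minimal illustration with a stand-in module (SmallNet is not the repository's model):

import torch
import torch.nn as nn

class SmallNet(nn.Module):  # stand-in for SegPoseNet
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)

net = SmallNet()
if torch.cuda.is_available():
    net = nn.DataParallel(net)  # replicates across visible GPUs
    inner = net.module          # the wrapped SmallNet
else:
    inner = net                 # unwrapped on CPU
torch.save(inner.state_dict(), 'ckpt.pth')  # save the plain, unwrapped weights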
Example #8
File: test.py  Project: pawanw17/grasping
def test(data_cfg,
         weightfile,
         listfile,
         outdir,
         object_names,
         intrinsics,
         vertex,
         bestCnt,
         conf_thresh,
         linemod_index=False,
         use_gpu=False,
         gpu_id='0'):
    """
    Main pose estimation testing driver,
    Used to run inference on network and save visual results.
    Arguments:
    data_cfg      : path to data config file.
    weightfile    : path to pretrained weights file.
    listfile      : path to text file with list of test images.
    outdir        : path to output directory.
    object_names  : list of object names in dataset.
    intrinsics    : intrinsic matrix of camera.
    vertex        : vertices of 3d point cloud of different dataset objects (for visualization).
    bestCnt       : best count.
    conf_thresh   : confidence threshold.
    linemod_index : whether to use linemod index or not.
    use_gpu       : whether to use gpu or not.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # parse config data and load network
    data_options = read_data_cfg(data_cfg)
    m = SegPoseNet(data_options, False)

    # print network and load weights
    m.print_network()
    m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    if use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id
        m.cuda()

    # read list of test images
    with open(listfile, 'r') as file:
        imglines = file.readlines()

    # loop over all images in test list
    for idx in range(len(imglines)):
        imgfile = imglines[idx].rstrip()
        img = imread(imgfile)  # read image

        dirname, filename = os.path.split(imgfile)
        baseName, _ = os.path.splitext(filename)
        if linemod_index:
            outFileName = baseName[-4:]
        else:
            dirname = os.path.splitext(dirname[dirname.rfind('/') + 1:])[0]
            outFileName = dirname + '_' + baseName

        start = time.time()
        predPose = do_detect(m, img, intrinsics, bestCnt, conf_thresh,
                             use_gpu)  # perform pose estimation
        finish = time.time()

        arch = 'CPU'
        if use_gpu:
            arch = 'GPU'
        print('%s: Predict %d objects in %f seconds (on %s).' %
              (imgfile, len(predPose), (finish - start), arch))
        save_predictions(outFileName, predPose, object_names,
                         outdir)  # save output poses

        # visualize predictions
        vis_start = time.time()
        visImg = visualize_predictions(predPose, img, vertex, intrinsics)
        imsave(outdir + '/' + outFileName + '.jpg', visImg)
        vis_finish = time.time()
        print('%s: Visualization in %f seconds.' % (imgfile,
                                                    (vis_finish - vis_start)))
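
As with the evaluate sketches above, a hedged invocation of this test driver; all paths and values are placeholders, with object_names, intrinsics, and vertex assumed to be loaded elsewhere.

test(data_cfg='./data/data-YCB.cfg',
     weightfile='./models/ckpt_final.pth',
     listfile='./data/test_images.txt',
     outdir='./test_out',
     object_names=object_names,  # list of class names loaded elsewhere
     intrinsics=intrinsics,      # 3x3 camera matrix, as in the evaluate sketch
     vertex=vertex,              # per-object 3D vertices for visualization
     bestCnt=10,
     conf_thresh=0.3,
     linemod_index=False,
     use_gpu=True)
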
def train(data_cfg):
    data_options = read_data_cfg(data_cfg)
    m = SegPoseNet(data_options)
    if load_weight_from_path is not None:
        m.load_weights(load_weight_from_path)
        print("Load weights from ", load_weight_from_path)
    i_h = m.height
    i_w = m.width
    o_h = m.output_h
    o_w = m.output_w
    # m.print_network()
    m.train()
    bias_acc = meters()
    optimizer = torch.optim.SGD(m.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [int(0.5*num_epoch), int(0.75*num_epoch),
                                                                 int(0.9*num_epoch)], gamma=0.1)
    if use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible
        m = torch.nn.DataParallel(m, device_ids=gpu_id)
        m.cuda()

    train_dataset = YCB_Dataset(ycb_data_path, imageset_path, syn_data_path=syn_data_path,
                                target_h=o_h, target_w=o_w, use_real_img=use_real_img,
                                bg_path=bg_path, num_syn_images=num_syn_img,
                                data_cfg="data/data-YCB.cfg", kp_path=kp_path)
    median_balancing_weight = train_dataset.weight_cross_entropy.cuda() if use_gpu \
        else train_dataset.weight_cross_entropy

    print('training on %d images' % len(train_dataset))
    if gen_kp_gt:
        train_dataset.gen_kp_gt()

    # Loss configurations
    seg_loss = nn.CrossEntropyLoss(weight=median_balancing_weight)
    pos_loss = nn.L1Loss()
    pos_loss_factor = 1.3  # 0.02 in original paper
    conf_loss = nn.L1Loss()
    conf_loss_factor = 0.8  # 0.02 in original paper

    # split into train and val
    train_db, val_db = torch.utils.data.random_split(train_dataset, [len(train_dataset) - 2000, 2000])

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,  # validation split not used for training yet
                                               batch_size=batch_size, num_workers=num_workers,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_db,
                                             batch_size=batch_size, num_workers=num_workers,
                                             shuffle=True)
    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epoch):
        i = -1
        for images, seg_label, kp_gt_x, kp_gt_y, mask_front in tqdm(train_loader):
            i += 1
            if use_gpu:
                images = images.cuda()
                seg_label = seg_label.cuda()
                kp_gt_x = kp_gt_x.cuda()
                kp_gt_y = kp_gt_y.cuda()
                mask_front = mask_front.cuda()

            # forward pass
            output = m(images)

            # segmentation
            pred_seg = output[0]  # (B*OH*OW, C)
            seg_label = seg_label.view(-1)

            l_seg = seg_loss(pred_seg, seg_label)

            # regression
            mask_front = mask_front.repeat(number_point, 1, 1, 1).permute(1, 2, 3, 0).contiguous()  # (B,OH,OW,NV)
            pred_x = output[1][0] * mask_front  # (B,OH,OW,NV)
            pred_y = output[1][1] * mask_front
            kp_gt_x = kp_gt_x.float() * mask_front
            kp_gt_y = kp_gt_y.float() * mask_front
            l_pos = pos_loss(pred_x, kp_gt_x) + pos_loss(pred_y, kp_gt_y)

            # confidence
            conf = output[1][2] * mask_front # (B,OH,OW,NV)
            bias = torch.sqrt((pred_y-kp_gt_y)**2 + (pred_x-kp_gt_x)**2)
            conf_target = torch.exp(-modulating_factor * bias) * mask_front
            conf_target = conf_target.detach()
            l_conf = conf_loss(conf, conf_target)

            # combine all losses
            all_loss = l_seg + l_pos * pos_loss_factor + l_conf * conf_loss_factor
            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                # compute pixel-wise bias to measure training accuracy
                bias_acc.update(abs(pnz((pred_x - kp_gt_x).cpu()).mean()*i_w))
                print('Epoch [{}/{}], Step [{}/{}]: \n seg loss: {:.4f}, pos loss: {:.4f}, conf loss: {:.4f}, '
                      'Pixel-wise bias:{:.4f}'
                      .format(epoch + 1, num_epoch, i + 1, total_step, l_seg.item(), l_pos.item(),
                              l_conf.item(), bias_acc.value))

                writer.add_scalar('seg_loss', l_seg.item(), epoch*total_step+i)
                writer.add_scalar('pos loss', l_pos.item(), epoch*total_step+i)
                writer.add_scalar('conf_loss', l_conf.item(), epoch*total_step+i)
                writer.add_scalar('pixel_wise bias', bias_acc.value, epoch*total_step+i)
        bias_acc._reset()
        scheduler.step()
        if epoch % 5 == 1:
            m.module.save_weights(weight_path)
    m.module.save_weights(weight_path)
    writer.close()

    time_elapsed = time.time() - since
    print('Train Epoch: {} complete in {:.0f}m {:.0f}s'.format(
        epoch, time_elapsed // 60, time_elapsed % 60))


if __name__ == '__main__':
    args = args_setting()
    torch.manual_seed(args.seed)
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    save_root = './model'
    data_options = read_data_cfg(args.cfg_file)
    model = SegPoseNet(data_options).to(device)
    model.print_network()

    # turn images into FloatTensors
    op_transforms = transforms.Compose([transforms.ToTensor()])

    mesh = load_objects(config.object_model_root)
    bbox_3d = get_bbox8_3d_from_dict(mesh)

    # load data for batches, num_workers for multiprocess
    train_loader = torch.utils.data.DataLoader(
        FPHA_hand(file_path=config.train_img_path,
                  hand_label=config.train_hand_annotation,
                  object_label=config.train_object_annotation,
                  transforms=op_transforms,
                  mesh=mesh,