Example #1
def train(epoch):

    global processed_batches

    # Initialize timer
    t0 = time.time()

    # Get the dataloader for training dataset
    train_loader = torch.utils.data.DataLoader(dataset_multi.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        seen=model.module.seen,
        batch_size=batch_size,
        num_workers=num_workers,
        bg_file_names=bg_file_names),
                                               batch_size=batch_size,
                                               shuffle=False,
                                               **kwargs)

    # TRAINING
    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d, processed %d samples, lr %f' %
            (epoch, epoch * len(train_loader.dataset), lr))
    # Start training
    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    niter = 0
    # Iterate through batches
    for batch_idx, (data, target) in enumerate(train_loader):
        t2 = time.time()
        # adjust learning rate
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches = processed_batches + 1
        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
        t3 = time.time()
        # Wrap tensors in Variable class for automatic differentiation
        data, target = Variable(data), Variable(target)
        t4 = time.time()
        # Zero the gradients before running the backward pass
        optimizer.zero_grad()
        t5 = time.time()
        # Forward pass
        output = model(data)
        t6 = time.time()
        region_loss.seen = region_loss.seen + data.data.size(0)
        # Compute loss, grow an array of losses for saving later on
        loss = region_loss(output, target)
        training_iters.append(
            epoch * math.ceil(len(train_loader.dataset) / float(batch_size)) +
            niter)
        training_losses.append(convert2cpu(loss.data))
        niter += 1
        t7 = time.time()
        # Backprop: compute gradient of the loss with respect to model parameters
        loss.backward()
        t8 = time.time()
        # Update weights
        optimizer.step()
        t9 = time.time()
        # Print time statistics (disabled by default; change False to True to enable)
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2 - t1)
            avg_time[1] = avg_time[1] + (t3 - t2)
            avg_time[2] = avg_time[2] + (t4 - t3)
            avg_time[3] = avg_time[3] + (t5 - t4)
            avg_time[4] = avg_time[4] + (t6 - t5)
            avg_time[5] = avg_time[5] + (t7 - t6)
            avg_time[6] = avg_time[6] + (t8 - t7)
            avg_time[7] = avg_time[7] + (t9 - t8)
            avg_time[8] = avg_time[8] + (t9 - t1)
            print('-------------------------------')
            print('       load data : %f' % (avg_time[0] / (batch_idx)))
            print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
            print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
            print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
            print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
            print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
            print('        backward : %f' % (avg_time[6] / (batch_idx)))
            print('            step : %f' % (avg_time[7] / (batch_idx)))
            print('           total : %f' % (avg_time[8] / (batch_idx)))
        t1 = time.time()
    t1 = time.time()
    return epoch * math.ceil(
        len(train_loader.dataset) / float(batch_size)) + niter - 1
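
train calls adjust_learning_rate with the global batch counter, but the helper itself is not shown in this snippet. A minimal sketch consistent with the call sites above, assuming globals learning_rate, steps, scales, and batch_size parsed from the solver section of the network configuration:

def adjust_learning_rate(optimizer, batch):
    # Step schedule: apply the scale of every milestone already passed,
    # then write the resulting rate into each parameter group.
    # learning_rate, steps, scales, batch_size are assumed globals.
    lr = learning_rate
    for i in range(len(steps)):
        if batch >= steps[i]:
            lr = lr * scales[i]
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr / batch_size
    return lr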
Example #2
def eval(niter, datacfg, cfgfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    prefix = 'results'
    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Set the globally loaded model to evaluation mode
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(model.module.width,
                                                     model.module.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    num_classes = model.module.num_classes
    anchors = model.module.anchors
    num_anchors = model.module.num_anchors
    testing_error_pixel = 0.0
    testing_samples = 0.0
    errs_2d = []

    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test examples
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()

        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Wrap the input tensor in a Variable; gradient tracking is disabled below with torch.no_grad()
        data = Variable(data)
        t2 = time.time()

        # Forward pass
        with torch.no_grad():
            output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output,
                                                   conf_thresh,
                                                   num_classes,
                                                   anchors,
                                                   num_anchors,
                                                   int(trgt[0][0]),
                                                   only_objectness=0)
        t4 = time.time()

        # Iterate through all batch elements
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(
                            truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        #iou           = bbox_iou(bb2d_gt, bb2d_pr)
                        #match         = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(
                    corners2D_gt)  # Fix the order of the corners in OCCLUSION

                # Compute [R|t] by pnp
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                   axis=1)),
                                       dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                proj_corners_gt = np.transpose(
                    compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(
                    compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Sum errors
                testing_error_pixel += pixel_dist
                testing_samples += 1

        t5 = time.time()

    # Compute 2D reprojection score
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (
            len(errs_2d) + eps)
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
            px_threshold, acc))

    # Time statistics refer to the last processed batch
    if True:
        logging('-----------------------------------')
        logging('  tensor to cuda : %f' % (t2 - t1))
        logging('         predict : %f' % (t3 - t2))
        logging('get_region_boxes : %f' % (t4 - t3))
        logging('            eval : %f' % (t5 - t4))
        logging('           total : %f' % (t5 - t1))
        logging('-----------------------------------')

    # Register losses and errors for saving later on
    testing_iters.append(niter)
    testing_errors_pixel.append(testing_error_pixel /
                                (float(testing_samples) + eps))
    testing_accuracies.append(acc)
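
The pnp helper recovers the object pose [R|t] from the nine 2D-3D control-point correspondences. It is not defined in this snippet; a minimal sketch with OpenCV, matching the two-value signature used above (Example #4 calls a variant that also returns the solver's success flag), assuming no lens distortion:

import cv2
import numpy as np

def pnp(points_3D, points_2D, cameraMatrix):
    # Solve the Perspective-n-Point problem for the control points
    assert points_3D.shape[0] == points_2D.shape[0], \
        '3D and 2D point sets must have the same number of vertices'
    dist_coeffs = np.zeros((4, 1), dtype='float32')  # assume no distortion
    _, rvec, tvec = cv2.solvePnP(
        points_3D,
        np.ascontiguousarray(points_2D[:, :2]).reshape((-1, 1, 2)),
        cameraMatrix,
        dist_coeffs)
    R, _ = cv2.Rodrigues(rvec)  # rotation vector -> 3x3 rotation matrix
    return R, tvec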
Example #3
def valid(datacfg, cfgfile, weightfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse data configuration files
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname = data_options['mesh']
    name = data_options['name']
    im_width = int(data_options['im_width'])
    im_height = int(data_options['im_height'])
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])

    # Parse net configuration file
    net_options = parse_cfg(cfgfile)[0]
    loss_options = parse_cfg(cfgfile)[-1]
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')]

    # Read object model information, get 3D bounding box corners, get intrinsics
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(data_options['diam'])
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx,
                                                 fy)  # camera params

    # Network I/O params
    num_labels = 2 * num_keypoints + 3  # +2 for width, height, +1 for object class
    errs_2d = []  # to save
    with open(valid_images) as fp:  # validation file names
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Compute-related Parameters
    use_cuda = True  # whether to use cuda or not
    kwargs = {'num_workers': 4, 'pin_memory': True}  # number of workers etc.

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the dataloader for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(model.width,
                                                     model.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              **kwargs)

    # Iterate through test batches (Batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):

        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            # target = target.cuda()

        # Wrap the tensor in a Variable; volatile=True (pre-0.4 PyTorch API) disables autograd to save memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, num_labels)
        all_boxes = get_multi_region_boxes(output,
                                           conf_thresh,
                                           num_classes,
                                           num_keypoints,
                                           anchors,
                                           num_anchors,
                                           int(trgt[0][0]),
                                           only_objectness=0)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, num_labels)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = list()
                for j in range(1, num_labels):
                    box_gt.append(truths[k][j])
                box_gt.extend([1.0, 1.0])
                box_gt.append(truths[k][0])

                # If the prediction has the highest confidence, choose it as our prediction
                best_conf_est = -sys.maxsize
                for j in range(len(boxes)):
                    if (boxes[j][2 * num_keypoints] >
                            best_conf_est) and (boxes[j][2 * num_keypoints + 2]
                                                == int(truths[k][0])):
                        best_conf_est = boxes[j][2 * num_keypoints]
                        box_pr = boxes[j]
                        match = corner_confidence(
                            box_gt[:2 * num_keypoints],
                            torch.FloatTensor(boxes[j][:2 * num_keypoints]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:2 * num_keypoints],
                                                   [-1, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:2 * num_keypoints],
                                                   [-1, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(
                    corners2D_gt)  # Fix the order of corners

                # Compute [R|t] by pnp
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                   axis=1)),
                                       dtype='float32')
                K = np.array(intrinsic_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  intrinsic_calibration)
                proj_corners_gt = np.transpose(
                    compute_projection(corners3D, Rt_gt,
                                       intrinsic_calibration))
                proj_corners_pr = np.transpose(
                    compute_projection(corners3D, Rt_pr,
                                       intrinsic_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

        t5 = time.time()

    # Compute 2D projection score
    eps = 1e-5
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (
            len(errs_2d) + eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
            px_threshold, acc))
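
get_camera_intrinsic builds the 3x3 pinhole intrinsic matrix from the parsed focal lengths and principal point (the no-argument variant used in the other examples presumably hard-codes these values). A minimal sketch, assuming numpy is imported as np:

def get_camera_intrinsic(u0, v0, fx, fy):
    # Pinhole camera matrix K from focal lengths (fx, fy)
    # and principal point (u0, v0)
    return np.array([[fx, 0.0, u0],
                     [0.0, fy, v0],
                     [0.0, 0.0, 1.0]], dtype='float32')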
Example #4
def valid(datacfg, cfgfile, weightfile, conf_th):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    name = options['name']
    prefix = 'results'
    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    test_width = 544
    test_height = 544

    # Get the parser for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(test_width, test_height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    use_cuda = True
    num_classes = 2
    anchors = [
        1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879,
        5.8851
    ]
    num_anchors = 5
    eps = 1e-5
    conf_thresh = conf_th
    iou_thresh = 0.5

    # Parameters to save
    errs_2d = []
    edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8],
             [5, 6], [5, 7], [6, 8], [7, 8]]
    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6],
                     [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]

    # Iterate through test batches (Batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):

        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            # target = target.cuda()

        # Wrap the tensor in a Variable; volatile=True (pre-0.4 PyTorch API) disables autograd to save memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output,
                                                   conf_thresh,
                                                   num_classes,
                                                   anchors,
                                                   num_anchors,
                                                   int(trgt[0][0]),
                                                   only_objectness=0)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)
            if debug_multi:
                print(type(truths))

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            if debug_multi:
                print('numbers of ground truth: ' + str(num_gts))

            # Iterate through each ground-truth object
            for k in range(num_gts):
                if debug_multi:
                    print('object class in label is: ' + str(truths[k][0]))

                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(
                            truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(
                            box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 1280
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 720
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 1280
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 720
                #corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of corners
                # don't fix corner since the order is already correct
                corners2D_gt_corrected = corners2D_gt

                if debug_multi:
                    print('2d corners ground truth: ')
                    print(type(corners2D_gt_corrected))
                    print(corners2D_gt_corrected)

                # Compute [R|t] by pnp
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                   axis=1)),
                                       dtype='float32')
                # Correct the 3D points for classes 2 & 3 (i.e. upperPortRed and upperPortBlue)
                correspondingclass = box_pr[20]  # class of the matched prediction
                if (correspondingclass == 2 or correspondingclass == 3):
                    x_min_3d = 0
                    x_max_3d = 1.2192
                    y_min_3d = 0
                    y_max_3d = 1.1176
                    z_min_3d = 0
                    z_max_3d = 0.003302
                    centroid = [(x_min_3d + x_max_3d) / 2,
                                (y_min_3d + y_max_3d) / 2,
                                (z_min_3d + z_max_3d) / 2]

                    objpoints3D = np.array(
                        [centroid,
                         [x_min_3d, y_min_3d, z_min_3d],
                         [x_min_3d, y_min_3d, z_max_3d],
                         [x_min_3d, y_max_3d, z_min_3d],
                         [x_min_3d, y_max_3d, z_max_3d],
                         [x_max_3d, y_min_3d, z_min_3d],
                         [x_max_3d, y_min_3d, z_max_3d],
                         [x_max_3d, y_max_3d, z_min_3d],
                         [x_max_3d, y_max_3d, z_max_3d]])

                K = np.array(internal_calibration, dtype='float32')
                _, R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                _, R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                proj_corners_gt = np.transpose(
                    compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(
                    compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

        t5 = time.time()

    # Compute 2D projection score
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (
            len(errs_2d) + eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
            px_threshold, acc))
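
compute_projection, used for the pixel-error metric in every example, applies x = K [R|t] X to homogeneous 3D points and dehomogenizes. A minimal sketch, assuming numpy array inputs (points_3D is 4xN, transformation is the 3x4 matrix [R|t], internal_calibration is the 3x3 K):

def compute_projection(points_3D, transformation, internal_calibration):
    # Project homogeneous 3D points into the image plane: x = K [R|t] X
    projections_2d = np.zeros((2, points_3D.shape[1]), dtype='float32')
    camera_projection = (internal_calibration.dot(transformation)).dot(points_3D)
    projections_2d[0, :] = camera_projection[0, :] / camera_projection[2, :]
    projections_2d[1, :] = camera_projection[1, :] / camera_projection[2, :]
    return projections_2d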
Example #5
def train(epoch):

    global processed_batches

    # Initialize timer
    t0 = time.time()
    # Get the dataloader for training dataset
    train_loader = torch.utils.data.DataLoader(dataset.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        seen=model.seen,
        batch_size=batch_size,
        num_workers=num_workers,
        bg_file_names=bg_file_names),
                                               batch_size=batch_size,
                                               shuffle=False,
                                               **kwargs)

    # TRAINING
    lr = adjust_learning_rate(optimizer, epoch)
    logging('epoch %d, processed %d samples, lr (x1000) %f' %
            (epoch, epoch * len(train_loader.dataset), lr * 1000))
    #log_file.write('epoch %d, processed %d samples, lr %f' % (epoch, epoch * len(train_loader.dataset), lr))
    # Start training
    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    niter = 0
    # Iterate through batches
    training_losses = []
    for data, target in tqdm(iter(train_loader)):
        t2 = time.time()
        # Count processed batches (the learning rate is adjusted once per epoch above)
        processed_batches = processed_batches + 1
        # Pass the data to GPU
        data = data.cuda()
        t3 = time.time()
        # Wrap tensors in Variable class for automatic differentiation
        data, target = Variable(data), Variable(target)
        t4 = time.time()
        # Zero the gradients before running the backward pass
        optimizer.zero_grad()
        t5 = time.time()
        # Forward pass
        output = model(data)
        t6 = time.time()
        model.seen = model.seen + data.data.size(0)
        region_loss.seen = region_loss.seen + data.data.size(0)
        # Compute loss, grow an array of losses for saving later on
        loss = region_loss(output, target)
        #training_iters.append(epoch * math.ceil(len(train_loader.dataset) / float(batch_size) ) + niter)
        niter += 1
        t7 = time.time()
        # Backprop: compute gradient of the loss with respect to model parameters
        loss.backward()
        t8 = time.time()
        # Update weights
        optimizer.step()
        t9 = time.time()
        # Reset the per-batch timer
        t1 = time.time()
        training_losses.append(float(loss.item()) / batch_size)
    t1 = time.time()
    avg = sum(training_losses) / len(training_losses)
    print('%d\t%f\t%f\n' % (epoch + 1, lr * 1000, avg))
    log_file.write('%d\t%f\t%f\n' % (epoch + 1, lr * 1000, avg))
    return epoch * math.ceil(
        len(train_loader.dataset) / float(batch_size)) + niter - 1, avg
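
A hypothetical driver loop, consistent with the signatures of train above and eval in Example #6; max_epochs, datacfg, cfgfile, the save path, and the best-model policy are assumptions, not part of the snippets:

# Assumed globals: max_epochs, datacfg, cfgfile, model (a Darknet instance)
best_acc = -1.0
for epoch in range(max_epochs):
    niter, avg_loss = train(epoch)            # returns iteration count and mean loss
    acc = eval(epoch, datacfg, cfgfile)       # 2D-projection accuracy on the valid set
    if acc > best_acc:                        # keep the best-performing weights
        best_acc = acc
        model.save_weights('backup/model_best.weights')  # path is an assumption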
Example #6
def eval(epoch, datacfg, cfgfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    #backupdir     = options['backup']
    name = options['name']
    diam = float(options['diam'])
    vx_threshold = diam * 0.1
    prefix = 'results'
    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Set the globally loaded model to evaluation mode
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(init_width, init_height),
                                        shuffle=False,
                                        objclass=name,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    num_classes = model.num_classes
    anchors = model.anchors
    num_anchors = model.num_anchors
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    testing_samples = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    ts = [0.0, 0.0, 0.0, 0.0, 0.0]
    count = 0

    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test examples
    for data, target in tqdm(iter(test_loader)):
        t1 = time.time()

        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()

        # Wrap the tensor in a Variable inside torch.no_grad() to use minimal memory during inference
        with torch.no_grad():
            data = Variable(data)
        t2 = time.time()

        # Forward pass
        output = model(data).data.cpu()
        t3 = time.time()

        # Merge keypoint predictions into box candidates for each batch element
        #trgt = target[0].view(-1, 21)
        #all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)
        all_boxes = []
        for b in range(output.size(0)):
            boxes = {}
            for i in range(num_anchors):
                results = merge_kps_by_regions(output[b, i].squeeze())
                boxes[i] = results
            all_boxes.append(boxes)  # collect the boxes for this image
        t4 = time.time()

        # Iterate through all batch elements
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(
                            truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(
                            box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(
                    corners2D_gt)  # Fix the order of the corners in OCCLUSION
                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt_corrected -
                                             corners2D_pr,
                                             axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                   axis=1)),
                                       dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)
                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                proj_corners_gt = np.transpose(
                    compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(
                    compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred,
                                        axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1

        t5 = time.time()
        ts[0] += t2 - t1
        ts[1] += (t3 - t2)
        ts[2] += (t4 - t3)
        ts[3] += (t5 - t4)
        ts[4] += (t5 - t1)
        count += 1

    # Compute 2D reprojection score
    s = name + '\t'
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (
            len(errs_2d) + eps)
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
            px_threshold, acc))
        s += str(acc) + '\t'

    # Time statistics refer to the last processed batch
    if True:
        logging('-----------------------------------')
        logging('  tensor to cuda : %f' % (t2 - t1))
        logging('         predict : %f' % (t3 - t2))
        logging('get_region_boxes : %f' % (t4 - t3))
        logging('            eval : %f' % (t5 - t4))
        logging('           total : %f' % (t5 - t1))
        logging('-----------------------------------')
    tt = ''
    for i in range(5):
        ts[i] /= count
        tt += '%f\t' % ts[i]
    print(tt)

    # Register losses and errors for saving later on
    px_threshold = 5
    acc = len(np.where(
        np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc_50 = len(
        np.where(np.array(errs_2d) <= 50)[0]) * 100. / (len(errs_2d) + eps)
    acc3d = len(np.where(
        np.array(errs_3d) <= vx_threshold)[0]) * 100. / (len(errs_3d) + eps)
    acc5cm5deg = len(
        np.where((np.array(errs_trans) <= 0.05)
                 & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) +
                                                              eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)
                     [0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)
    logging("   Mean corner error is %f" % (mean_corner_err_2d))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
        px_threshold, acc))
    logging('   Acc using {} vx 3D Transformation = {:.2f}%'.format(
        vx_threshold, acc3d))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging('   Translation error: %f, angle error: %f' %
            (testing_error_trans / (nts + eps), testing_error_angle /
             (nts + eps)))
    test_log_file.write(s + '\n')
    #test_log_file.write('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n' % (name,epoch+1,mean_corner_err_2d,acc,acc3d,acc5cm5deg,testing_error_trans/(nts+eps), testing_error_angle/(nts+eps),acc_50))
    #test_log_file.write(tt+'\n')
    return acc
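
calcAngularDistance, used above for the angle errors and the 5 cm 5° metric, measures the geodesic distance between the two rotation matrices. A minimal sketch (the clipping guards against numerical drift outside arccos's domain):

def calcAngularDistance(gt_rot, pr_rot):
    # theta = arccos((trace(R_gt R_pr^T) - 1) / 2), reported in degrees
    rot_diff = np.dot(gt_rot, np.transpose(pr_rot))
    trace = np.trace(rot_diff)
    cos_theta = np.clip((trace - 1.0) / 2.0, -1.0, 1.0)
    return np.rad2deg(np.arccos(cos_theta))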