Example #1
    def __getitem__(self, index):

        # load data and metadata
        img = Image.open('{0}/{1}.color.png'.format(self.root, self.list[index]))
        depth = Image.open('{0}/{1}.depth.png'.format(self.root, self.list[index]))
        with open('{0}/{1}.meta.json'.format(self.root, self.list[index])) as json_file:
            image_meta = json.load(json_file)

        label = Image.open('{0}/{1}.gen.label.png'.format(self.label_root, self.list[index]))

        with open('{0}/{1}.meta.json'.format(self.label_root, self.list[index])) as json_file:
            meta = json.load(json_file)


        # find the object class id
        obj = self.class_id_names.index(meta['cls_name'])+1

        # augment the data
        if self.add_noise:
            # change color
            img = self.trancolor(img)

            # rotate
            angle = random.uniform(-180, 180)
            augment_rotation = np.identity(4)
            augment_rotation[:3, :3] = transforms3d.euler.euler2mat(0, 0, np.deg2rad(angle))
            img = img.rotate(angle)
            label = label.rotate(angle)
            depth = depth.rotate(angle)

        # load tf from the camera to the robot
        cam2robot = np.array(meta['cam2robot']).reshape((4, 4))

        # if the image is rotated we need to rotate the camera before computing the target object pose
        if self.add_noise:
            cam2robot = np.dot(np.linalg.inv(augment_rotation), cam2robot)

        # compute object pose
        robot2object = np.array(meta['robot2object']).reshape((4, 4))
        cam2object = np.dot(cam2robot, robot2object)
        target_r = cam2object[:3, :3]
        target_t = cam2object[:3, 3]

        # crop & zoom
        if self.add_noise:
            img, label, depth, delta_t, image_meta['intr'] = crop_and_zoom(
                img, label, depth, image_meta['intr'], target_t[2], augment_rotation)
            target_t -= delta_t

        # convert inputs to numpy arrays
        img = np.array(img)
        label = np.array(label)
        depth = np.array(depth)


        if self.add_noise:
            depth = np.array(depth, dtype=np.float64)
            depth -= target_t[2]
            depth[depth < 0] = 0



        # get masks and bounding box around the object
        mask_label = ma.getmaskarray(ma.masked_equal(label, 255))
        rmin, rmax, cmin, cmax = get_bbox(mask_label)
        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        mask = mask_label * mask_depth



        # offset noise for the target position (currently disabled, i.e. zero offset)
        if self.add_noise:
            add_t = np.array([0, 0, 0])
            # add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])

        # select some points on the object
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) > self.num_pt:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num_pt] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')

        # select the chosen points
        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])

        # compute the Cartesian position of each chosen pixel and assemble the point cloud
        cam_scale = image_meta['depth_scale']
        pt2 = depth_masked / cam_scale
        pt0 = (ymap_masked - image_meta['intr']['ppx']) * pt2 / image_meta['intr']['fx']
        pt1 = (xmap_masked - image_meta['intr']['ppy']) * pt2 / image_meta['intr']['fy']
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        if self.add_noise:
            cloud = np.add(cloud, add_t)

        # if needed, drop some of the model points so every sample has the same number of points
        dellist = [j for j in range(0, len(self.cld[obj]))]
        if self.refine:
            dellist = random.sample(dellist, len(self.cld[obj]) - self.num_pt_mesh_large)
        else:
            dellist = random.sample(dellist, len(self.cld[obj]) - self.num_pt_mesh_small)
        model_points = np.delete(self.cld[obj], dellist, axis=0)

        # rotate and translate the model points into the camera frame to form the target
        target = np.dot(model_points, target_r.T)
        if self.add_noise:
            target = np.add(target, target_t + add_t)
        else:
            target = np.add(target, target_t)

        # show the created sample for debugging (if requested)
        if self.show_sample:
            plt.subplot(1,2,1)
            image_cloud = pc_utils.pointcloud2image(img.copy(), cloud, 3, image_meta['intr'])
            plt.imshow(image_cloud)
            plt.title('cloud')
            plt.subplot(1,2,2)
            image_target = pc_utils.pointcloud2image(img.copy(), target, 3, image_meta['intr'])
            plt.imshow(image_target)
            plt.title('target')
            plt.show()

        # crop the image and transpose it to (C, H, W) for the torch tensor
        img_masked = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]

        # return sample as torch
        return torch.from_numpy(cloud.astype(np.float32)), \
               torch.LongTensor(choose.astype(np.int32)), \
               self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
               torch.from_numpy(target.astype(np.float32)), \
               torch.from_numpy(model_points.astype(np.float32)), \
               torch.LongTensor([int(obj) - 1])
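The pt0/pt1/pt2 lines above back-project each chosen depth pixel through the pinhole intrinsics into a camera-frame point. Below is a minimal, self-contained sketch of that back-projection, assuming an intrinsics dict with the same ppx/ppy/fx/fy keys as image_meta['intr']; the backproject helper and the numeric values are illustrative placeholders, not part of the original code.

import numpy as np

def backproject(depth_m, rows, cols, intr):
    # depth_m: (N,) depths in metres; rows/cols: (N,) pixel indices; intr: pinhole intrinsics
    z = depth_m
    x = (cols - intr['ppx']) * z / intr['fx']
    y = (rows - intr['ppy']) * z / intr['fy']
    return np.stack([x, y, z], axis=1)  # (N, 3) camera-frame points

intr = {'ppx': 320.0, 'ppy': 240.0, 'fx': 600.0, 'fy': 600.0}  # placeholder intrinsics
pts = backproject(np.array([0.5, 0.6]), np.array([100, 200]), np.array([300, 400]), intr)
print(pts.shape)  # (2, 3)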
Example #2
def full_prediction(image, depth, meta, segmentor, estimator, refiner, to_tensor, normalize, device, cuda, color_dict,
                    class_names=None, point_clouds=None, plot=False, color_prediction=False, bbox=False, put_text=False):

    start_time = time.time()
    output_dict = {'predictions': {},
                   'elapsed_times': {}}

    if color_prediction:
        output_dict['segmented_prediction'] = np.array(copy.deepcopy(image), dtype=np.float64)
        output_dict['pose_prediction'] = np.array(copy.deepcopy(image), dtype=np.float64)

    # preprocess the input
    x = copy.deepcopy(image)
    x = to_tensor(x)
    x = normalize(x)

    x = x.to(device)
    x = x.unsqueeze(0)
    # get the segmentation label
    pred = segmentor.predict(x)
    pred = F.softmax(pred, dim=1)
    if cuda:
        pred = pred.cpu()
    pred = pred[0]
    # crop and preprocess the label to pass into the pose estimation
    pred_arg = torch.argmax(pred, dim=0).numpy()

    found_cls, counts = np.unique(pred_arg, return_counts=True)

    if 0 in found_cls:
        start = 1
    else:
        start = 0

    for i, cls in enumerate(found_cls[start:]):
        if counts[i+start] > 100:
            cls_pred_arg = copy.deepcopy(pred_arg)
            cls_pred_arg[cls_pred_arg != cls] = 0
            cls_pred = cls_pred_arg * pred[cls].numpy()

            ret, labels = cv2.connectedComponents(np.array(cls_pred_arg, dtype=np.uint8), connectivity=8)

            biggest = 1
            biggest_score = 0
            unique = np.unique(labels)
            if 0 in unique:
                start2 = 1
            else:
                start2 = 0
            for u in unique[start2:]:
                score = np.mean(cls_pred[labels == u])
                if score > biggest_score:
                    biggest_score = score
                    biggest = u

            cls_pred[labels != biggest] = 0
            cls_pred[cls_pred != 0] = 255
            cls_pred = np.array(cls_pred, dtype=np.uint8)

            output_dict['predictions'][class_names[cls-1]] = {'mask': cls_pred}

            if color_prediction:
                for c, color_value in enumerate(color_dict[class_names[cls-1]]['value']):
                    c_mask = np.zeros(cls_pred.shape)
                    c_mask[cls_pred != 0] = color_value
                    output_dict['segmented_prediction'][:, :, c][cls_pred != 0] = \
                        output_dict['segmented_prediction'][:, :, c][cls_pred != 0] * 0.7 + c_mask[cls_pred != 0] * 0.3

                if bbox:
                    bbox = get_bbox(cls_pred)
                    cv2.rectangle(output_dict['segmented_prediction'],
                                  (bbox[2], bbox[0]),
                                  (bbox[3], bbox[1]),
                                  color_dict[class_names[cls-1]]['value'],
                                  2)
                if put_text:
                    bbox = get_bbox(cls_pred)
                    try:
                        cv2.putText(output_dict['segmented_prediction'],
                                    'Segmentation',
                                    (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    1,
                                    (0, 0, 0),
                                    2,
                                    cv2.LINE_AA)

                        cv2.putText(output_dict['segmented_prediction'],
                                    class_names[cls - 1],
                                    (bbox[2] + 10, bbox[0] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    1,
                                    color_dict[class_names[cls-1]]['value'],
                                    2,
                                    cv2.LINE_AA)
                    except Exception:
                        pass



    if color_prediction:
        output_dict['segmented_prediction'][output_dict['segmented_prediction'] < 0] = 0
        output_dict['segmented_prediction'][output_dict['segmented_prediction'] > 255] = 255
        output_dict['segmented_prediction'] = np.array(output_dict['segmented_prediction'], dtype=np.uint8)

    output_dict['elapsed_times']['segmentation'] = time.time()-start_time

    start_time_pose = time.time()
    xmap = np.array([[j for i in range(640)] for j in range(480)])
    ymap = np.array([[i for i in range(640)] for j in range(480)])
    num_points = 1000
    # estimate the pose
    for cls in output_dict['predictions']:
        #print('cls name', cls, class_names.index(cls))
        mask_label = ma.getmaskarray(ma.masked_equal(output_dict['predictions'][cls]['mask'], 255))
        rmin, rmax, cmin, cmax = get_bbox(mask_label)
        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        mask = mask_label * mask_depth
        # select some points on the object
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) == 0:
            continue

        if len(choose) > num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:num_points] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

        # select the chosen points
        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])

        # compute the Cartesian position of each chosen pixel and assemble the point cloud

        pt2 = depth_masked * meta['depth_scale']
        pt0 = (ymap_masked - meta['intr']['ppx']) * pt2 / meta['intr']['fx']
        pt1 = (xmap_masked - meta['intr']['ppy']) * pt2 / meta['intr']['fy']

        points = np.concatenate((pt0, pt1, pt2), axis=1)
        #np_points = copy.deepcopy(points)

        points = torch.from_numpy(points.astype(np.float32)).unsqueeze(0).to(device)
        choose = torch.LongTensor(choose.astype(np.int32)).unsqueeze(0).to(device)

        img = np.transpose(np.array(image)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]
        img = normalize(torch.from_numpy(img.astype(np.float32))).unsqueeze(0).to(device)
        idx = torch.LongTensor([int(class_names.index(cls))]).unsqueeze(0).to(device)


        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
        new_points = get_new_points(pred_r, pred_t, pred_c, points)
        _, my_r, my_t = my_estimator_prediction(pred_r, pred_t, pred_c, num_points, 1, points)

        # refine the pose
        for ite in range(0, 2):
            pred_r, pred_t = refiner(new_points, emb, idx)
        _, my_r, my_t = my_refined_prediction(pred_r, pred_t, my_r, my_t)

        output_dict['predictions'][cls]['position'] = my_t
        output_dict['predictions'][cls]['rotation'] = my_r

        if color_prediction:
            my_r = quaternion_matrix(my_r)[:3, :3]
            np_pred = np.dot(point_clouds[class_names.index(cls)], my_r.T)
            np_pred = np.add(np_pred, my_t)

            output_dict['pose_prediction'] = pc_utils.pointcloud2image(output_dict['pose_prediction'],
                                                                       np_pred,
                                                                       3,
                                                                       meta['intr'],
                                                                       color=color_dict[cls]['value'])

            if put_text:
                try:
                    cv2.putText(output_dict['pose_prediction'],
                                'Pose Estimation',
                                (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                1,
                                (0, 0, 0),
                                2,
                                cv2.LINE_AA)
                except Exception:
                    pass

    if color_prediction:
        output_dict['pose_prediction'][output_dict['pose_prediction'] < 0] = 0
        output_dict['pose_prediction'][output_dict['pose_prediction'] > 255] = 255
        output_dict['pose_prediction'] = np.array(output_dict['pose_prediction'], dtype=np.uint8)

    output_dict['elapsed_times']['pose_estimation'] = time.time() - start_time_pose

    if plot and color_prediction:
        title = ''
        for cls in output_dict['predictions']:
            title += '{}: {}, '.format(color_dict[cls]['tag'], cls)
        fig, axs = plt.subplots(1, 2, constrained_layout=True)
        fig.suptitle(title)
        plt.subplot(1, 2, 1)
        plt.imshow(output_dict['segmented_prediction'])
        plt.axis('off')
        plt.title('segmented_prediction')
        plt.subplot(1, 2, 2)
        plt.imshow(output_dict['pose_prediction'])
        plt.axis('off')
        plt.title('pose_prediction')
        plt.show()

    del_keys = []
    for cls in output_dict['predictions'].keys():
        #print(output_dict['predictions'][cls].keys())
        if 'position' not in output_dict['predictions'][cls]:
            del_keys.append(cls)
        elif 'rotation' not in output_dict['predictions'][cls]:
            del_keys.append(cls)
        elif 'mask' not in output_dict['predictions'][cls]:
            del_keys.append(cls)

    for cls in del_keys:
        print('Deleting cls "{}"'.format(cls))
        del output_dict['predictions'][cls]

    output_dict['elapsed_times']['total'] = time.time() - start_time

    # return a dict with the found objects and their poses, plus the painted images if requested

    return output_dict
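A step of full_prediction worth highlighting is the instance selection: after thresholding a class, it runs connected components on the binary mask and keeps only the component with the highest mean softmax score. The following stand-alone sketch replays that rule on a synthetic mask and score map (the toy arrays and sizes are made up for illustration):

import numpy as np
import cv2

mask = np.zeros((60, 60), dtype=np.uint8)
mask[5:15, 5:15] = 1       # small blob
mask[30:55, 30:55] = 1     # large blob
score = np.zeros((60, 60), dtype=np.float32)
score[5:15, 5:15] = 0.4    # low-confidence component
score[30:55, 30:55] = 0.9  # high-confidence component

num_labels, labels = cv2.connectedComponents(mask, connectivity=8)
best_label, best_score = 0, -1.0
for lbl in range(1, num_labels):  # label 0 is the background
    s = float(np.mean(score[labels == lbl]))
    if s > best_score:
        best_score, best_label = s, lbl

instance_mask = np.where(labels == best_label, 255, 0).astype(np.uint8)
print(best_label, best_score)  # the large, high-confidence blob wins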
Example #3
def visualise_pose_label(root):
    path = os.path.join(root, 'label_generator/data')

    objects = sorted(os.listdir(path))
    if len(objects) == 0:
        print('No labels available')
        return

    while True:
        print('____________________________________________________________________')
        selection = get_selection(objects, 'Select Object')
        if not selection:
            break
        object_path = os.path.join(path, selection)
        foregrounds = sorted(os.listdir(object_path))
        if not foregrounds:
            print('The object "{}" has no foregrounds'.format(selection))
            continue

        while True:
            print('____________________________________________________________________')
            foreground = get_selection(foregrounds, 'Select the foreground')
            if not foreground:
                break
            foreground_path = os.path.join(object_path, foreground)
            label_dirs = list(os.listdir(foreground_path))
            if not label_dirs:
                print('The foreground "{}" has no labels'.format(foreground))
                continue

            suffix_len = len('.meta.json')
            dirs = np.array([d[:-suffix_len] for d in label_dirs if '.meta.json' in d])
            np.random.shuffle(dirs)

            data_path = os.path.join(root, 'data_generation/data', selection, foreground)
            pc_path = os.path.join(root, 'pc_reconstruction/data', selection, '{}.ply'.format(selection))

            cancellationToken = CancellationToken()
            thread = Thread(target=stop_visualise, args=[cancellationToken])
            thread.daemon = False
            thread.start()

            for d in dirs:
                with open(os.path.join(data_path, '{}.color.png'.format(d)), 'rb') as f:
                    image = np.array(Image.open(f).convert('RGB'), dtype=np.uint8)

                with open(os.path.join(data_path, '{}.meta.json'.format(d))) as f:
                    image_meta = json.load(f)
                    intr = image_meta.get('intr')

                with open(os.path.join(foreground_path, '{}.meta.json'.format(d))) as f:
                    meta = json.load(f)

                    object_position = np.array(meta.get('position'))
                    object_rotation = np.array(meta.get('rotation')).reshape(3, 3)

                cam2object = np.identity(4)
                cam2object[:3, 3] = object_position
                cam2object[:3, :3] = object_rotation
                point_cloud = o3d.io.read_point_cloud(pc_path)
                point_cloud.transform(cam2object)
                added = pc_utils.pointcloud2image(copy.deepcopy(image), point_cloud, 3, intr)

                plt.cla()
                plt.subplot(1, 2, 1)
                plt.imshow(image)
                plt.title('Image')
                plt.axis('off')
                plt.subplot(1, 2, 2)
                plt.imshow(added)
                plt.title('Added')
                plt.axis('off')
                plt.show()

                if cancellationToken.is_cancelled:
                    break

    print('Returning to Visualization Menu')
    return
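visualise_pose_label relies on a CancellationToken and a stop_visualise target that are defined elsewhere in the repository. A minimal sketch of how such a pair could look is shown below; this is an assumption for illustration, not the project's actual implementation. A background thread blocks on input() and flips a flag that the display loop polls.

import time
from threading import Thread

class CancellationToken:
    def __init__(self):
        self.is_cancelled = False

    def cancel(self):
        self.is_cancelled = True

def stop_visualise(token):
    input('Press Enter to stop the visualisation...\n')
    token.cancel()

if __name__ == '__main__':
    token = CancellationToken()
    thread = Thread(target=stop_visualise, args=[token])
    thread.daemon = True
    thread.start()
    while not token.is_cancelled:  # the display loop in visualise_pose_label polls this flag
        time.sleep(0.1)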
Example #4
    def __getitem__(self, index):

        # load data and metadata
        if index < self.len_data:
            if self.show_sample:
                print('view point sample')
            # load view point data
            img = Image.open('{0}/{1}.color.png'.format(
                self.root, self.list[index]))
            depth = Image.open('{0}/{1}.depth.png'.format(
                self.root, self.list[index]))
            with open('{0}/{1}.meta.json'.format(
                    self.root, self.list[index])) as json_file:
                image_meta = json.load(json_file)

            label = Image.open('{0}/{1}.{2}.label.png'.format(
                self.label_root, self.list[index], self.label_mode))
            with open('{0}/{1}.meta.json'.format(
                    self.label_root, self.list[index])) as json_file:
                meta = json.load(json_file)
        elif index < self.length:
            if self.show_sample:
                print('extra sample')
            # load extra data
            img = Image.open('{0}/{1}.color.png'.format(
                self.root, self.extra_data[self.extra_data_index]))
            depth = Image.open('{0}/{1}.depth.png'.format(
                self.root, self.extra_data[self.extra_data_index]))
            with open('{0}/{1}.meta.json'.format(
                    self.root,
                    self.extra_data[self.extra_data_index])) as json_file:
                image_meta = json.load(json_file)

            label = Image.open('{0}/{1}.new_pred.label.png'.format(
                self.label_root, self.extra_data[self.extra_data_index]))
            with open('{0}/{1}.meta.json'.format(
                    self.label_root,
                    self.extra_data[self.extra_data_index])) as json_file:
                meta = json.load(json_file)
            self.extra_data_index += 1
            if self.extra_data_index >= self.len_extra_data:
                self.extra_data_ids = np.arange(self.len_extra_data)
                np.random.shuffle(self.extra_data_ids)
                self.extra_data_index = 0
        else:
            raise ValueError

        # set the camera intrinsics
        cam_cx = image_meta['intr']['ppx']
        cam_cy = image_meta['intr']['ppy']
        cam_fx = image_meta['intr']['fx']
        cam_fy = image_meta['intr']['fy']

        # find the object class id
        obj = self.class_id_names.index(meta['cls_name'])

        # augment the data
        if self.add_noise:
            # change color
            img = self.trancolor(img)

            # rotate
            angle = random.uniform(-180, 180)
            augment_rotation = np.identity(4)
            augment_rotation[:3, :3] = transforms3d.euler.euler2mat(
                0, 0, np.deg2rad(angle))
            img = img.rotate(angle)
            label = label.rotate(angle)
            depth = depth.rotate(angle)

        # load tf from the camera to the robot
        cam2robot = np.array(meta['cam2robot']).reshape((4, 4))

        # if the image is rotated we need to rotate the camera before computing the target object pose
        if self.add_noise:
            cam2robot = np.dot(np.linalg.inv(augment_rotation), cam2robot)

        # compute object pose
        robot2object = np.array(meta['robot2object']).reshape((4, 4))
        cam2object = np.dot(cam2robot, robot2object)
        target_r = cam2object[:3, :3]
        target_t = cam2object[:3, 3]
        if self.to_meter:
            target_t = target_t / 1000

        # convert inputs to numpy arrays
        img = np.array(img)
        label = np.array(label)
        depth = np.array(depth)

        # get masks and bounding box around the object
        mask_label = ma.getmaskarray(ma.masked_equal(label, 255))
        rmin, rmax, cmin, cmax = get_bbox(mask_label)
        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        mask = mask_label * mask_depth

        # add some offset noise to the target position
        if self.add_noise:
            add_t = np.array([
                random.uniform(-self.noise_trans, self.noise_trans)
                for i in range(3)
            ])

        # select some points on the object
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) > self.num_pt:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num_pt] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')

        # select the chosen points
        depth_masked = depth[rmin:rmax,
                             cmin:cmax].flatten()[choose][:,
                                                          np.newaxis].astype(
                                                              np.float32)
        xmap_masked = self.xmap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])

        # compute the Cartesian position of each chosen pixel and assemble the point cloud
        cam_scale = image_meta['depth_scale']

        pt2 = depth_masked * cam_scale
        if not self.to_meter:
            pt2 = pt2 * 1000

        pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
        pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy

        cloud = np.concatenate((pt0, pt1, pt2), axis=1)

        if self.add_noise:
            cloud = np.add(cloud, add_t)

        # if needed, drop some of the model points so every sample has the same number of points
        dellist = [j for j in range(0, len(self.cld[obj]))]
        dellist = random.sample(dellist, len(self.cld[obj]) - self.num_pt_mesh)
        model_points = np.delete(self.cld[obj], dellist, axis=0)

        # rotate and translate the model points into the camera frame to form the target
        target = np.dot(model_points, target_r.T)
        if self.add_noise:
            target = np.add(target, target_t + add_t)
        else:
            target = np.add(target, target_t)

        # show the created sample for debugging (if requested)
        if self.show_sample:
            fig, axs = plt.subplots(1, 2, constrained_layout=True)
            fig.suptitle('{}, {}'.format(meta['cls_name'], obj), fontsize=16)
            plt.subplot(1, 2, 1)
            image_cloud = pc_utils.pointcloud2image(img.copy(), cloud, 3,
                                                    image_meta['intr'])
            plt.imshow(image_cloud)
            plt.title('cloud')
            plt.subplot(1, 2, 2)
            plt.title('target')
            image_target = pc_utils.pointcloud2image(img.copy(), target, 3,
                                                     image_meta['intr'])
            plt.imshow(image_target)
            plt.show()

        # crop the image and transpose it to (C, H, W) for the torch tensor
        img_masked = np.transpose(np.array(img)[:, :, :3],
                                  (2, 0, 1))[:, rmin:rmax, cmin:cmax]

        # return sample as torch
        if self.mode == 'test':
            return torch.from_numpy(cloud.astype(np.float32)), \
                   torch.LongTensor(choose.astype(np.int32)), \
                   self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
                   torch.from_numpy(target.astype(np.float32)), \
                   torch.from_numpy(model_points.astype(np.float32)), \
                   torch.LongTensor([int(obj)]), \
                   image_meta['intr'],\
                   img.copy()

        else:
            return torch.from_numpy(cloud.astype(np.float32)), \
                   torch.LongTensor(choose.astype(np.int32)), \
                   self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
                   torch.from_numpy(target.astype(np.float32)), \
                   torch.from_numpy(model_points.astype(np.float32)), \
                   torch.LongTensor([int(obj)])
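A detail of __getitem__ that is easy to miss is how it always produces a fixed number of points: the non-zero mask pixels inside the bounding box are randomly subsampled when there are more than num_pt of them and wrap-padded when there are fewer. Below is a stand-alone sketch of that rule with a synthetic mask (function name and sizes are illustrative only):

import numpy as np

def sample_choose(mask_crop, num_pt):
    # indices of the non-zero pixels in the cropped mask
    choose = mask_crop.flatten().nonzero()[0]
    if len(choose) > num_pt:
        keep = np.zeros(len(choose), dtype=int)
        keep[:num_pt] = 1
        np.random.shuffle(keep)
        choose = choose[keep.nonzero()]  # random subset of exactly num_pt indices
    else:
        choose = np.pad(choose, (0, num_pt - len(choose)), 'wrap')  # repeat indices to reach num_pt
    return choose

mask_crop = (np.random.rand(40, 40) > 0.5).astype(np.uint8)
print(sample_choose(mask_crop, 500).shape)  # always (500,)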
Example #5
def create_pose_label(root,
                      object_name,
                      global_regression,
                      icp_point2point,
                      icp_point2plane,
                      plot=False,
                      view_label=False,
                      with_extra=False):

    object_path = os.path.join(root, 'data_generation/data', object_name)
    dirs = os.listdir(object_path)
    background_path = os.path.join(object_path, 'background')
    pc_path = os.path.join(root, 'pc_reconstruction/data', object_name,
                           '{}_out.ply'.format(object_name))
    pco_path = os.path.join(root, 'pc_reconstruction/data', object_name,
                            '{}.ply'.format(object_name))

    try:
        i = dirs.index('background')
        del dirs[i]
    except ValueError:
        raise ValueError(
            'background does not exist in object_path: {}'.format(object_path))

    if 'extra' in dirs:
        i = dirs.index('extra')
        del dirs[i]
        if with_extra:
            dirs.append('extra')

    if len(dirs) < 1:
        raise ValueError('no foreground')

    remember_pos_and_rot = []
    for d in dirs:
        pc_position = None
        pc_rotation = None
        data_path = os.path.join(object_path, d)
        label_path = os.path.join(root, 'label_generator/data', object_name, d)
        print('Getting pc position for {}'.format(d))
        if d != 'extra':
            source = o3d.io.read_point_cloud(pc_path)
            pc_position = pc_utils.get_my_source_center(source)

            # TODO: exchange this with reading the correct value from the meta file

            for meta_file in os.listdir(data_path):
                if meta_file[-5:] == '.json':
                    with open(os.path.join(data_path, meta_file)) as f:
                        meta = json.load(f)
                        pc_rotation = np.array(
                            meta.get('object_pose')).reshape(4, 4)[:3, :3]
                    break

            old_rotation = np.rad2deg(
                transforms3d.euler.mat2euler(pc_rotation))
            if not np.array_equal(old_rotation, np.array([0.0, 0.0, 0.0])):

                # load point cloud
                pc_target_path = os.path.join(root, 'pc_reconstruction/data',
                                              object_name, '{}.ply'.format(d))

                target = o3d.io.read_point_cloud(pc_target_path)
                source = o3d.io.read_point_cloud(pc_path)

                #pc_utils.draw_registration_result(source, target, np.identity(4))
                # match pointcloud to current surface
                target, source, init_tf = pc_utils.icp_regression(
                    target,
                    source,
                    voxel_size=5,
                    threshold=10,
                    global_regression=global_regression,
                    icp_point2point=icp_point2point,
                    icp_point2plane=icp_point2plane,
                    plot=plot)

                print('requested rotation: {}'.format(old_rotation))
                pc_rotation = np.dot(pc_rotation, init_tf[:3, :3])
                diff_rotation = np.rad2deg(
                    transforms3d.euler.mat2euler(init_tf[:3, :3]))
                euler = np.array(transforms3d.euler.mat2euler(pc_rotation))
                # set zeros where there should be no rotations
                for i, angle in enumerate(old_rotation):
                    if angle == 0.0:
                        euler[i] = 0.0
                        diff_rotation[i] = 0.0

                pc_rotation = transforms3d.euler.euler2mat(
                    euler[0], euler[1], euler[2])
                new_rotation = np.rad2deg(
                    transforms3d.euler.mat2euler(pc_rotation))
                print('___')
                print('found rotation: {}'.format(new_rotation))
                print('diff in rotation: {}'.format(diff_rotation))
                print('old pc_position: {}'.format(pc_position))
                print('new pc_position: {}'.format(
                    np.array(source.get_center())))
                print('diff in pc_position: {}'.format(
                    np.array(source.get_center() - pc_position)))
                print('___')
                pc_position = np.array(pc_utils.get_my_source_center(source))

            remember_pos_and_rot.append({
                'old_rotation': old_rotation,
                'pc_position': pc_position,
                'pc_rotation': pc_rotation
            })

        samples = list(os.listdir(data_path))
        samples = [d[:-10] for d in samples if '.color.png' in d]
        for id in samples:
            with open(os.path.join(data_path, '{}.meta.json'.format(id))) as f:
                meta = json.load(f)
                intr = meta.get('intr')
                robotEndEff2Cam = np.array(
                    meta.get('hand_eye_calibration')).reshape((4, 4))
                robot2endEff_tf = np.array(
                    meta.get('robot2endEff_tf')).reshape((4, 4))
                object_pose = np.array(meta.get('object_pose')).reshape(
                    4, 4)[:3, :3]

            if d == 'extra':
                for remembered in remember_pos_and_rot:
                    object_rotation = np.rad2deg(
                        transforms3d.euler.mat2euler(object_pose))
                    if np.array_equal(object_rotation,
                                      remembered['old_rotation']):
                        pc_position = remembered['pc_position']
                        pc_rotation = remembered['pc_rotation']
                        break

            # get the transformation from the camera to the object
            robot2object = np.identity(4)
            robot2object[:3, :3] = pc_rotation
            robot2object[:3, 3] = pc_position

            cam2robot = np.dot(np.linalg.inv(robotEndEff2Cam),
                               np.linalg.inv(robot2endEff_tf))
            cam2object = np.dot(cam2robot, robot2object)

            object_position = cam2object[:3, 3]
            object_rotation = cam2object[:3, :3]

            pose_label = {
                'position': list(object_position),
                'rotation': list(object_rotation.flatten()),
                'cls_name': object_name,
                'cam2robot': list(cam2robot.flatten()),
                'robot2object': list(robot2object.flatten())
            }

            with open(os.path.join(label_path, '{}.meta.json'.format(id)),
                      'w') as f:
                json.dump(pose_label, f)

            if view_label:
                # paint the image
                with open(os.path.join(data_path, '{}.color.png'.format(id)),
                          'rb') as f:
                    image = np.array(Image.open(f).convert('RGB'))

                point_cloud = o3d.io.read_point_cloud(pco_path)
                point_cloud.transform(cam2object)
                image = pc_utils.pointcloud2image(image, point_cloud, 3, intr)
                plt.imshow(image)
                plt.show()
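The pose label in create_pose_label comes from a chain of homogeneous transforms: cam2robot = inv(robotEndEff2Cam) . inv(robot2endEff_tf), then cam2object = cam2robot . robot2object. Below is a small numpy sketch of that composition; the 4x4 matrices are placeholders, not real calibration values.

import numpy as np

def make_tf(rotation, translation):
    tf = np.identity(4)
    tf[:3, :3] = rotation
    tf[:3, 3] = translation
    return tf

robotEndEff2Cam = make_tf(np.identity(3), [0.0, 0.0, 0.05])  # hand-eye calibration (placeholder)
robot2endEff_tf = make_tf(np.identity(3), [0.3, 0.0, 0.4])   # robot forward kinematics (placeholder)
robot2object = make_tf(np.identity(3), [0.5, 0.1, 0.0])      # object pose in the robot frame (placeholder)

cam2robot = np.dot(np.linalg.inv(robotEndEff2Cam), np.linalg.inv(robot2endEff_tf))
cam2object = np.dot(cam2robot, robot2object)
print(cam2object[:3, 3])  # object position in the camera frame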
Example #6
def main(data_set_name,
         root,
         save_extra='',
         load_pretrained=True,
         load_trained=False,
         load_name='',
         label_mode='new_pred',
         p_extra_data=0.0,
         p_viewpoints=1.0,
         show_sample=False,
         plot_train=False,
         device_num=0):

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=8, help='batch size')
    parser.add_argument('--workers',
                        type=int,
                        default=8,
                        help='number of data loading workers')
    parser.add_argument('--lr', default=0.0001, help='learning rate')
    parser.add_argument('--lr_rate',
                        default=0.3,
                        help='learning rate decay rate')
    parser.add_argument('--w', default=0.015, help='weight of the confidence regularization term in the loss')
    parser.add_argument('--w_rate',
                        default=0.3,
                        help='decay rate for the loss weight w')
    parser.add_argument('--decay_margin',
                        default=0.016,
                        help='margin to decay lr & w')
    parser.add_argument(
        '--refine_margin',
        default=0.010,
        help='margin to start the training of iterative refinement')
    parser.add_argument(
        '--noise_trans',
        default=0.03,
        help=
        'range of the random noise of translation added to the training data')
    parser.add_argument('--iteration',
                        type=int,
                        default=2,
                        help='number of refinement iterations')
    parser.add_argument('--nepoch',
                        type=int,
                        default=500,
                        help='max number of epochs to train')
    parser.add_argument('--refine_epoch_margin',
                        type=int,
                        default=400,
                        help='epoch after which refiner training starts regardless of the distance margin')
    parser.add_argument('--start_epoch',
                        type=int,
                        default=1,
                        help='which epoch to start')
    opt = parser.parse_args()

    opt.manualSeed = random.randint(1, 10000)

    torch.cuda.set_device(device_num)

    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    print('bs', opt.batch_size, 'it', opt.iteration)

    opt.refine_start = False
    opt.num_points = 1000  #number of points on the input pointcloud
    opt.outf = os.path.join(root, 'DenseFusion/trained_models', data_set_name +
                            save_extra)  #folder to save trained models
    if not os.path.exists(opt.outf):
        os.makedirs(opt.outf)
    opt.log_dir = os.path.join(root, 'DenseFusion/experiments/logs',
                               data_set_name +
                               save_extra)  #folder to save logs
    opt.log_dir_images = os.path.join(root, 'DenseFusion/experiments/logs',
                                      data_set_name + save_extra, 'images')
    if not os.path.exists(opt.log_dir):
        os.makedirs(opt.log_dir)
    if not os.path.exists(opt.log_dir_images):
        os.makedirs(opt.log_dir_images)

    opt.repeat_epoch = 1  #number of repeat times for one epoch training
    print('create datasets')
    dataset = PoseDataset('train',
                          opt.num_points,
                          True,
                          0.0,
                          opt.refine_start,
                          data_set_name,
                          root,
                          show_sample=show_sample,
                          label_mode=label_mode,
                          p_extra_data=p_extra_data,
                          p_viewpoints=p_viewpoints)

    test_dataset = PoseDataset('test',
                               opt.num_points,
                               False,
                               0.0,
                               opt.refine_start,
                               data_set_name,
                               root,
                               show_sample=show_sample,
                               label_mode=label_mode,
                               p_extra_data=p_extra_data,
                               p_viewpoints=p_viewpoints)

    opt.num_objects = dataset.num_classes  #number of object classes in the dataset
    print('n classes: {}'.format(dataset.num_classes))

    print('create models')
    estimator = PoseNet(num_points=opt.num_points, num_obj=opt.num_objects)
    estimator.cuda()
    refiner = PoseRefineNet(num_points=opt.num_points, num_obj=opt.num_objects)
    refiner.cuda()

    if load_pretrained:
        # load the estimator model pretrained on the YCB dataset; keep the freshly initialized last layers because the number of classes differs
        init_state_dict = estimator.state_dict()
        pretrained_dict = torch.load(
            os.path.join(root, 'DenseFusion/trained_models/pose_model.pth'))
        pretrained_dict['conv4_r.weight'] = init_state_dict['conv4_r.weight']
        pretrained_dict['conv4_r.bias'] = init_state_dict['conv4_r.bias']
        pretrained_dict['conv4_t.weight'] = init_state_dict['conv4_t.weight']
        pretrained_dict['conv4_t.bias'] = init_state_dict['conv4_t.bias']
        pretrained_dict['conv4_c.weight'] = init_state_dict['conv4_c.weight']
        pretrained_dict['conv4_c.bias'] = init_state_dict['conv4_c.bias']
        estimator.load_state_dict(pretrained_dict)

        del init_state_dict
        del pretrained_dict

        # load the refiner model pretrained on the YCB dataset; keep the freshly initialized last layers because the number of classes differs
        init_state_dict = refiner.state_dict()
        pretrained_dict = torch.load(
            os.path.join(root,
                         'DenseFusion/trained_models/pose_refine_model.pth'))
        pretrained_dict['conv3_r.weight'] = init_state_dict['conv3_r.weight']
        pretrained_dict['conv3_r.bias'] = init_state_dict['conv3_r.bias']
        pretrained_dict['conv3_t.weight'] = init_state_dict['conv3_t.weight']
        pretrained_dict['conv3_t.bias'] = init_state_dict['conv3_t.bias']
        refiner.load_state_dict(pretrained_dict)

        del init_state_dict
        del pretrained_dict
    elif load_trained:
        loading_path = os.path.join(
            root,
            'DenseFusion/trained_models/{}/pose_model.pth'.format(load_name))
        pretrained_dict = torch.load(loading_path)
        estimator.load_state_dict(pretrained_dict)

        loading_path = os.path.join(
            root, 'DenseFusion/trained_models/{}/pose_refine_model.pth'.format(
                load_name))
        pretrained_dict = torch.load(loading_path)
        refiner.load_state_dict(pretrained_dict)
        del pretrained_dict

    print('create optimizer and dataloader')
    #opt.refine_start = False
    opt.decay_start = False
    optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)

    #dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=opt.workers,
    #                                         collate_fn=collate_fn)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=True,
                                             num_workers=opt.workers)

    testdataloader = torch.utils.data.DataLoader(test_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=opt.workers)

    opt.sym_list = dataset.get_sym_list()
    opt.num_points_mesh = dataset.get_num_points_mesh()

    print(
        '>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}'
        '\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'
        .format(len(dataset), len(test_dataset), opt.num_points_mesh,
                opt.sym_list))

    criterion = Loss(opt.num_points_mesh, opt.sym_list)
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)

    best_test = np.inf
    best_test_epoch = 0
    best_train = np.inf
    best_train_epoch = 0
    if opt.start_epoch == 1:
        for log in os.listdir(opt.log_dir):
            if log != 'images':
                os.remove(os.path.join(opt.log_dir, log))
        for img in os.listdir(opt.log_dir_images):
            os.remove(os.path.join(opt.log_dir_images, img))

    train_dists = []
    test_dists = []
    losses = []
    refiner_losses = []
    best_loss = np.inf
    best_loss_epoch = 0
    elapsed_times = 0.0

    for epoch in range(opt.start_epoch, opt.nepoch):
        start_time = time.time()
        train_count = 0
        train_dis_avg = 0.0

        if opt.refine_start:
            estimator.eval()
            refiner.train()
        else:
            estimator.train()

        optimizer.zero_grad()

        epoch_losses = []
        epoch_losses_refiner = []
        for rep in range(opt.repeat_epoch):
            #for batch in dataloader:
            #points, choose, img, target, model_points, idx = batch
            #print(points.shape, choose.shape, img.shape, target.shape, model_points.shape)
            for i, data in enumerate(dataloader, 0):
                points, choose, img, target, model_points, idx = data

                #print(points.shape, choose.shape, img.shape, target.shape, model_points.shape)
                points, choose, img, target, model_points, idx = Variable(points).cuda(), \
                                                                 Variable(choose).cuda(), \
                                                                 Variable(img).cuda(), \
                                                                 Variable(target).cuda(), \
                                                                 Variable(model_points).cuda(), \
                                                                 Variable(idx).cuda()
                pred_r, pred_t, pred_c, emb = estimator(
                    img, points, choose, idx)
                loss, dis, new_points, new_target, pred = criterion(
                    pred_r, pred_t, pred_c, target, model_points, idx, points,
                    opt.w, opt.refine_start)
                epoch_losses.append(loss.item())
                if opt.refine_start:
                    for ite in range(0, opt.iteration):
                        pred_r, pred_t = refiner(new_points, emb, idx)
                        dis, new_points, new_target, pred = criterion_refine(
                            pred_r, pred_t, new_target, model_points, idx,
                            new_points)
                        dis.backward()
                    epoch_losses_refiner.append(dis.item())
                else:
                    loss.backward()
                    epoch_losses_refiner.append(0)
                train_dis_avg += dis.item()
                train_count += 1

                # step the optimizer after accumulating gradients over batch_size samples
                if train_count % opt.batch_size == 0:
                    optimizer.step()
                    optimizer.zero_grad()

            # take a final optimizer step if a partial accumulation remains at the end of the epoch
            if train_count % opt.batch_size != 0:
                optimizer.step()
                optimizer.zero_grad()

        refiner_losses.append(np.mean(epoch_losses_refiner))
        losses.append(np.mean(epoch_losses))
        if losses[-1] < best_loss:
            best_loss = losses[-1]
            best_loss_epoch = epoch

        train_dists.append(train_dis_avg / train_count)
        if train_dists[-1] < best_train:
            best_train_epoch = epoch
            best_train = train_dists[-1]

        test_dis = 0.0
        test_count = 0
        estimator.eval()
        refiner.eval()

        if plot_train:
            # plot randomly selected validation preds
            jj = 0
            x_axis = 0
            fig_x = 4
            fig_y = 4
            log_indexes = sorted(
                list(
                    np.random.choice(list(range(len(testdataloader))),
                                     int(fig_x * (fig_y / 2)),
                                     replace=False)))

            plt.cla()
            plt.close('all')
            fig, axs = plt.subplots(fig_x,
                                    fig_y,
                                    constrained_layout=True,
                                    figsize=(25, 15))

        for j, data in enumerate(testdataloader, 0):
            points, choose, img, target, model_points, idx, intr, np_img = data
            points, choose, img, target, model_points, idx = Variable(points).cuda(), \
                                                             Variable(choose).cuda(), \
                                                             Variable(img).cuda(), \
                                                             Variable(target).cuda(), \
                                                             Variable(model_points).cuda(), \
                                                             Variable(idx).cuda()
            pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)

            if plot_train:
                if j in log_indexes:
                    my_pred, my_r, my_t = my_estimator_prediction(
                        pred_r, pred_t, pred_c, opt.num_points, 1, points)

            _, dis, new_points, new_target, pred = criterion(
                pred_r, pred_t, pred_c, target, model_points, idx, points,
                opt.w, opt.refine_start)
            if opt.refine_start:
                for ite in range(0, opt.iteration):
                    pred_r, pred_t = refiner(new_points, emb, idx)

                    if plot_train:
                        if j in log_indexes:
                            my_pred, my_r, my_t = my_refined_prediction(
                                pred_r, pred_t, my_r, my_t)

                    dis, new_points, new_target, pred = criterion_refine(
                        pred_r, pred_t, new_target, model_points, idx,
                        new_points)

            if plot_train:
                if j in log_indexes:
                    if jj == 4:
                        jj = 0
                        x_axis += 1

                    my_r = quaternion_matrix(my_r)[:3, :3]
                    np_pred = np.dot(model_points[0].data.cpu().numpy(),
                                     my_r.T)
                    np_pred = np.add(np_pred, my_t)
                    np_target = target[0].data.cpu().numpy()
                    np_img = np_img[0].data.numpy()

                    image_target = pc_utils.pointcloud2image(
                        np_img.copy(), np_target, 3, intr)
                    image_prediction = pc_utils.pointcloud2image(
                        np_img.copy(), np_pred, 3, intr)

                    axs[x_axis, jj].imshow(image_target)
                    axs[x_axis, jj].set_title('target {}'.format(j))
                    axs[x_axis, jj].set_axis_off()
                    jj += 1

                    axs[x_axis, jj].imshow(image_prediction)
                    axs[x_axis, jj].set_title('prediction {}'.format(j))
                    axs[x_axis, jj].set_axis_off()
                    jj += 1

            test_dis += dis.item()
            test_count += 1

        test_dis = test_dis / test_count

        test_dists.append(test_dis)

        if plot_train:
            fig.suptitle('epoch {}, with an average dist: {}'.format(
                epoch, test_dis),
                         fontsize=16)
            plt.savefig(
                os.path.join(opt.log_dir_images,
                             'test_images_epoch_{}.png'.format(epoch)))

            if epoch > 1:
                plt.close('all')
                plt.cla()
                fig, axs = plt.subplots(2,
                                        2,
                                        constrained_layout=True,
                                        figsize=(30, 20))
                axs[0, 0].plot(losses)
                axs[0, 0].set_title('Training estimator loss')
                axs[0, 0].set_xlabel('Epochs')
                axs[0, 0].set_ylabel('Loss')

                axs[0, 1].plot(refiner_losses)
                axs[0, 1].set_title('Training refiner loss')
                axs[0, 1].set_xlabel('Epochs')
                axs[0, 1].set_ylabel('Loss')

                axs[1, 0].plot(train_dists)
                axs[1, 0].set_title('Training Avg. distance')
                axs[1, 0].set_xlabel('Epochs')
                axs[1, 0].set_ylabel('Avg. distance [m]')

                axs[1, 1].plot(test_dists)
                axs[1, 1].set_title('Test Avg. distance')
                axs[1, 1].set_xlabel('Epochs')
                axs[1, 1].set_ylabel('Avg. distance [m]')

                plt.savefig(os.path.join(opt.log_dir_images, 'losses.png'))

        out_dict = {
            'losses': losses,
            'refiner_losses': refiner_losses,
            'train_dists': train_dists,
            'test_dists': test_dists
        }
        with open(os.path.join(opt.log_dir, 'losses.json'), 'w') as outfile:
            json.dump(out_dict, outfile)
        del out_dict

        print('>>>>>>>>----------Epoch {0} finished---------<<<<<<<<'.format(
            epoch))
        if test_dis <= best_test:
            best_test = test_dis
            best_test_epoch = epoch
            if opt.refine_start:
                state_dict = refiner.state_dict()
                torch.save(state_dict,
                           '{0}/pose_refine_model.pth'.format(opt.outf))
                del state_dict
            else:
                state_dict = estimator.state_dict()
                torch.save(state_dict, '{0}/pose_model.pth'.format(opt.outf))
                del state_dict

            print('>>>>>>>>----------MODEL SAVED---------<<<<<<<<')

        t_elapsed = time.time() - start_time
        elapsed_times += t_elapsed / 3600
        print('elapsed time: {} min, total elapsed time: {} hours'.format(
            np.round(t_elapsed / 60, 2), np.round(elapsed_times, 2)))

        print('Train loss           : {}'.format(losses[-1]))
        print('Best train loss {}   : {}'.format(best_loss_epoch, best_loss))
        print('Train dist           : {}'.format(train_dists[-1]))
        print('Best train dist {}   : {}'.format(best_train_epoch, best_train))
        print('Test dist            : {}'.format(test_dists[-1]))
        print('Best test dist {}    : {}'.format(best_test_epoch, best_test))

        # adjust the training schedule once the distance margins are reached
        if best_test < opt.decay_margin and not opt.decay_start:
            print('decay lr')
            opt.decay_start = True
            opt.lr *= opt.lr_rate
            opt.w *= opt.w_rate
            optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)

        if (best_test < opt.refine_margin
                or epoch >= opt.refine_epoch_margin) and not opt.refine_start:
            #print('train refiner')
            opt.refine_start = True
            print('bs', opt.batch_size, 'it', opt.iteration)
            opt.batch_size = int(opt.batch_size / opt.iteration)
            print('new bs', opt.batch_size)
            optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)

            #dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers)
            #testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)

            #opt.sym_list = dataset.get_sym_list()
            #opt.num_points_mesh = dataset.get_num_points_mesh()

            print('>>>>>>>>----------train refiner!---------<<<<<<<<')
            criterion = Loss(opt.num_points_mesh, opt.sym_list)
            criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
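The training loop above effectively implements gradient accumulation: the dataloader yields one sample at a time, each loss is backpropagated immediately, and the optimizer only steps every opt.batch_size samples, plus once more for a trailing partial batch. Below is a minimal, self-contained sketch of that pattern with a toy model and random data (all names and sizes here are illustrative only):

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
batch_size = 8

optimizer.zero_grad()
count = 0
for step in range(20):                      # stands in for iterating the dataloader
    x, y = torch.randn(1, 4), torch.randn(1, 1)
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()                         # gradients accumulate across samples
    count += 1
    if count % batch_size == 0:             # step once enough samples have accumulated
        optimizer.step()
        optimizer.zero_grad()
if count % batch_size != 0:                 # final step for the trailing partial batch
    optimizer.step()
    optimizer.zero_grad()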