Exemplo n.º 1
0
class PiCar:
    """Camera-equipped car steered by remote keyboard input or by a model.

    Without a model the car is driven with the arrow keys (read through
    pygame) and can optionally record (frame, drive input) pairs, which are
    handed to ``AWSManager.upload_training_data`` when the user quits.  With
    a model, every camera frame is passed to ``ModelManager.run_inference``
    and the index of the largest output selects the drive command.
    """

    # Camera Info
    SCREEN_WIDTH  = 200
    SCREEN_HEIGHT = 66
    FRAME_RATE    = 24

    # Index of the largest model output ->
    # (DriveController method name, drive-input label).
    _MODEL_OUTPUT_COMMANDS = {
        0: ('forward', 'forward'),
        1: ('pivot_right', 'right'),
        2: ('pivot_left', 'left'),
    }

    def __init__(self, record_training_data=False, model=None):
        """Set up the drive controller, AWS manager, camera and pygame.

        Args:
            record_training_data: collect frames and drive labels while
                driving.  Ignored when ``model`` is given.
            model: passed to ``ModelManager.load_model``; when falsy the car
                is steered from the keyboard instead.
        """
        print('Setting up PiCar...')

        # Training data is only recorded while a human is driving.
        self.record_training_data = bool(record_training_data and not model)
        if self.record_training_data:
            print('Recording training data...')

        if model:
            self.model_manager = ModelManager()
            self.model_manager.load_model(model)
        else:
            self.model_manager = None

        self.dc = DriveController()
        self.aws_manager = AWSManager()
        self.setup_camera()

        # Structs for collecting training images and labels
        self.training_images = []
        self.training_labels = []

        # Stores the current user drive instruction
        self.current_drive_input = 'forward'

        # Using pygame to process remote keyboard inputs
        pygame.init()
        pygame.display.set_mode((100, 100))

    def setup_camera(self):
        """Create the PiCamera and the raw capture buffer used by ``drive``."""
        print('Initializing camera...')

        frame_size = (self.SCREEN_WIDTH, self.SCREEN_HEIGHT)
        self.camera = PiCamera()
        self.camera.resolution = frame_size
        self.camera.framerate = self.FRAME_RATE
        self.raw_capture = PiRGBArray(self.camera, size=frame_size)

        # allow the camera to warm up
        time.sleep(1)

        print('Camera initialization complete...')

    def _steer(self, command, label):
        """Call the named DriveController method and remember the label."""
        getattr(self.dc, command)()
        self.current_drive_input = label

    def process_user_inputs(self):
        """Translate pending pygame keyboard events into drive commands.

        Releasing any key, or pressing a key other than left/right, drives
        forward; the right and left arrow keys pivot the car.
        """
        for event in pygame.event.get():
            if event.type == pygame.KEYUP:
                self._steer('forward', 'forward')
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_RIGHT:
                    self._steer('pivot_right', 'right')
                elif event.key == pygame.K_LEFT:
                    self._steer('pivot_left', 'left')
                else:
                    self._steer('forward', 'forward')

    def convert_model_output_to_drive_command(self, model_output):
        """Drive according to the position of the largest model output.

        Index 0 drives forward, 1 pivots right and 2 pivots left (indices
        are taken over the flattened output).  Any other index leaves the
        current command unchanged.
        """
        print(str(model_output))
        command = self._MODEL_OUTPUT_COMMANDS.get(int(np.argmax(model_output)))
        if command is not None:
            self._steer(*command)

        print(self.current_drive_input)

    def drive(self):
        """Run the capture / steer / display loop until 'q' is pressed.

        The motors are stopped whenever the loop ends, including when an
        exception escapes it, so a camera or inference failure cannot leave
        the car moving.  Recorded training data is uploaded only after the
        user quits with 'q'.
        """
        user_quit = False
        try:
            for frame in self.camera.capture_continuous(self.raw_capture, format="bgr", use_video_port=True):

                image = frame.array

                if self.model_manager:
                    # Use model to steer PiCar
                    image = image / 255.0
                    output = self.model_manager.run_inference(image)
                    self.convert_model_output_to_drive_command(output)
                else:
                    # Use remote keyboard inputs to steer PiCar
                    self.process_user_inputs()

                # Record training data
                if self.record_training_data:
                    self.training_images.append(image)
                    self.training_labels.append(self.current_drive_input)

                # Display camera feed
                cv2.imshow("Feed", image)
                key = cv2.waitKey(1) & 0xFF
                # Reset the capture buffer so the next frame starts at 0
                self.raw_capture.truncate(0)

                # Exit program if user pressed 'q'
                if key == ord("q"):
                    user_quit = True
                    break
        finally:
            # Always stop the motors, even if the loop died with an error.
            self.dc.stop()

        # Upload training data to AWS before exiting
        if user_quit and self.record_training_data:
            self.aws_manager.upload_training_data(self.training_images, self.training_labels)
Exemplo n.º 2
0
class InferenceManager:
    """
    Evaluation driver: loads trained weights, runs the model over each
    requested test set, reports disparity error metrics and optionally saves
    the predicted disparities.

    """

    def __init__(self, options):
        """Load the model in eval mode and build one DataLoader per test set.

        Args:
            options: parsed options object.  The attributes read here are
                ``load_path`` (required), ``config_path``, ``network``,
                ``test_data_types``, ``disable_normalisation`` and
                ``num_workers``; ``save_disparities`` is read later.
        """
        print('---------------')
        self.opt = options
        # Create network and load the trained weights (no optimiser needed)
        self.model_manager = ModelManager(self.opt)
        assert self.opt.load_path is not None
        self.model_manager.load_model(weights_path=self.opt.load_path, load_optimiser=False)

        # extract model for easier access
        self.model = self.model_manager.model
        self.model.eval()

        path_info = load_config(self.opt.config_path)

        self.test_loaders = {}
        for test_data_type in self.opt.test_data_types:

            data_path = path_info[test_data_type]
            width, height = sizes_lookup[self.opt.network][test_data_type]

            # All kitti variants share the 'kitti' split folder and use a
            # split file named after the variant.
            if 'kitti' in test_data_type:
                folder, textfile = 'kitti', test_data_type + '.txt'
            else:
                folder, textfile = test_data_type, 'test_files.txt'
            test_filenames = readlines(os.path.join('splits', folder, textfile))

            # create dataloaders
            dataset_class = data_type_lookup[test_data_type]
            test_dataset = dataset_class(data_path,
                                         test_filenames, height,
                                         width, is_train=False,
                                         disable_normalisation=self.opt.disable_normalisation,
                                         kitti2012=test_data_type == 'kitti2012',
                                         load_gt=test_data_type != 'kitti2015submission')

            self.test_loaders[test_data_type] = DataLoader(
                test_dataset, shuffle=False, drop_last=False,
                num_workers=self.opt.num_workers,
                batch_size=1)

        self.error_metrics = defaultdict(list)
        self.resized_disps = []

    def run_inference(self):
        """Evaluate every test set, print the metrics and write them to
        ``eval_results.json`` inside ``opt.load_path``.

        When ``opt.save_disparities`` is set, the resized predictions are
        also saved as ``.npy`` files and, for 'kitti2015submission', as
        16-bit PNGs scaled by 256.
        """
        all_errors = {}
        # Test sets for which no error metrics are computed.
        no_error_types = ['flicker', 'kitti2015submission']

        for data_type, loader in self.test_loaders.items():
            print('---------------')
            print('running evaluation on:')
            print(data_type)

            # Start from clean accumulators for each test set.
            self.error_metrics = defaultdict(list)
            self.resized_disps = []
            with torch.no_grad():
                for inputs in tqdm(loader, ncols=60, position=0, leave=True):
                    self.process_batch(inputs,
                                       compute_errors=data_type not in no_error_types)

                # Collapse each per-image list into its rounded mean, stored
                # as a string for printing and JSON output.
                for key in list(self.error_metrics):
                    self.error_metrics[key] = str(
                        np.round(np.mean(self.error_metrics[key]), 5))
                all_errors[data_type] = self.error_metrics

                if self.opt.save_disparities:
                    # also save resized disparities for visualisation
                    _savepath = os.path.join(self.opt.load_path, data_type, 'npys')
                    os.makedirs(_savepath, exist_ok=True)
                    for idx, disp in enumerate(self.resized_disps):
                        np.save(os.path.join(_savepath, '{}.npy'.format(str(idx).zfill(3))), disp)

                    if data_type == 'kitti2015submission':
                        _savepath = os.path.join(_savepath, 'disp_0')
                        os.makedirs(_savepath, exist_ok=True)
                        for idx, disp in enumerate(self.resized_disps):
                            disp = (disp * 256).astype(np.uint16)
                            print(disp.shape)
                            io.imsave(os.path.join(_savepath,
                                                   '{}_10.png'.format(str(idx).zfill(6))), disp)

        print('Finished inference!')
        print('---------------')
        for data_type, errors in all_errors.items():
            print('Metrics for {}:'.format(data_type))
            for key, error in errors.items():
                print('{} -- {}'.format(key, error))
            print('---------------')

        with open(os.path.join(self.opt.load_path, 'eval_results.json'), 'w') as file_handler:
            json.dump(all_errors, file_handler, indent=2)

    def process_batch(self, inputs, compute_errors=True):
        """Run the model on one batch and accumulate metrics / predictions.

        Args:
            inputs: dict with at least 'image', 'stereo_image' and
                'disparity' entries; every value is moved to the GPU when
                one is available, so all values must support ``.cuda()``.
            compute_errors: when True, append d1/d2/d3/EPE for each image to
                ``self.error_metrics``.

        Returns:
            The raw model outputs.
        """
        # move to GPU
        if torch.cuda.is_available():
            for key, val in inputs.items():
                inputs[key] = val.cuda()

        outputs = self.model(inputs['image'], inputs['stereo_image'])
        preds = outputs[('raw', 0)][:, 0].cpu().numpy()

        # get errors
        gts = inputs['disparity'].cpu().numpy()
        for gt, pred in zip(gts, preds):
            # resize prediction to the gt resolution and rescale its values
            # by the same horizontal factor
            height, width = gt.shape
            pred_disp = cv2.resize(pred, dsize=(width, height)) * width / pred.shape[1]

            if compute_errors:
                d1, d2, d3, EPE = self.compute_errors(gt, pred_disp)
                self.error_metrics['d1'].append(d1)
                self.error_metrics['d2'].append(d2)
                self.error_metrics['d3'].append(d3)
                self.error_metrics['EPE'].append(EPE)

            if self.opt.save_disparities:
                self.resized_disps.append(pred_disp)

        return outputs

    def compute_errors(self, gt_disp, pred_disp):
        """Compare a predicted disparity map with its ground truth.

        Only pixels with ``gt_disp > 0`` are evaluated.

        Returns:
            ``(d1, d2, d3, EPE)``: the fractions of evaluated pixels whose
            absolute error is at least 1, 2 and 3, and the mean absolute
            error.  All four are NaN when no pixel has ground truth.
        """
        mask = gt_disp > 0
        num_valid = mask.sum()
        if num_valid == 0:
            # Nothing to evaluate: return NaN explicitly instead of relying
            # on 0/0 and an empty mean, which give NaN plus warnings.
            return np.nan, np.nan, np.nan, np.nan

        abs_diff = np.abs(gt_disp[mask] - pred_disp[mask])
        EPE = abs_diff.mean()

        d1 = (abs_diff >= 1).sum() / num_valid
        d2 = (abs_diff >= 2).sum() / num_valid
        d3 = (abs_diff >= 3).sum() / num_valid
        return d1, d2, d3, EPE
Exemplo n.º 3
0
class TrainManager:
    """
    Main training class called from main.py.

    Builds the model, datasets, loaders and tensorboard writers from the
    options object, then runs the optimisation loop with periodic
    validation, logging and checkpointing.

    """
    def __init__(self, options):
        """Create the model, data loaders and tensorboard writers.

        Args:
            options: parsed options object; it is stored as ``self.opt`` and
                read throughout the class (paths, image size, batch size,
                step counts, augmentation flags, ...).
        """
        print('---------------')
        print('setting up...')
        self.opt = options
        # Create network and optimiser
        self.model_manager = ModelManager(self.opt)
        if self.opt.load_path is not None:
            self.model_manager.load_model(weights_path=self.opt.load_path,
                                          load_optimiser=False)

        # extract model, optimiser and scheduler for easier access
        self.model = self.model_manager.model
        self.optimiser = self.model_manager.optimiser
        self.scheduler = self.model_manager.scheduler
        self.scales = self.model_manager.scales
        print('models done!')

        path_info = load_config(self.opt.config_path)

        # Keyword arguments shared by the training and validation datasets.
        shared_kwargs = dict(
            disable_normalisation=self.opt.disable_normalisation,
            max_disparity=self.opt.max_disparity,
            keep_aspect_ratio=True,
            disable_synthetic_augmentation=self.opt.disable_synthetic_augmentation,
            disable_sharpening=self.opt.disable_sharpening,
            monodepth_model=self.opt.monodepth_model,
            disable_background=self.opt.disable_background)

        train_datasets = []
        val_datasets = []
        for dataset_type in self.opt.training_datasets:
            dataset_path = path_info[dataset_type]
            train_filenames = readlines(
                os.path.join('splits', dataset_type, 'train_files_all.txt'))

            # sceneflow is validated on its test split file
            val_split_file = ('val_files_all.txt'
                              if dataset_type != 'sceneflow' else 'test_files.txt')
            val_filenames = readlines(
                os.path.join('splits', dataset_type, val_split_file))
            dataset_class = dataset_lookup[dataset_type]

            # subsample data optionally
            if self.opt.data_sampling != 1.0:
                sampling = self.opt.data_sampling
                assert sampling > 0
                assert sampling < 1.0
                train_filenames = list(
                    np.random.choice(np.array(train_filenames),
                                     int(sampling * len(train_filenames)),
                                     replace=False))

            train_datasets.append(dataset_class(
                dataset_path,
                train_filenames,
                self.opt.height,
                self.opt.width,
                is_train=True,
                **shared_kwargs))
            val_datasets.append(dataset_class(
                dataset_path,
                val_filenames,
                self.opt.height,
                self.opt.width,
                is_train=False,
                **shared_kwargs))

        self.train_dataset = ConcatDataset(train_datasets)
        self.val_dataset = ConcatDataset(val_datasets)

        # use custom sampler so we can continue from specific step
        my_sampler = MyRandomSampler(self.train_dataset,
                                     start_step=self.opt.start_step)
        self.train_loader = DataLoader(self.train_dataset,
                                       sampler=my_sampler,
                                       drop_last=True,
                                       num_workers=self.opt.num_workers,
                                       batch_size=self.opt.batch_size)
        self.val_loader = DataLoader(self.val_dataset,
                                     shuffle=True,
                                     drop_last=True,
                                     num_workers=1,
                                     batch_size=self.opt.batch_size)
        # Validation pulls one batch at a time from this iterator.
        self.val_iter = iter(self.val_loader)

        print('datasets done!')
        print('dataset info:')
        print('training on {} images, validating on {} images'.format(
            len(self.train_dataset), len(self.val_dataset)))

        # Set up tensorboard writers and logger
        self.train_writer = SummaryWriter(
            os.path.join(self.opt.log_path, self.opt.model_name, 'train'))
        self.val_writer = SummaryWriter(
            os.path.join(self.opt.log_path, self.opt.model_name, 'val'))
        os.makedirs(self.opt.log_path, exist_ok=True)
        self.step = 0
        self.epoch = 0
        self.training_complete = False

        print('training setup complete!')
        print('---------------')

    def train(self):
        """Run epochs until ``opt.training_steps`` steps have been taken."""
        print('training...')
        while not self.training_complete:
            self.run_epoch()
            self.epoch += 1

        print('training complete!')

    def run_epoch(self):
        """Train for one pass over the training loader.

        Every ``opt.log_freq`` steps the current batch is logged and one
        validation batch is evaluated; weights are saved every 10000 steps
        and at the end of the epoch, after which the scheduler is stepped.
        """
        if self.step < self.opt.start_step:
            print('skipping up to step {}'.format(self.opt.start_step))

        for idx, inputs in enumerate(self.train_loader):

            start_time = time.time()

            outputs, losses = self.process_batch(inputs, compute_loss=True)

            # Update weights
            loss = losses['loss']
            self.model.zero_grad()
            loss.backward()
            self.optimiser.step()
            # Remember the learning rate (of the last param group) for logging
            for group in self.optimiser.param_groups:
                self.lr = group['lr']

            print('step {} - time {}'.format(
                self.step, round(time.time() - start_time, 3)))

            # validate and log
            if self.step % self.opt.log_freq == 0:
                self.log(self.train_writer, inputs, outputs, losses)

                self.model.eval()
                self.val()
                self.model.train()

            if self.step % 10000 == 0:
                self.model_manager.save_model(
                    folder_name='weights_{}'.format(self.step))

            self.step += 1

            if self.step >= self.opt.training_steps:
                self.training_complete = True
                break

        print('Epoch {} complete!'.format(self.epoch))
        self.model_manager.save_model(
            folder_name='weights_{}'.format(self.step))
        self.scheduler.step()

    def val(self):
        """Evaluate and log a single validation batch.

        The validation iterator is restarted when it is exhausted.
        """
        with torch.no_grad():
            # Use the built-in next(): Python 3 iterators (including the
            # DataLoader iterator) have no ``.next()`` method.
            try:
                inputs = next(self.val_iter)
            except StopIteration:
                self.val_iter = iter(self.val_loader)
                inputs = next(self.val_iter)

            outputs, losses = self.process_batch(inputs, compute_loss=True)

        self.log(self.val_writer, inputs, outputs, losses)

    def process_batch(self, inputs, compute_loss=False):
        """Run the model on a batch and upsample each scale's prediction.

        Args:
            inputs: dict with at least 'image' and 'stereo_image' (and
                'disparity' when ``compute_loss`` is True).  Every value is
                moved to the GPU when one is available.
            compute_loss: whether to also compute the training losses.

        Returns:
            ``(outputs, losses)``: the model outputs extended with a
            ``('disp', scale)`` entry per scale, and the loss dict (empty
            when ``compute_loss`` is False).
        """
        # move to GPU
        if torch.cuda.is_available():
            for key, val in inputs.items():
                inputs[key] = val.cuda()

        outputs = self.model(inputs['image'], inputs['stereo_image'])

        full_size = (self.opt.height, self.opt.width)
        for scale in range(self.scales):
            # upsample to full resolution
            pred = F.interpolate(outputs[('raw', scale)],
                                 mode='bilinear',
                                 size=full_size,
                                 align_corners=True)

            # keep channel 0 as the disparity map
            outputs[('disp', scale)] = pred[:, 0]

        # get losses
        losses = self.compute_losses(inputs, outputs) if compute_loss else {}

        return outputs, losses

    def compute_losses(self, inputs, outputs):
        """Compute the masked L1 disparity loss for every scale.

        The target is ``inputs['disparity']`` clamped to
        ``opt.max_disparity``; pixels whose target is not positive
        contribute zero, but the mean is still taken over all pixels.

        Returns:
            dict with one ``'disp_loss/<scale>'`` entry per scale and
            ``'loss'``, the average over scales.
        """
        # The target and its validity mask do not depend on the scale.
        target_disp = torch.clamp(inputs['disparity'],
                                  max=self.opt.max_disparity)
        valid = (target_disp > 0).float()

        losses = {}
        total_loss = 0

        for scale in range(self.scales):
            pred_disp = outputs[('disp', scale)]

            # compute loss on disparity
            disparity_loss = (torch.abs(pred_disp - target_disp) * valid).mean()

            total_loss = total_loss + disparity_loss
            losses['disp_loss/{}'.format(scale)] = disparity_loss

        losses['loss'] = total_loss / self.scales

        return losses

    def warp_stereo_image(self, stereo_image, disparity):
        """Note - for logging only

        Resample ``stereo_image`` at horizontal positions shifted by
        ``-disparity`` (border padding).

        Args:
            stereo_image: tensor whose last two dimensions match
                ``disparity`` (assumes a (channels, height, width) layout -
                TODO confirm against callers).
            disparity: (height, width) tensor on the CPU.

        Returns:
            The warped image, with the same shape as ``stereo_image``.
        """
        height, width = disparity.shape

        # Pixel coordinate grids of shape (height, width)
        xs = torch.arange(width, dtype=torch.float32).unsqueeze(0).expand(height, width)
        ys = torch.arange(height, dtype=torch.float32).unsqueeze(1).expand(height, width)

        xs = xs - disparity
        # normalise pixel coordinates to [-1, 1] as expected by grid_sample
        xs = ((xs / (width - 1)) - 0.5) * 2
        ys = ((ys / (height - 1)) - 0.5) * 2
        sample_pix = torch.stack([xs, ys], 2)

        warped_image = F.grid_sample(stereo_image.unsqueeze(0),
                                     sample_pix.unsqueeze(0),
                                     padding_mode='border',
                                     align_corners=True)

        return warped_image[0]

    def log(self, writer, inputs, outputs, losses):
        """Write the learning rate, losses and up to four example images.

        Reads ``self.lr``, which is set by ``run_epoch`` after the first
        optimiser step.
        """
        print('logging')
        writer.add_scalar('lr', self.lr, self.step)

        # write to tensorboard
        for loss_type, loss in losses.items():
            writer.add_scalar('{}'.format(loss_type), loss, self.step)

        has_target = inputs.get('disparity') is not None
        for i in range(min(4, len(inputs['image']))):
            stereo_image = inputs['stereo_image'][i]

            writer.add_image('image_l/{}'.format(i),
                             normalise_image(inputs['image'][i]), self.step)
            writer.add_image('image_r/{}'.format(i),
                             normalise_image(stereo_image),
                             self.step)

            if has_target:
                target = inputs['disparity'][i]
                writer.add_image('disp_target/{}'.format(i),
                                 normalise_image(target),
                                 self.step)

                warped_image = self.warp_stereo_image(stereo_image.cpu(),
                                                      target.cpu())
                writer.add_image('warped_gt_image/{}'.format(i),
                                 normalise_image(warped_image), self.step)

            # Optional extra inputs are logged under their own key name.
            for key in ('mono_disparity', 'occlusion_mask'):
                if inputs.get(key) is not None:
                    writer.add_image('{}/{}'.format(key, i),
                                     normalise_image(inputs[key][i]),
                                     self.step)

            pred = outputs[('disp', 0)][i]
            writer.add_image('disp_pred/{}'.format(i),
                             normalise_image(pred),
                             self.step)

            warped_image = self.warp_stereo_image(stereo_image.cpu(), pred.cpu())
            writer.add_image('warped_image/{}'.format(i),
                             normalise_image(warped_image), self.step)