Пример #1
0
    def test_more_map_operations(self):
        """Check that chained pipelines keep their length and compose values.

        ``transformed`` extracts angle/throttle unchanged, ``transformed_2``
        extracts both values doubled, and ``transformed_3`` maps identity
        functions over ``transformed_2``. Every element of ``transformed_3``
        must therefore be twice the matching element of ``transformed``.
        """
        transformed = TubSequence.build_pipeline(
            self.sequence,
            x_transform=lambda record: record.underlying['user/angle'],
            y_transform=lambda record: record.underlying['user/throttle'])

        transformed_2 = TubSequence.build_pipeline(
            self.sequence,
            x_transform=lambda record: record.underlying['user/angle'] * 2,
            y_transform=lambda record: record.underlying['user/throttle'] * 2)

        transformed_3 = TubSequence.map_pipeline(
            x_transform=lambda x: x,
            y_transform=lambda y: y,
            pipeline=transformed_2
        )

        self.assertEqual(len(transformed), size)
        self.assertEqual(len(transformed_2), size)
        self.assertEqual(len(transformed_3), size)

        transformed_list = list(transformed)
        transformed_list_3 = list(transformed_3)
        # zip() stops at the shorter input, so make sure nothing is silently
        # dropped from the comparison below.
        self.assertEqual(len(transformed_list), len(transformed_list_3))

        # Compare every element instead of a single randomly drawn index:
        # the previous hard-coded bound was independent of ``size`` and the
        # random draw made a failure impossible to reproduce reliably.
        for (x1, y1), (x2, y2) in zip(transformed_list, transformed_list_3):
            self.assertAlmostEqual(x1 * 2, x2)
            self.assertAlmostEqual(y1 * 2, y2)
Пример #2
0
class BatchSequence(object):
    """
    Wraps a list of TubRecords in a TubSequence and exposes it as a batched
    tf.data.Dataset. Each record is turned into an (x, y) pair by the
    model's transform / translate methods through a pipeline built with
    TubSequence.build_pipeline.
    """
    def __init__(self, model: KerasPilot, config: Config,
                 records: List[TubRecord], is_train: bool) -> None:
        self.model = model
        self.config = config
        self.sequence = TubSequence(records)
        self.batch_size = self.config.BATCH_SIZE
        self.is_train = is_train
        self.augmentation = ImageAugmentation(config, 'AUGMENTATIONS')
        self.transformation = ImageAugmentation(config, 'TRANSFORMATIONS')
        # Built last so that an overriding _create_pipeline can rely on all
        # attributes assigned above.
        self.pipeline = self._create_pipeline()

    def __len__(self) -> int:
        """ Number of batches, counting a trailing partial batch as one """
        record_count = len(self.pipeline)
        return math.ceil(record_count / self.batch_size)

    def image_processor(self, img_arr):
        """ Runs the transformations on the image, additionally runs the
            augmentations when training, and returns the normalized
            result. """
        processed = self.transformation.run(img_arr)
        if self.is_train:
            processed = self.augmentation.run(processed)
        return normalize_image(processed)

    def _create_pipeline(self) -> TfmIterator:
        """ Builds the record -> (x, y) pipeline. Subclasses may override
            this when a more elaborate pipeline is needed. """

        def record_to_x(
                record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Produces the model input for one record """
            # the model returns a tuple, which x_translate turns into the
            # dictionary form understood by tf.data
            return self.model.x_translate(
                self.model.x_transform_and_process(
                    record, self.image_processor))

        def record_to_y(
                record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Produces the model target for one record """
            return self.model.y_translate(self.model.y_transform(record))

        return self.sequence.build_pipeline(x_transform=record_to_x,
                                            y_transform=record_to_y)

    def create_tf_data(self) -> tf.data.Dataset:
        """ Wraps the pipeline in a repeating, batched tf.data.Dataset """

        def pipeline_source():
            # from_generator expects a callable that returns an iterable
            return self.pipeline

        unbatched = tf.data.Dataset.from_generator(
            generator=pipeline_source,
            output_types=self.model.output_types(),
            output_shapes=self.model.output_shapes())
        return unbatched.repeat().batch(self.batch_size)
Пример #3
0
class BatchSequence(object):
    """
    Wraps a list of TubRecords in a TubSequence and exposes it as a batched
    tf.data.Dataset. Each record is turned into an (x, y) pair by the
    model's transform / translate methods through a pipeline built with
    TubSequence.build_pipeline.
    """
    def __init__(self, model: KerasPilot, config: Config,
                 records: List[TubRecord], is_train: bool) -> None:
        self.model = model
        self.config = config
        self.sequence = TubSequence(records)
        self.batch_size = self.config.BATCH_SIZE
        self.is_train = is_train
        self.augmentation = ImageAugmentation(config)
        # Built last so that an overriding _create_pipeline can rely on all
        # attributes assigned above.
        self.pipeline = self._create_pipeline()

    def __len__(self) -> int:
        """ Number of batches, counting a trailing partial batch as one """
        record_count = len(self.pipeline)
        return math.ceil(record_count / self.batch_size)

    def _create_pipeline(self) -> TfmIterator:
        """ Builds the record -> (x, y) pipeline. Subclasses may override
            this when a more elaborate pipeline is needed. """

        def record_to_x(
                record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Produces the model input for one record """
            raw = self.model.x_transform(record)
            # with several input tensors the model hands back a tuple whose
            # first slot is the image; otherwise the value is the image
            has_extras = isinstance(raw, tuple)
            image = np.squeeze(raw[0] if has_extras else raw)
            # augmentation is applied to training data only
            if self.is_train:
                image = self.augmentation.augment(image)
            # only the image is normalised here, the remaining inputs are
            # passed through untouched
            image = normalize_image(image)
            # put the processed image back in front of the other inputs
            packed = (image, ) + raw[1:] if has_extras else image
            # dictionary form is what tf.data understands
            return self.model.x_translate(packed)

        def record_to_y(
                record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Produces the model target for one record """
            return self.model.y_translate(self.model.y_transform(record))

        return self.sequence.build_pipeline(x_transform=record_to_x,
                                            y_transform=record_to_y)

    def create_tf_data(self) -> tf.data.Dataset:
        """ Wraps the pipeline in a repeating, batched tf.data.Dataset """

        def pipeline_source():
            # from_generator expects a callable that returns an iterable
            return self.pipeline

        unbatched = tf.data.Dataset.from_generator(
            generator=pipeline_source,
            output_types=self.model.output_types(),
            output_shapes=self.model.output_shapes())
        return unbatched.repeat().batch(self.batch_size)
Пример #4
0
    def test_iterator_consistency(self):
        """Check that pipelines can be iterated repeatedly with equal results.

        First, two fresh iterators over the extraction pipeline are each
        advanced through half of the data and must yield the same items.
        Then a mapped pipeline is fully materialised twice, the two lists
        must match, and its values must be 2x / 3x the extracted ones.
        """
        extract = TubSequence.build_pipeline(
            self.sequence,
            x_transform=lambda record: record.underlying['user/angle'],
            y_transform=lambda record: record.underlying['user/throttle'])

        # two independent partial passes, each starting from a new iterator
        partial_passes = []
        for _ in range(2):
            cursor = iter(extract)
            partial_passes.append([next(cursor) for _ in range(size // 2)])
        first_half, second_half = partial_passes
        self.assertEqual(first_half, second_half)

        # a mapped pipeline must not be exhausted by a complete pass either
        scaled = TubSequence.map_pipeline(
            x_transform=lambda x: 2 * x,
            y_transform=lambda y: 3 * y,
            pipeline=extract)
        first_full = list(scaled)
        second_full = list(scaled)
        self.assertEqual(first_full, second_full)

        for (ex, ey), (tx, ty) in zip(extract, scaled):
            self.assertAlmostEqual(2 * ex, tx)
            self.assertAlmostEqual(3 * ey, ty)
Пример #5
0
class TorchTubDataset(IterableDataset):
    '''
    Iterable PyTorch dataset over a list of tub records. Records are wrapped
    in a TubSequence and converted to (image, target) pairs by a pipeline.
    '''
    def __init__(self, config, records: List[TubRecord], transform=None):
        """Create a PyTorch Tub Dataset

        Args:
            config (object): the configuration information
            records (List[TubRecord]): a list of tub records
            transform (function, optional): applied to each record's image;
                when omitted (or falsy) get_default_transform() is used
        """
        self.config = config
        # Fall back to the default transform when none is supplied
        self.transform = transform or get_default_transform()
        self.sequence = TubSequence(records)
        self.pipeline = self._create_pipeline()

    def _create_pipeline(self):
        """ Builds the record -> (x, y) pipeline. Subclasses may override
            this when a more elaborate pipeline is needed. """
        def record_to_target(record: TubRecord):
            controls = [record.underlying['user/angle'],
                        record.underlying['user/throttle']]
            # Shift and scale so that values in [-1, 1] land in [0, 1]
            return (torch.tensor(controls, dtype=torch.float) + 1) / 2

        def record_to_image(record: TubRecord):
            # as_nparray=False asks for the image in its non-ndarray form
            # (noted by the original author as the result of Image.open())
            return self.transform(
                record.image(cached=True, as_nparray=False))

        return self.sequence.build_pipeline(x_transform=record_to_image,
                                            y_transform=record_to_target)

    def __iter__(self):
        """ Returns a fresh iterator over the pipeline """
        return iter(self.pipeline)