def test_more_map_operations(self):
    transformed = TubSequence.build_pipeline(
        self.sequence,
        x_transform=lambda record: record.underlying['user/angle'],
        y_transform=lambda record: record.underlying['user/throttle'])
    transformed_2 = TubSequence.build_pipeline(
        self.sequence,
        x_transform=lambda record: record.underlying['user/angle'] * 2,
        y_transform=lambda record: record.underlying['user/throttle'] * 2)
    transformed_3 = TubSequence.map_pipeline(
        x_transform=lambda x: x,
        y_transform=lambda y: y,
        pipeline=transformed_2)
    self.assertEqual(len(transformed), size)
    self.assertEqual(len(transformed_2), size)
    self.assertEqual(len(transformed_3), size)
    transformed_list = list(transformed)
    transformed_list_2 = list(transformed_3)
    index = np.random.randint(0, 9)
    x1, y1 = transformed_list[index]
    x2, y2 = transformed_list_2[index]
    self.assertAlmostEqual(x1 * 2, x2)
    self.assertAlmostEqual(y1 * 2, y2)
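# Hypothetical fixture for this test and test_iterator_consistency below
# (an assumption, not from the source): it presumes `size` is a module-level
# constant and that TubRecord(config, base_path, underlying) accepts a plain
# dict, matching the record.underlying access used by the transforms above.
import unittest

size = 10

class TestPipeline(unittest.TestCase):
    def setUp(self):
        records = [TubRecord(config=Config(), base_path='',
                             underlying={'user/angle': float(i),
                                         'user/throttle': float(i)})
                   for i in range(size)]
        self.sequence = TubSequence(records)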
import math
from typing import Dict, List, Union

import numpy as np
import tensorflow as tf

# donkeycar imports (module paths assumed from the donkeycar 4.x layout)
from donkeycar.config import Config
from donkeycar.parts.keras import KerasPilot
from donkeycar.pipeline.augmentations import ImageAugmentation
from donkeycar.pipeline.sequence import TubSequence, TfmIterator
from donkeycar.pipeline.types import TubRecord
from donkeycar.utils import normalize_image


class BatchSequence(object):
    """
    The idea is to have a shallow sequence with types that can hydrate
    themselves to np.ndarray initially and later into the types required
    by tf.data (i.e. dictionaries or np.ndarrays).
    """
    def __init__(self,
                 model: KerasPilot,
                 config: Config,
                 records: List[TubRecord],
                 is_train: bool) -> None:
        self.model = model
        self.config = config
        self.sequence = TubSequence(records)
        self.batch_size = self.config.BATCH_SIZE
        self.is_train = is_train
        self.augmentation = ImageAugmentation(config, 'AUGMENTATIONS')
        self.transformation = ImageAugmentation(config, 'TRANSFORMATIONS')
        self.pipeline = self._create_pipeline()

    def __len__(self) -> int:
        return math.ceil(len(self.pipeline) / self.batch_size)

    def image_processor(self, img_arr):
        """ Transforms the image, augments it if in training, and then
            normalizes it. """
        img_arr = self.transformation.run(img_arr)
        if self.is_train:
            img_arr = self.augmentation.run(img_arr)
        norm_img = normalize_image(img_arr)
        return norm_img

    def _create_pipeline(self) -> TfmIterator:
        """ This can be overridden if more complicated pipelines are
            required """
        # 1. Initialise TubRecord -> x, y transformations
        def get_x(record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Extracting x from record for training """
            out_tuple = self.model.x_transform_and_process(
                record, self.image_processor)
            # convert tuple to dictionary which is understood by tf.data
            out_dict = self.model.x_translate(out_tuple)
            return out_dict

        def get_y(record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Extracting y from record for training """
            y0 = self.model.y_transform(record)
            y1 = self.model.y_translate(y0)
            return y1

        # 2. Build pipeline using the transformations
        pipeline = self.sequence.build_pipeline(x_transform=get_x,
                                                y_transform=get_y)
        return pipeline

    def create_tf_data(self) -> tf.data.Dataset:
        """ Assembles the tf data pipeline """
        dataset = tf.data.Dataset.from_generator(
            generator=lambda: self.pipeline,
            output_types=self.model.output_types(),
            output_shapes=self.model.output_shapes())
        return dataset.repeat().batch(self.batch_size)
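# Minimal usage sketch (an illustration, not from the source): wiring two
# BatchSequence instances into a tf.data training setup. The helper name and
# the train/validation record split are assumptions.
def make_tf_datasets(model: KerasPilot, config: Config,
                     train_records: List[TubRecord],
                     val_records: List[TubRecord]):
    train_seq = BatchSequence(model, config, train_records, is_train=True)
    val_seq = BatchSequence(model, config, val_records, is_train=False)
    tune = tf.data.experimental.AUTOTUNE
    train_data = train_seq.create_tf_data().prefetch(tune)
    val_data = val_seq.create_tf_data().prefetch(tune)
    # the datasets repeat forever, so step counts must be passed explicitly
    # to fit(), e.g. steps_per_epoch=len(train_seq),
    # validation_steps=len(val_seq)
    return train_data, val_data, len(train_seq), len(val_seq)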
class BatchSequence(object):
    """
    The idea is to have a shallow sequence with types that can hydrate
    themselves to np.ndarray initially and later into the types required
    by tf.data (i.e. dictionaries or np.ndarrays).

    This variant inlines the x-processing steps (augmentation and
    normalization) in get_x instead of delegating them to an image
    processor callback as above.
    """
    def __init__(self,
                 model: KerasPilot,
                 config: Config,
                 records: List[TubRecord],
                 is_train: bool) -> None:
        self.model = model
        self.config = config
        self.sequence = TubSequence(records)
        self.batch_size = self.config.BATCH_SIZE
        self.is_train = is_train
        self.augmentation = ImageAugmentation(config)
        self.pipeline = self._create_pipeline()

    def __len__(self) -> int:
        return math.ceil(len(self.pipeline) / self.batch_size)

    def _create_pipeline(self) -> TfmIterator:
        """ This can be overridden if more complicated pipelines are
            required """
        # 1. Initialise TubRecord -> x, y transformations
        def get_x(record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Extracting x from record for training """
            # this transforms the record into x for training the model to x,y
            x0 = self.model.x_transform(record)
            # for multiple input tensors the return value here is a tuple
            # where the image is in the first slot, otherwise x0 is the image
            x1 = x0[0] if isinstance(x0, tuple) else x0
            x1 = np.squeeze(x1)
            # apply augmentation to training data only
            x2 = self.augmentation.augment(x1) if self.is_train else x1
            # normalise image, assume other input data comes already
            # normalised
            x3 = normalize_image(x2)
            # fill normalised image back into tuple if necessary
            x4 = (x3, ) + x0[1:] if isinstance(x0, tuple) else x3
            # convert tuple to dictionary which is understood by tf.data
            x5 = self.model.x_translate(x4)
            return x5

        def get_y(record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Extracting y from record for training """
            y0 = self.model.y_transform(record)
            y1 = self.model.y_translate(y0)
            return y1

        # 2. Build pipeline using the transformations
        pipeline = self.sequence.build_pipeline(x_transform=get_x,
                                                y_transform=get_y)
        return pipeline

    def create_tf_data(self) -> tf.data.Dataset:
        """ Assembles the tf data pipeline """
        dataset = tf.data.Dataset.from_generator(
            generator=lambda: self.pipeline,
            output_types=self.model.output_types(),
            output_shapes=self.model.output_shapes())
        return dataset.repeat().batch(self.batch_size)
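# Sketch of the output_types()/output_shapes() contract that create_tf_data
# relies on (illustrative, not copied from the source): the model describes
# its x and y dictionaries so tf.data.Dataset.from_generator can type the
# pipeline. The class name, the key names and the 120x160x3 image shape are
# assumptions for a single-camera model with two scalar outputs.
class ExamplePilotShapes:
    def output_types(self):
        return ({'img_in': tf.float32},
                {'n_outputs0': tf.float32, 'n_outputs1': tf.float32})

    def output_shapes(self):
        return ({'img_in': tf.TensorShape([120, 160, 3])},
                {'n_outputs0': tf.TensorShape([]),
                 'n_outputs1': tf.TensorShape([])})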
def test_iterator_consistency(self):
    extract = TubSequence.build_pipeline(
        self.sequence,
        x_transform=lambda record: record.underlying['user/angle'],
        y_transform=lambda record: record.underlying['user/throttle'])
    # iterate twice through half the data
    r1 = list()
    r2 = list()
    for r in r1, r2:
        iterator = iter(extract)
        for i in range(size // 2):
            r.append(next(iterator))
    self.assertEqual(r1, r2)

    # now transform and iterate through the pipeline twice to see the
    # iterator doesn't exhaust
    transformed = TubSequence.map_pipeline(
        x_transform=lambda x: 2 * x,
        y_transform=lambda y: 3 * y,
        pipeline=extract)
    l1 = list(transformed)
    l2 = list(transformed)
    self.assertEqual(l1, l2)

    for e, t in zip(extract, transformed):
        ex, ey = e
        tx, ty = t
        self.assertAlmostEqual(2 * ex, tx)
        self.assertAlmostEqual(3 * ey, ty)
import torch
from torch.utils.data import IterableDataset

# assumed donkeycar module path for the default torchvision transform
from donkeycar.parts.pytorch.torch_utils import get_default_transform


class TorchTubDataset(IterableDataset):
    """
    Wraps a list of tub records as a PyTorch IterableDataset; the
    train/test split is done by the caller when selecting the records.
    """
    def __init__(self, config, records: List[TubRecord], transform=None):
        """Create a PyTorch Tub Dataset

        Args:
            config (object): the configuration information
            records (List[TubRecord]): a list of tub records
            transform (function, optional): a transform to apply to the data
        """
        self.config = config
        # Handle the transforms
        if transform:
            self.transform = transform
        else:
            self.transform = get_default_transform()
        self.sequence = TubSequence(records)
        self.pipeline = self._create_pipeline()

    def _create_pipeline(self):
        """ This can be overridden if more complicated pipelines are
            required """
        def y_transform(record: TubRecord):
            angle: float = record.underlying['user/angle']
            throttle: float = record.underlying['user/throttle']
            predictions = torch.tensor([angle, throttle], dtype=torch.float)
            # Normalize to be between [0, 1];
            # angle and throttle are originally between [-1, 1]
            predictions = (predictions + 1) / 2
            return predictions

        def x_transform(record: TubRecord):
            # Loads the result of Image.open()
            img_arr = record.image(cached=True, as_nparray=False)
            return self.transform(img_arr)

        # Build pipeline using the transformations
        pipeline = self.sequence.build_pipeline(x_transform=x_transform,
                                                y_transform=y_transform)
        return pipeline

    def __iter__(self):
        return iter(self.pipeline)
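# Minimal usage sketch (not from the source): because TorchTubDataset is an
# IterableDataset, the DataLoader only batches it; shuffle/sampler options
# are unavailable, and num_workers=0 avoids each worker replaying the whole
# record stream. The helper name and batch size are illustrative.
from torch.utils.data import DataLoader

def make_loader(config, records: List[TubRecord], batch_size: int = 128):
    dataset = TorchTubDataset(config, records)
    return DataLoader(dataset, batch_size=batch_size, num_workers=0)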