def test_d(size, n_splits):
    """ Check the behaviour of the D expression in the `set_dataset` action.

    size
        size of the dataset.
    n_splits
        the number of cv folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    pipeline = (Pipeline()
                .init_variable('indices', default=[])
                .update(V('indices', mode='a'), B('indices')[0])
                ) << dataset.CV(C('fold')).train

    result = list(range(size))
    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})
        start = fold * (size // n_splits)
        end = (fold + 1) * (size // n_splits)

        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)
            # the train split of a fold contains all indices except its validation range
            assert pipeline.v('indices') == result[:start] + result[end:]
def batch():
    """ Prepare a batch and load the same DATA into both comp1 and comp2 components. """
    dataset = Dataset(BATCH_SIZE, MyBatch)
    batch = (dataset.next_batch(BATCH_SIZE)
             .load(src=DATA, dst='comp1')
             .load(src=DATA, dst='comp2'))
    return batch
def get_batch(data, pipeline, index=DATASET_SIZE, batch_class=Batch, skip=2, dst=False):
    """ Parameters
    ----------
    data
        data to use
    pipeline : bool or Pipeline
        whether to get a batch from a dataset or a pipeline
    index : DatasetIndex
    batch_class : type
    skip : int
        how many batches to skip
    dst : bool or list of str
        if False, the data is preloaded into the dataset;
        otherwise it is loaded into the given components
    """
    if dst is False:
        dataset = Dataset(index, preloaded=data, batch_class=batch_class)
    else:
        dataset = Dataset(index, batch_class=batch_class)

    template_pipeline = (Pipeline()
                         .init_variable('dummy')
                         .update(V('dummy'), B.data)  # touch batch data to fire preloading
                         )

    if isinstance(pipeline, Pipeline):
        template_pipeline = pipeline + template_pipeline

    source = (dataset >> template_pipeline) if pipeline is not False else dataset

    # skip the first `skip` batches and return the next one
    for _ in range(skip + 1):
        batch = source.next_batch(10)

    if dst is not False:
        batch = batch.load(src=data, dst=dst)
    return batch
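# A hedged usage sketch of `get_batch` (illustrative only, not part of the
# original suite): with the defaults, the data is preloaded into a plain
# dataset, two batches are skipped and the third one is returned. Kept
# commented out so pytest does not execute it at collection time.
#
#     batch = get_batch(DATA, pipeline=False)
#     assert isinstance(batch, Batch)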
def test_from_dataset_new_batch_class(self, dataset):
    new_index = DatasetIndex(25)
    new_ds = Dataset.from_dataset(dataset, new_index, batch_class=ImagesBatch)
    assert isinstance(new_ds, dataset.__class__)
    assert new_ds.batch_class == ImagesBatch
def _model_setup(data_format):
    if data_format == 'channels_last':
        image_shape = (100, 100, 2)
    elif data_format == 'channels_first':
        image_shape = (2, 100, 100)

    size = 50
    batch_shape = (size,) + image_shape
    images_array = np.random.random(batch_shape)
    labels_array = np.random.choice(10, size=size)
    data = images_array, labels_array
    dataset = Dataset(index=size, batch_class=ImagesBatch, preloaded=data)

    model_config = {'inputs': {'images': {'shape': image_shape},
                               'labels': {'classes': 10}},
                    'initial_block/inputs': 'images'}
    return dataset, model_config
def test_general_get(named_expr):
    pipeline = (Dataset(10).pipeline({'option': 0})
                .init_variable('var')
                .do_nothing(named_expr)
                .run(2, lazy=True))

    try:
        _ = pipeline.next_batch()
    except KeyError:
        pytest.fail("Name does not exist")
def test_inbatch_parallel_p(named_expr):
    """ Check that P() is evaluated properly """
    pipeline = (Dataset(10, MyBatch)
                .pipeline(dict(mean=0., std=1., option=ARRAY_INIT))
                .add_namespace(np)
                .init_variable('var', ARRAY_INIT)
                .update(B.images, ARRAY_INIT)
                .update(B.masks, ARRAY_INIT)
                .ip_test(param=P(named_expr))
                .run(BATCH_SIZE, lazy=True))

    _ = pipeline.next_batch()
    assert True
def test_apply_parallel_p(p_type, named_expr, src):
    """ Check that P() is evaluated properly """
    pipeline = (Dataset(10, MyBatch)
                .pipeline(dict(mean=0., std=1., option=ARRAY_INIT))
                .add_namespace(np)
                .init_variable('var', ARRAY_INIT)
                .update(B.images, ARRAY_INIT)
                .update(B.masks, ARRAY_INIT)
                .ap_test(src=src, param=p_type(named_expr))
                .run(BATCH_SIZE, lazy=True))

    b = pipeline.next_batch()

    if isinstance(src, str):
        assert True
    else:
        assert (b.images == b.masks).all()
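# `ip_test` and `ap_test` are actions defined on MyBatch outside this section.
# A minimal sketch of how such actions are typically declared in batchflow
# (assumed for illustration, not the original definitions), kept commented out
# to avoid redefining MyBatch:
#
#     class MyBatch(Batch):
#         components = 'images', 'masks'
#
#         @action
#         @inbatch_parallel(init='indices')
#         def ip_test(self, ix, param=None):
#             # P(...) is unwrapped so that each parallel call
#             # receives its own per-item value in `param`
#             ...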
def test_rebatch(batch_size, rebatch_size):
    """ Check that rebatch produces batches of the expected lengths (and doesn't crash). """
    data = np.vstack([np.array([i, i]) for i in range(DATASET_SIZE)])
    data = (data,)
    dataset = Dataset(index=DATASET_SIZE, batch_class=MyBatch, preloaded=data)

    # workaround for pipeline variables getting lost after rebatch:
    # collect batch lengths in a closure instead
    batch_lengths = {'before': [], 'after': []}

    def get_batch_len(batch, dump):
        batch_lengths[dump].append(batch.size)

    p = (Pipeline()
         .call(get_batch_len, 'before')
         .rebatch(rebatch_size)
         .call(get_batch_len, 'after')) << dataset
    p.run(batch_size=batch_size, n_epochs=1, bar=True)

    check_batch_lengths(batch_lengths['before'], batch_size)
    check_batch_lengths(batch_lengths['after'], rebatch_size)
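# `check_batch_lengths` is called above but defined elsewhere. A minimal
# sketch of what it could verify, assuming every batch except possibly the
# last must have exactly the expected length (the last one may be a shorter
# remainder of the epoch); commented out to avoid shadowing the real helper:
#
#     def check_batch_lengths(lengths, expected):
#         assert all(length == expected for length in lengths[:-1])
#         assert 0 < lengths[-1] <= expected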
def _model_setup(data_format, image_shape=100):
    dataset_size = 50
    num_classes = 10

    if data_format == 'channels_last':
        image_shape = (image_shape, image_shape, 2)
    else:
        image_shape = (2, image_shape, image_shape)

    batch_shape = (dataset_size, *image_shape)
    images_array = np.random.random(batch_shape)
    labels_array = np.random.choice(num_classes, size=dataset_size)
    data = images_array, labels_array
    dataset = Dataset(index=dataset_size, batch_class=ImagesBatch, preloaded=data)

    # flat '/'-separated keys are an equivalent shorthand for the nested config dict
    model_config = {'inputs/images/shape': image_shape,
                    'inputs/labels/classes': num_classes,
                    'initial_block/inputs': 'images'}
    return dataset, model_config
def test_i(name, expectation, limit_name, limit_value, result):
    """ Check the behaviour of I under different pipeline configurations.

    name
        Name of I, defines its output.
    expectation
        The test is expected to raise an error when `name` requires calculation
        of the total number of iterations (e.g. for 'm') and this number is not
        defined in the pipeline (limit_value is None).
    limit_name
        'n_epochs' or 'n_iters'
    limit_value
        Total number of epochs or iterations to run.
    result
        Expected output of I. If None, I is expected to raise an error.
    """
    kwargs = {'batch_size': 2, limit_name: limit_value, 'lazy': True}

    pipeline = (Dataset(10).pipeline()
                .init_variable('var', -1)
                .update(V('var', mode='w'), I(name))
                .run(**kwargs))

    with expectation:
        _ = pipeline.next_batch()

        assert pipeline.get_variable('var') == result
def dataset():
    index = DatasetIndex(100)
    return Dataset(index, Batch)
def test_build_index(self):
    new_index = Dataset.build_index(25)
    assert isinstance(new_index, DatasetIndex)