Exemplo n.º 1
0
        def _pipelines(model_class):
            """ Build a pair of prediction pipelines for the given model class.

            Parameters
            ----------
            model_class : type
                Model class. Framework-specific settings (data format, `dtype` config option and
                the way images are passed to `predict_model`) are chosen depending on whether
                it is a subclass of `TFModel` or `TorchModel`.

            Returns
            -------
            tuple of two Pipelines
                The first pipeline initializes a dynamic model named 'model' and predicts with it;
                the second one only predicts with a model named 'model' (it has no `init_model`
                action, so presumably the model is expected to be loaded into it by the caller).
                Both are bound to the same dataset and config.
            """
            settings = {}
            # These stay None when `model_class` is neither a TFModel nor a TorchModel subclass.
            data_format = call_args = call_kwargs = None
            if issubclass(model_class, TFModel):
                data_format = 'channels_last'
                settings['dtype'] = None
                # images are passed by keyword
                call_args, call_kwargs = (), {'images': B('images')}
            elif issubclass(model_class, TorchModel):
                data_format = 'channels_first'
                settings['dtype'] = 'float32'
                # images are passed positionally
                call_args, call_kwargs = (B('images'),), {}

            dataset, model_config = model_setup_images_clf(data_format)
            settings['model_class'] = model_class
            settings['model_config'] = model_config

            def _add_prediction(template):
                # Shared tail of both pipelines: convert images, predict, accumulate predictions.
                converted = template.to_array(dtype=C('dtype'))
                return converted.predict_model('model', *call_args,
                                               fetches='predictions',
                                               save_to=V('predictions', mode='a'),
                                               **call_kwargs)

            save_template = Pipeline().init_variable('predictions', default=[])
            save_template = save_template.init_model('dynamic', C('model_class'), 'model', C('model_config'))
            save_pipeline = _add_prediction(save_template)

            load_template = Pipeline().init_variable('predictions', default=[])
            load_pipeline = _add_prediction(load_template)

            save_pipeline = (save_pipeline << dataset) << settings
            load_pipeline = (load_pipeline << dataset) << settings
            return save_pipeline, load_pipeline
Exemplo n.º 2
0
def test_d(size, n_splits):
    """Check the behaviour of the D expression in the `set_dataset` action.

    The pipeline is fed from `dataset.CV(C('fold')).train`, so the fold is taken
    from the pipeline config. For every fold the pipeline is run twice (variables
    are reset before each run) and the collected indices are compared against all
    indices except the contiguous slice belonging to that fold.

    size
        size of the dataset.
    n_splits
        the number of cv folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    template = Pipeline().init_variable('indices', default=[])
    # append the first index of every batch to the 'indices' variable
    template = template.update(V('indices', mode='a'), B('indices')[0])
    pipeline = template << dataset.CV(C('fold')).train

    all_indices = list(range(size))

    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})

        fold_size = size // n_splits
        start = fold * fold_size
        end = start + fold_size
        # everything outside [start, end) is expected in the train part of this fold
        expected = all_indices[:start] + all_indices[end:]

        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)

            assert pipeline.v('indices') == expected
Exemplo n.º 3
0
def pipeline():
    """ Build a pipeline template that initializes a model from config and trains it.

    Notes
    -----
    The model is described by named expressions, so the pipeline can be executed
    only if its config provides the following keys:

    model_class
        Model class passed to `init_model` (`C('model_class')`).

    model_config
        Model configuration passed to `init_model` (`C('model_config')`).

    Returns
    -------
    Pipeline
        A pipeline that initializes a dynamic model named 'model', converts batch data
        with `to_array` and trains the model on batch `images` and `labels`,
        storing the fetched 'loss' in the 'current_loss' variable.
    """
    template = Pipeline().init_variable('current_loss')
    template = template.init_model('dynamic', C('model_class'), 'model', C('model_config'))
    template = template.to_array()
    return template.train_model('model',
                                fetches='loss',
                                images=B('images'),
                                labels=B('labels'),
                                save_to=V('current_loss'))
Exemplo n.º 4
0
def model_pipeline():
    """ Create a Pipeline template that trains a model described in the pipeline config.

    The function itself takes no arguments; the model is specified through
    the following config keys, which are read via `C` named expressions:

    model_class
        Model class passed to `init_model`.

    model_config
        Model configuration passed to `init_model`.

    Returns
    -------
    Pipeline
        Test pipeline that initializes a dynamic model named 'TestModel', calls `to_array`
        and trains the model on batch `images` and `labels`, saving the fetched 'loss'
        to the 'current_loss' variable.
    """
    model_name = 'TestModel'

    with_model = Pipeline().init_variable('current_loss').init_model(
        'dynamic', C('model_class'), model_name, C('model_config'))

    train_kwargs = {
        'fetches': 'loss',
        'images': B('images'),
        'labels': B('labels'),
        'save_to': V('current_loss'),
    }
    return with_model.to_array().train_model(model_name, **train_kwargs)
Exemplo n.º 5
0
def get_batch(data,
              pipeline,
              index=DATASET_SIZE,
              batch_class=Batch,
              skip=2,
              dst=False):
    """ Create a dataset and return one of its batches (of size 10).

    Parameters
    ----------
    data
        data to use
    pipeline : bool or Pipeline
        If False, batches are taken from the dataset directly. Otherwise they are taken
        from the dataset joined with a small pipeline that touches `B.data`;
        when a Pipeline is given, it is put in front of that small pipeline.

    index : DatasetIndex
        Passed as the first argument to `Dataset`.

    batch_class : type
        Batch class passed to `Dataset`.

    skip : int
        how many batches to skip before the returned one

    dst : bool or list of str
        preload data when False or load to components given

    Returns
    -------
    The batch obtained after skipping `skip` batches; when `dst` is not False,
    the result of calling `load(src=data, dst=dst)` on it.
    """
    # Data is handed to the dataset itself only in the preloading mode.
    preload_kwargs = {'preloaded': data} if dst is False else {}
    dataset = Dataset(index, **preload_kwargs, batch_class=batch_class)

    # touch batch data to fire preloading
    toucher = Pipeline().init_variable('dummy').update(V('dummy'), B.data)
    if isinstance(pipeline, Pipeline):
        toucher = pipeline + toucher

    if pipeline is not False:
        source = dataset >> toucher
    else:
        source = dataset

    # `skip` batches are discarded, the next one is kept
    for _ in range(skip + 1):
        batch = source.next_batch(10)

    if dst is not False:
        batch = batch.load(src=data, dst=dst)
    return batch
Exemplo n.º 6
0
def main():
    """ Measure memory of a UNet training run on Imagenette160 for a fixed batch size.

    Prints the value reported by `get_mem_info` for the chosen device, builds a training
    pipeline and prints the value returned by `get_run_mem` for 50 iterations with
    batch size 78.
    """
    device_id = 4
    print("Used: ", get_mem_info(device_id))
    dataset = Imagenette160(bar=True)

    model_config = {
        'model': UNet,
        'device': f'cuda:{device_id}',
        'loss': 'mse',
    }

    # Center-crop to 160x160, convert to channels-first float32 arrays and scale by 1/255.
    preprocessing = (
        dataset.train.p
        .crop(shape=(160, 160), origin='center')
        .init_variable('loss_history', [])
        .to_array(channels='first', dtype=np.float32)
        .multiply(1. / 255)
    )
    # The images are passed as both positional arguments of `train_model`
    # (presumably inputs and targets -- TODO confirm); losses are appended to 'loss_history'.
    train_pipeline = (
        preprocessing
        .init_model('dynamic', UNet, 'unet', config=model_config)
        .train_model('unet',
                     B.images,
                     B.images,
                     fetches='loss',
                     save_to=V('loss_history', mode='a'),
                     use_lock=True)
    )

    # NOTE: a previously disabled experiment at this point ran `get_run_mem` for doubling
    # batch sizes and extrapolated a maximal batch size from two consecutive measurements.

    run_memory = get_run_mem(dataset,
                             device_id,
                             model_config,
                             train_pipeline,
                             batch_size=78,
                             n_iters=50)
    print(run_memory)
Exemplo n.º 7
0
def test_i(name, expectation, limit_name, limit_value, result):
    """ Check the behaviour of I under different pipeline configurations.

    A lazy pipeline over a 10-item dataset writes `I(name)` into the variable 'var'
    (initialized with -1); one batch is requested inside `expectation` and
    the variable is then compared with `result`.

    name
        Name of I, defines its output.
    expectation
        Context manager wrapping the batch request. The test is expected to raise an error when
        the name requires calculation of total iterations (e.g. for 'm') and this number
        is not defined in the pipeline (limit_value is None).
    limit_name
        'n_epochs' or 'n_iters'
    limit_value
        Total number of epochs or iterations to run.
    result
        Expected value of 'var' after the batch request. If None, I is expected to raise an error.
    """
    run_kwargs = {'batch_size': 2, limit_name: limit_value, 'lazy': True}

    template = Dataset(10).pipeline().init_variable('var', -1)
    template = template.update(V('var', mode='w'), I(name))
    pipeline = template.run(**run_kwargs)

    with expectation:
        pipeline.next_batch()

    assert pipeline.get_variable('var') == result
Exemplo n.º 8
0
import numpy as np

sys.path.append('..')
from batchflow import B, C, D, F, L, V, R, P, I, Dataset, Pipeline, Batch, apply_parallel, inbatch_parallel, action

#--------------------
#      COMMON
#--------------------


@pytest.mark.parametrize('named_expr', [
    C('option'),
    C('not defined', default=10),
    B('size'),
    D('size'),
    V('var'),
    R('normal', 0, 1),
    R('normal', 0, 1, size=B.size),
    F(lambda batch: 0),
    L(lambda: 0),
])
def test_general_get(named_expr):
    """ Request one batch from a lazy pipeline that passes `named_expr` to `do_nothing`.

    named_expr
        Named expression under test; one of the parametrized C, B, D, V, R, F and L instances.
    """
    # The config defines 'option' and the pipeline defines variable 'var',
    # which are the names used by C('option') and V('var') above.
    pipeline = (Dataset(10).pipeline({
        'option': 0
    }).init_variable('var').do_nothing(named_expr).run(2, lazy=True))

    # Record whether requesting a batch raised a KeyError.
    # NOTE(review): `failed` is not read in the visible part of this function;
    # the check presumably follows beyond this excerpt -- confirm.
    failed = False
    try:
        _ = pipeline.next_batch()
    except KeyError:
        failed = True
Exemplo n.º 9
0
    'body/block/layout': 'cna',
    'device': 'gpu:2'
}

mnist = MNIST()

# Lazy training pipeline over the MNIST train part: a dynamic VGG16 model named 'conv'
# is trained on channels-first float32 images, and the fetched loss is written
# to the 'loss' variable.
train_ppl = (
    mnist.train.p
    .init_variable('loss', init_on_each_run=list)
    .init_variable('accuracy', init_on_each_run=list)
    .init_model('dynamic', VGG16, 'conv', config=model_config)
    .to_array(channels='first', dtype='float32')
    .train_model('conv',
                 B('images'),
                 B('labels'),
                 fetches='loss',
                 save_to=V('loss', mode='w'))
    .run(BATCH_SIZE, shuffle=True, n_epochs=1, lazy=True)
)

test_ppl = (mnist.test.p.init_variable('predictions').init_variable(
    'metrics', init_on_each_run=None).import_model('conv', train_ppl).to_array(
        channels='first', dtype='float32').predict_model(
            'conv',
            B('images'),
            targets=B('labels'),
            fetches='predictions',
            save_to=V('predictions')).gather_metrics(
                'class',
                targets=B('labels'),
                predictions=V('predictions'),
Exemplo n.º 10
0
# pylint: disable=redefined-outer-name, missing-docstring, bad-continuation
import sys
from contextlib import ExitStack as does_not_raise

import pytest

sys.path.append('..')
from batchflow import B, C, D, F, L, V, R, P, I, Dataset


@pytest.mark.parametrize('named_expr', [
    C('option'),
    B('size'),
    D('size'),
    V('var'),
    R('normal', 0, 1),
    P('normal', 0, 1),
    F(lambda batch: 0),
    L(lambda: 0),
])
def test_general_get(named_expr):
    pipeline = (Dataset(10).pipeline({
        'option': 0
    }).init_variable('var').do_nothing(named_expr).run(2, lazy=True))

    failed = False
    try:
        _ = pipeline.next_batch()
    except KeyError:
        failed = True
    if failed:
Exemplo n.º 11
0
mnist = MNIST()

# Lazy, shuffled batch sources for the train and test parts of MNIST.
# The train source has no epoch limit set (n_epochs=None); the test source runs one epoch.
train_root = mnist.train.p.run(
    BATCH_SIZE, shuffle=True, n_epochs=None, lazy=True)
test_root = mnist.test.p.run(
    BATCH_SIZE, shuffle=True, n_epochs=1, lazy=True)

# Dataset-independent training template: initializes a dynamic VGG16 model named 'conv',
# trains it on batch images and labels and writes the fetched loss to the 'loss' variable.
train_template = (
    Pipeline()
    .init_variable('loss', init_on_each_run=list)
    .init_variable('accuracy', init_on_each_run=list)
    .init_model('dynamic', VGG16, 'conv', config=model_config)
    .to_array()
    .train_model('conv',
                 images=B('images'),
                 labels=B('labels'),
                 fetches='loss',
                 save_to=V('loss', mode='w'))
)

# Dataset-independent evaluation template: imports the model 'conv' from the pipeline given
# in the config under 'import_from', predicts on batch images and accumulates
# classification metrics in the 'metrics' variable.
test_template = (
    Pipeline()
    .init_variable('predictions')
    .init_variable('metrics', init_on_each_run=None)
    .import_model('conv', C('import_from'))
    .to_array()
    .predict_model('conv',
                   images=B('images'),
                   fetches='predictions',
                   save_to=V('predictions'))
    .gather_metrics('class',
                    targets=B('labels'),
                    predictions=V('predictions'),
                    fmt='logits',
                    axis=-1,
                    save_to=V('metrics', mode='a'))
)