Example #1
def test_read_mapping(tmpdir):
    rows = [["orig", "new"], ["0", "1"], ["1", "2"], ["2", "3"]]

    filepath = tmpdir.join("mapping.csv")
    save_csv(rows=rows, filepath=str(filepath))
    mapping = read_mapping(str(filepath), header=True)
    assert mapping == {0: 1, 1: 2, 2: 3}
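For reference, the rows above serialize to a mapping file like this (assuming save_csv writes plain comma-separated rows):

orig,new
0,1
1,2
2,3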
Example #2
import tempfile

import pytest

# Note: "io" below is the project's own I/O module (e.g. nobrainer.io),
# not the standard library "io".
def test_read_mapping():
    with tempfile.NamedTemporaryFile() as f:
        f.write('orig,new\n0,1\n20,10\n40,15'.encode())
        f.seek(0)
        assert {
            0: 1,
            20: 10,
            40: 15
        } == io.read_mapping(f.name, skip_header=True)
        # Header is non-integer.
        with pytest.raises(ValueError):
            io.read_mapping(f.name, skip_header=False)

    with tempfile.NamedTemporaryFile() as f:
        f.write('orig,new\n0,1\n20,10\n40'.encode())
        f.seek(0)
        # Last row only has one value.
        with pytest.raises(ValueError):
            io.read_mapping(f.name, skip_header=False)

    with tempfile.NamedTemporaryFile() as f:
        f.write('origFnew\n0F1\n20F10\n40F15'.encode())
        f.seek(0)
        assert {
            0: 1,
            20: 10,
            40: 15
        } == io.read_mapping(f.name, skip_header=True, delimiter='F')
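The tests above pin down the expected behavior. A minimal sketch of a read_mapping consistent with them might look like this; the real implementation may differ, and the names follow the test's keyword arguments:

import csv

def read_mapping(filepath, skip_header=True, delimiter=','):
    """Read a CSV of (original, new) integer label pairs into a dict."""
    mapping = {}
    with open(filepath, newline='') as f:
        reader = csv.reader(f, delimiter=delimiter)
        if skip_header:
            next(reader)
        for row in reader:
            if len(row) != 2:
                raise ValueError(
                    "expected exactly two values per row, got {}".format(row))
            # int() raises ValueError on non-integer cells (e.g. a header row).
            mapping[int(row[0])] = int(row[1])
    return mapping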
Example #3
def train(params):
    """Train estimator."""
    if params['aparcaseg_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['aparcaseg_mapping']))
        mapping = read_mapping(params['aparcaseg_mapping'])
    else:
        mapping = None

    def normalizer_aparcaseg(features, labels):
        return (
            normalize_zero_one(features),
            preprocess_aparcaseg(labels, mapping))

    def normalizer_brainmask(features, labels):
        return (
            normalize_zero_one(features),
            binarize(labels, threshold=0))

    if params['aparcaseg_mapping'] is not None:
        normalizer = normalizer_aparcaseg
    elif params['brainmask']:
        normalizer = normalizer_brainmask
    else:
        normalizer = None

    list_of_filepaths = read_csv(params['csv'])

    def generator_builder():
        """Return a function that returns a generator."""
        return iter_volumes(
            list_of_filepaths=list_of_filepaths,
            vol_shape=params['vol_shape'],
            block_shape=params['block_shape'],
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            strides=params['strides'],
            shuffle=True,
            normalizer=normalizer)

    _output_shapes = (
        (*params['block_shape'], 1),
        params['block_shape'])

    input_fn = input_fn_builder(
        generator=generator_builder,
        output_types=(_DT_X_TF, _DT_Y_TF),
        output_shapes=_output_shapes,
        num_epochs=params['n_epochs'],
        batch_size=params['batch_size'],
        # TODO(kaczmarj): add multi-gpu support for training on volumes.
        # multi_gpu=params['multi_gpu'],
        # examples_per_epoch=examples_per_epoch,
    )

    runconfig = tf.estimator.RunConfig(
        save_summary_steps=25,
        save_checkpoints_steps=500,
        keep_checkpoint_max=100)

    model = nobrainer.models.get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=runconfig,
        multi_gpu=params['multi_gpu'])

    # Setup for training and periodic evaluation.
    if params['eval_csv'] is not None:
        eval_list_of_filepaths = read_csv(params['eval_csv'])
        gen = nobrainer.util.iter_volumes(
            list_of_filepaths=eval_list_of_filepaths,
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            vol_shape=params['vol_shape'],
            block_shape=params['block_shape'],
            strides=params['strides'],
            shuffle=False,
            normalizer=normalizer)

        def _get_eval_features_labels():
            _features = []
            _labels = []
            for _f, _l in gen:
                _features.append(_f)
                _labels.append(_l)
            return np.stack(_features), np.stack(_labels)

        tf.logging.info("Loading evaluation data")
        _eval_features, _eval_labels = _get_eval_features_labels()

        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x=_eval_features, y=_eval_labels, batch_size=2, num_epochs=1,
            shuffle=False)

        _monitors = [
            tf.contrib.learn.monitors.ValidationMonitor(
                input_fn=eval_input_fn, every_n_steps=2000,
                early_stopping_metric=None, early_stopping_rounds=None)]
        hooks = tf.contrib.learn.monitors.replace_monitors_with_hooks(
            _monitors, model)

    # Training without evaluation.
    else:
        hooks = None

    model.train(input_fn=input_fn, hooks=hooks)
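For context, a hypothetical params dict covering the keys this train() reads; every value below is a placeholder:

params = {
    'aparcaseg_mapping': 'mappings/aparcaseg.csv',  # or None for brainmask mode
    'brainmask': False,
    'csv': 'data/train_files.csv',
    'eval_csv': None,             # set to a CSV path to enable periodic evaluation
    'vol_shape': (256, 256, 256),
    'block_shape': (64, 64, 64),
    'strides': (64, 64, 64),
    'n_epochs': 5,
    'batch_size': 1,
    'model': 'highres3dnet',
    'n_classes': 50,
    'optimizer': 'Adam',
    'learning_rate': 0.001,
    'model_dir': 'models/run-01',
    'multi_gpu': False,
}
train(params)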
Example #4
def train(params):

    model_config = tf.estimator.RunConfig(
        save_summary_steps=params['save_summary_steps'],
        save_checkpoints_steps=params['save_checkpoints_steps'],
        keep_checkpoint_max=params['keep_checkpoint_max'])

    model = get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=model_config,
        multi_gpu=params['multi_gpu'],
        **params['model_opts'])

    label_mapping = None
    if params['label_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['label_mapping']))
        label_mapping = read_mapping(params['label_mapping'])

    filepaths = read_csv(params['csv'])

    volume_data_generator = VolumeDataGenerator(
        samplewise_minmax=params['samplewise_minmax'],
        samplewise_zscore=params['samplewise_zscore'],
        samplewise_center=params['samplewise_center'],
        samplewise_std_normalization=params['samplewise_std_normalization'],
        flip=params['flip'],
        rescale=params['rescale'],
        rotate=params['rotate'],
        gaussian=params['gaussian'],
        reduce_contrast=params['reduce_contrast'],
        salt_and_pepper=params['salt_and_pepper'],
        brightness_range=params['brightness_range'],
        shift_range=params['shift_range'],
        zoom_range=params['zoom_range'],
        binarize_y=params['binarize'],
        mapping_y=label_mapping)

    if params['eval_csv']:
        eval_filepaths = read_csv(params['eval_csv'])
        eval_volume_data_generator = VolumeDataGenerator(
            binarize_y=params['binarize'],
            mapping_y=label_mapping)
    else:
        eval_filepaths = None
        eval_volume_data_generator = None

    _train(
        model=model,
        volume_data_generator=volume_data_generator,
        filepaths=filepaths,
        volume_shape=params['volume_shape'],
        block_shape=params['block_shape'],
        strides=params['strides'],
        x_dtype='float32',
        y_dtype='int32',
        shuffle=True,
        batch_size=params['batch_size'],
        n_epochs=params['n_epochs'],
        prefetch=params['prefetch'],
        multi_gpu=params['multi_gpu'],
        eval_volume_data_generator=eval_volume_data_generator,
        eval_filepaths=eval_filepaths)
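The augmentation flags above map one-to-one onto VolumeDataGenerator options. A hypothetical minimal construction, assuming the omitted keyword arguments have defaults:

volume_data_generator = VolumeDataGenerator(
    samplewise_minmax=True,  # scale each volume to [0, 1]
    flip=True,               # random flips for augmentation
    rotate=True,             # random rotations
    salt_and_pepper=True,    # impulse noise
    binarize_y=False,
    mapping_y=label_mapping)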
Example #5
def validate_from_filepath(
    filepath,
    predictor,
    block_shape,
    n_classes,
    mapping_y,
    return_variance=False,
    return_entropy=False,
    return_array_from_images=False,
    n_samples=1,
    normalizer=normalize_zero_one,
    batch_size=4,
    dtype=DT_X,
):
    """Computes dice for a prediction compared to a ground truth image.

    Args:
        filepath: tuple, tupel of paths to existing neuroimaging volume (index 0)
         and ground truth (index 1).
        predictor: TensorFlow Predictor object, predictor from previously
            trained model.
        n_classes: int, number of classifications the model is trained to output.
        mapping_y: path-like, path to csv mapping file per command line argument.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        return_variance: Boolean. If set True, it returns the running population 
            variance along with mean. Note, if the n_samples is smaller or equal to 1,
            the variance will not be returned; instead it will return None
        return_entropy: Boolean. If set True, it returns the running entropy.
            along with mean.       
        return_array_from_images: Boolean. If set True and the given input is either image,
            filepath, or filepaths, it will return arrays of [mean, variance, entropy]
            instead of images of them. Also, if the input is array, it will
            simply return array, whether or not this flag is True or False.
        n_samples: The number of sampling. If set as 1, it will just return the 
            single prediction value.
        normalizer: callable, function that accepts an ndarray and returns an
            ndarray. Called before separating volume into blocks.
        batch_size: int, number of sub-volumes per batch for prediction.
        dtype: str or dtype object, dtype of features.

    Returns:
        `nibabel.spatialimages.SpatialImage` or arrays of predictions of 
        mean, variance(optional), and entropy (optional).
    """
    if not Path(filepath[0]).is_file():
        raise FileNotFoundError("could not find file {}".format(filepath[0]))
    img = nib.load(filepath[0])
    y = read_volume(filepath[1], dtype=np.int32)

    outputs = _predict(inputs=img,
                       predictor=predictor,
                       block_shape=block_shape,
                       return_variance=return_variance,
                       return_entropy=return_entropy,
                       return_array_from_images=return_array_from_images,
                       n_samples=n_samples,
                       normalizer=normalizer,
                       batch_size=batch_size)
    prediction_image = outputs[0].get_data()
    y = replace(y, read_mapping(mapping_y))
    dice = get_dice_for_images(prediction_image, y, n_classes)
    return outputs, dice
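A hypothetical call; the file paths are placeholders, and predictor is assumed to be a previously loaded TensorFlow Predictor:

outputs, dice = validate_from_filepath(
    filepath=('sub-01_T1w.nii.gz', 'sub-01_aparcaseg.nii.gz'),
    predictor=predictor,
    block_shape=(64, 64, 64),
    n_classes=50,
    mapping_y='mappings/aparcaseg.csv')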
Example #6
def train(params):
    """Train estimator."""

    x_dataset = params['xdset']
    y_dataset = params['ydset']

    tf.logging.info('Using features dataset {x} and labels dataset {y}'.format(
        x=x_dataset, y=y_dataset))

    with h5py.File(params['hdf5path'], mode='r') as fp:
        examples_per_epoch = fp[x_dataset].shape[0]
        assert examples_per_epoch == fp[y_dataset].shape[0]

    if params['aparcaseg_mapping']:
        tf.logging.info("Reading mapping file: {}".format(
            params['aparcaseg_mapping']))
        mapping = read_mapping(params['aparcaseg_mapping'])
    else:
        mapping = None

    def normalizer_aparcaseg(features, labels):
        return features, preprocess_aparcaseg(labels, mapping)

    def normalizer_brainmask(features, labels):
        return features, binarize(labels, threshold=0)

    if params['aparcaseg_mapping'] is not None:
        normalizer = normalizer_aparcaseg
    elif params['brainmask']:
        normalizer = normalizer_brainmask
    else:
        normalizer = None

    def generator_builder():
        """Return a function that returns a generator."""
        return iter_hdf5(filepath=params['hdf5path'],
                         x_dataset=x_dataset,
                         y_dataset=y_dataset,
                         x_dtype=_DT_X_NP,
                         y_dtype=_DT_Y_NP,
                         shuffle=False,
                         normalizer=normalizer)

    _output_shapes = ((*params['block_shape'], 1), params['block_shape'])

    input_fn = input_fn_builder(generator=generator_builder,
                                output_types=(_DT_X_TF, _DT_Y_TF),
                                output_shapes=_output_shapes,
                                num_epochs=params['n_epochs'],
                                multi_gpu=params['multi_gpu'],
                                examples_per_epoch=examples_per_epoch,
                                batch_size=params['batch_size'])

    runconfig = tf.estimator.RunConfig(save_summary_steps=25,
                                       save_checkpoints_steps=100,
                                       keep_checkpoint_max=100)

    model = nobrainer.models.get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=runconfig,
        multi_gpu=params['multi_gpu'])

    model.train(input_fn=input_fn)
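For context, this variant reads pre-blocked examples from an HDF5 file rather than iterating volumes from a CSV. A hypothetical file layout consistent with the assertions above; the dataset names and shapes are placeholders:

import h5py
import numpy as np

with h5py.File('blocks.h5', 'w') as fp:
    # The first axis is the number of examples; train() asserts it matches
    # between the features and labels datasets.
    fp.create_dataset('features', data=np.zeros((100, 64, 64, 64), 'float32'))
    fp.create_dataset('labels', data=np.zeros((100, 64, 64, 64), 'int32'))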