def test_write_read_float_labels(csv_of_volumes, tmp_path):  # noqa: F811
    files = io.read_csv(csv_of_volumes, skip_header=False)
    # Replace each label filepath with a random scalar float label.
    files = [(x, random.random()) for x, _ in files]
    filename_template = str(tmp_path / "data-{shard:03d}.tfrecords")
    examples_per_shard = 12
    tfrecord.write(
        files,
        filename_template=filename_template,
        examples_per_shard=examples_per_shard,
        processes=1,
    )

    # With 12 examples per shard, expect shards data-000 through data-008.
    paths = list(tmp_path.glob("data-*.tfrecords"))
    paths = sorted(paths)
    assert len(paths) == 9
    assert (tmp_path / "data-008.tfrecords").is_file()

    dset = tf.data.TFRecordDataset(list(map(str, paths)), compression_type="GZIP")
    dset = dset.map(
        tfrecord.parse_example_fn(volume_shape=(8, 8, 8), scalar_label=True))

    # Compare each original (volume, label) pair with the parsed dataset.
    for ref, test in zip(files, dset):
        x, y = ref
        x = io.read_volume(x)
        assert_array_equal(x, test[0])
        assert_array_equal(y, test[1])
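# Usage sketch (illustrative, not part of the test suite): one way the shards
# written above could feed a training pipeline. Assumes the GZIP-compressed
# `data-*.tfrecords` shards and (8, 8, 8) volume shape from the test, and the
# same module-level imports used elsewhere in this file (`tf`, `tfrecord`,
# `pathlib.Path`). The shard directory, shuffle buffer, and batch size are
# placeholders.
def example_tfrecord_pipeline(shard_dir):
    """Return a batched `tf.data.Dataset` built from written shards."""
    shard_paths = sorted(str(p) for p in Path(shard_dir).glob("data-*.tfrecords"))
    dset = tf.data.TFRecordDataset(shard_paths, compression_type="GZIP")
    dset = dset.map(
        tfrecord.parse_example_fn(volume_shape=(8, 8, 8), scalar_label=True))
    # Shuffle a small buffer of examples, then batch for training.
    return dset.shuffle(buffer_size=64).batch(8)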
def itervolumes(filepaths,
                block_shape,
                x_dtype,
                y_dtype,
                strides=(1, 1, 1),
                shuffle=False,
                normalizer=None):
    """Yield tuples of numpy arrays `(features, labels)` from a list of
    filepaths to neuroimaging files.
    """
    filepaths = copy.deepcopy(filepaths)

    if shuffle:
        random.shuffle(filepaths)

    for idx, (features_fp, labels_fp) in enumerate(filepaths):
        try:
            features = read_volume(features_fp, dtype=x_dtype)
            labels = read_volume(labels_fp, dtype=y_dtype)
        except Exception:
            tf.logging.fatal(
                "Error reading at least one input file: {} {}".format(
                    features_fp, labels_fp))
            raise

        if normalizer is not None:
            features, labels = normalizer(features, labels)

        _check_shapes_equal(features, labels)
        feature_gen = iterblocks_3d(
            arr=features, kernel_size=block_shape, strides=strides)
        label_gen = iterblocks_3d(
            arr=labels, kernel_size=block_shape, strides=strides)

        for ff, ll in zip(feature_gen, label_gen):
            yield ff[..., np.newaxis], ll
def itervolumes(filepaths,
                block_shape,
                x_dtype=np.float32,
                y_dtype=np.int32,
                strides=None,
                shuffle=False,
                normalizer=None):
    """Yield tuples of numpy arrays `(features, labels)` from a list of
    filepaths to neuroimaging files.

    Args:
        filepaths: nested list of tuples, where each tuple has length two.
            The first item in each tuple is the path to the volume of
            features (e.g., an anatomical scan), and the second item is the
            path to the volume of labels (e.g., FreeSurfer's aparc+aseg.mgz).
        block_shape: tuple of len 3 or None, the shape of blocks to take from
            the features and labels. This is useful if a full volume cannot
            fit into GPU memory. If `block_shape` is `None`, full volumes are
            yielded. Use `(None, None, None)` if yielding full volumes and
            volumes have different shapes.
        x_dtype: dtype object or string, data type of features.
        y_dtype: dtype object or string, data type of labels.
        strides: tuple or None, strides to take between blocks. If None,
            strides will be equal to `block_shape`, which will generate
            non-overlapping blocks.
        shuffle: bool, if true, shuffle the list of filepaths. Pairs of
            `(features, labels)` filepaths are maintained.
        normalizer: callable, function that accepts two arrays (`features`
            and `labels`) and returns two arrays (`features` and `labels`).

    Yields:
        Tuple of `(features, labels)`. If `block_shape` is a tuple of
        integers, the shape of `features` is `(*block_shape, 1)`, and the
        shape of `labels` is `block_shape`. If `block_shape` is `None` or
        `(None, None, None)`, the shape of `features` is
        `(*volume_shape, 1)`, and the shape of `labels` is `volume_shape`.
    """
    filepaths = copy.deepcopy(filepaths)

    # A common error is to pass the CSV filepath as the `filepaths` argument.
    if isinstance(filepaths, str):
        raise ValueError("`filepaths` must be a nested sequence of filepaths.")
    if any(len(i) != 2 for i in filepaths):
        raise ValueError("Found sequence with len != 2 in `filepaths`.")

    if shuffle:
        random.shuffle(filepaths)

    for idx, (features_fp, labels_fp) in enumerate(filepaths):
        try:
            features = read_volume(features_fp, dtype=x_dtype)
            labels = read_volume(labels_fp, dtype=y_dtype)
        except Exception:
            tf.logging.fatal(
                "Error reading at least one input file: {} {}".format(
                    features_fp, labels_fp))
            raise

        if normalizer is not None:
            features, labels = normalizer(features, labels)

        if features.shape != labels.shape:
            raise ValueError(
                "Shape of features ({}) is not equal to shape of labels"
                " ({}).".format(features.shape, labels.shape))

        # Yield full volumes.
        if block_shape is None or block_shape == (None, None, None):
            yield (features[..., np.newaxis].astype(x_dtype),
                   labels.astype(y_dtype))
        # Yield blocks of volumes.
        else:
            feature_gen = iterblocks_3d(
                arr=features, kernel_size=block_shape, strides=strides)
            label_gen = iterblocks_3d(
                arr=labels, kernel_size=block_shape, strides=strides)

            for ff, ll in zip(feature_gen, label_gen):
                # TEMP: skip pair if labels are all zero. This prevents us
                # from running train_and_evaluate because the number of
                # samples is dynamic.
                # if not ll.any():
                #     continue
                # Add channel axis (channels-last).
                yield ff[..., np.newaxis].astype(x_dtype), ll.astype(y_dtype)
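# Usage sketch (illustrative): iterate over non-overlapping 32**3 blocks from
# pairs of feature and label volumes. The filepaths below are hypothetical;
# in practice they could come from a CSV via `io.read_csv`, as in the test
# above.
def example_itervolumes():
    filepaths = [
        ("sub-01_T1w.nii.gz", "sub-01_aparc+aseg.nii.gz"),  # hypothetical files
        ("sub-02_T1w.nii.gz", "sub-02_aparc+aseg.nii.gz"),
    ]
    gen = itervolumes(filepaths, block_shape=(32, 32, 32), shuffle=True)
    features, labels = next(gen)
    assert features.shape == (32, 32, 32, 1)  # channel axis appended last
    assert labels.shape == (32, 32, 32)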
def validate_from_filepath(filepath,
                           predictor,
                           block_shape,
                           n_classes,
                           mapping_y,
                           return_variance=False,
                           return_entropy=False,
                           return_array_from_images=False,
                           n_samples=1,
                           normalizer=normalize_zero_one,
                           batch_size=4,
                           dtype=DT_X):
    """Compute Dice for a prediction compared to a ground truth image.

    Args:
        filepath: tuple, tuple of paths to an existing neuroimaging volume
            (index 0) and its ground truth (index 1).
        predictor: TensorFlow Predictor object, predictor from a previously
            trained model.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        n_classes: int, number of classes the model is trained to output.
        mapping_y: path-like, path to a CSV mapping file per the command-line
            argument.
        return_variance: bool, if True, return the running population
            variance along with the mean. Note that if `n_samples` is less
            than or equal to 1, the variance is not returned; `None` is
            returned in its place.
        return_entropy: bool, if True, return the running entropy along with
            the mean.
        return_array_from_images: bool, if True and the given input is an
            image, filepath, or filepaths, return arrays of
            [mean, variance, entropy] instead of images of them. If the
            input is an array, an array is returned regardless of this flag.
        n_samples: int, number of samples to draw. If 1, the single
            prediction value is returned.
        normalizer: callable, function that accepts an ndarray and returns
            an ndarray. Called before separating the volume into blocks.
        batch_size: int, number of sub-volumes per batch for prediction.
        dtype: str or dtype object, dtype of features.

    Returns:
        Tuple of the prediction outputs
        (`nibabel.spatialimages.SpatialImage` or arrays of the mean,
        variance (optional), and entropy (optional)) and the Dice score.
    """
    if not Path(filepath[0]).is_file():
        raise FileNotFoundError("could not find file {}".format(filepath[0]))

    img = nib.load(filepath[0])
    y = read_volume(filepath[1], dtype=np.int32)

    outputs = _predict(
        inputs=img,
        predictor=predictor,
        block_shape=block_shape,
        return_variance=return_variance,
        return_entropy=return_entropy,
        return_array_from_images=return_array_from_images,
        n_samples=n_samples,
        normalizer=normalizer,
        batch_size=batch_size)
    prediction_image = outputs[0].get_data()

    # Map ground-truth label values into the model's class indices before
    # computing Dice.
    y = replace(y, read_mapping(mapping_y))
    dice = get_dice_for_images(prediction_image, y, n_classes)
    return outputs, dice
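# Usage sketch (illustrative): validate a single subject against its ground
# truth. The file paths, SavedModel directory, and mapping CSV below are
# hypothetical, and the predictor is assumed to be built with TF 1.x's
# `tf.contrib.predictor.from_saved_model`; substitute however the trained
# model is actually loaded.
def example_validate_from_filepath():
    predictor = tf.contrib.predictor.from_saved_model("models/exported")
    outputs, dice = validate_from_filepath(
        filepath=("sub-01_T1w.nii.gz", "sub-01_aparc+aseg.nii.gz"),
        predictor=predictor,
        block_shape=(32, 32, 32),
        n_classes=2,
        mapping_y="mapping.csv",
        n_samples=1,
    )
    print("Dice:", dice)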