示例#1
0
def read_crag(crag_path, solution_name='best-effort'):
    """Reads the CRAG (Candidate Region Adjacency Graph) data from disk.

    Parameters
    ----------
    crag_path : string
        Path to the HDF5 file where the CRAG is stored
    solution_name: string
        Name of the solution to retrieve. If no solution is wanted,
        set to None

    Returns
    ----------
    crag: Crag
        Main CRAG structure
    volumes: CragVolumes
        Volumes in the CRAG
    crag_solution: CragSolution or None
        Solution retrieved from the store, or None when solution_name
        is None
    ein: ExplicitVolume_f
        Intensity volume
    ebb: ExplicitVolume_f
        Boundary volume
    """

    # Check extension is correct
    if not dataio.valid_volume_path(crag_path):
        raise ValueError('Wrong extension for CRAG file {}. Must be valid HDF5 extension'.format(crag_path))

    print('Retrieving CRAG ...')

    # Read crag
    store = Hdf5CragStore(crag_path)
    crag = Crag()
    store.retrieveCrag(crag)

    # Volumes references
    volume_store = Hdf5VolumeStore(crag_path)

    # Get proposed solution, if requested
    if solution_name is None:
        crag_solution = None
    else:
        # TODO: check if name exists when wrapper bug is solved
        #solution_names = store.getSolutionNames()
        #if solution_name not in solution_names:
        #    raise ValueError('Requested {} is not in set 
        #    {}'.format(solution_name,
        #        solution_names))
        crag_solution = CragSolution(crag)
        store.retrieveSolution(crag, crag_solution, solution_name)

    # Read volumes
    volumes = CragVolumes(crag)
    store.retrieveVolumes(volumes)

    # Read the intensity and boundary volumes stored in the CRAG
    ebb = ExplicitVolume_f()
    ein = ExplicitVolume_f()
    volume_store.retrieveBoundaries(ebb)
    volume_store.retrieveIntensities(ein)

    return crag, volumes, crag_solution, ein, ebb
示例#2
0
    def read(self, num=None):
        """Loads image content from the dataset path.

        If the path points to a HDF5 file, the group where images are
        stored must have been provided beforehand. Otherwise the path is
        treated as a folder and read through the generic file reader
        (default image extensions are PNG, TIF and TIFF; others can be
        provided).
            :param num: Number of images to read from the dataset. Set to None for reading all"""

        path = self.path
        if not os.path.exists(path):
            raise ValueError('Path {} does not exist'.format(path))

        if not os.path.isfile(path):
            # Folder input: delegate reading to the generic file reader
            self.imgs = dataio.FileReader(path, self.exts).read(num)
        else:
            if not dataio.valid_volume_path(path):
                raise ValueError('Invalid extension for file {}'.format(path))
            with h5py.File(path, 'r') as f:
                dataset = dataio.get_hf_group(f, self.group)
                if num is None:
                    self.imgs = dataset[()]
                elif num > dataset.shape[0]:
                    raise ValueError(
                        'Cannot read more images than the ones available')
                else:
                    self.imgs = dataset[0:num][:]
                # NOTE: everything is loaded in memory. Fix this for big
                # datasets when memory is limited

        if self.imgs.shape[0] == 0:
            raise ValueError('No data has been read')
示例#3
0
 def valid_input(self, d):
     """ Normalises the given input into a compatible format.

     Directories are handled by process_folder, valid HDF5 paths by
     process_h5; anything else is rejected.
     """
     if os.path.isdir(d):
         return self.process_folder(d)
     if dataio.valid_volume_path(d):
         return self.process_h5(d)
     raise IOError('Formats accepted are HDF5 and folders')
示例#4
0
    def read_crag(self, crag):
        """ Registers an existing CRAG project file after validating
        its extension.

        Params
        ---------
        crag: string
            Path to the CRAG project file. Must have a valid HDF5
            extension.
        """
        if not dataio.valid_volume_path(crag):
            raise ValueError('A valid HDF5 file must be provided')

        self.project_file = crag
示例#5
0
    def save_data(self,
                  out_p,
                  group='data/labels',
                  min_digit=5,
                  overwrite=True,
                  int_data=False):
        """ Saves the processed data.

        If the output path looks like a HDF5 file, a dataset is created
        inside the given group. Otherwise images are dumped as individual
        files into the folder specified by the path.

        Params:
        ---------
        out_p: string
            Output path. Stored as a HDF5 dataset when it has a valid HDF5
            extension, dumped into a folder otherwise.
        group: string
            Path inside the HDF5 file where data is stored. Subgroups must
            be separated by a /. Not used if dumping into a folder.
        min_digit: integer
            Minimum amount of digits used to name each image after its
            position in the input volume. Only used when dumping into a
            folder.
        overwrite: boolean
            Whether to overwrite existing datasets in the destination path
        int_data: boolean
            By default data is stored as float. If True, it is stored as
            unsigned integer in .png files. Only used if the destination
            is a folder.
        """

        # Keep existing output untouched unless overwriting was requested
        if os.path.exists(out_p) and not overwrite:
            return

        self._check_data()

        if dataio.valid_volume_path(out_p):
            # HDF5 destination: store the whole volume under the group
            with h5py.File(out_p, 'w') as f:
                dataio.store_hf_group(f, group, self.imgs)
            return

        # Folder destination: dump one image per section
        dataio.create_dir(out_p)
        if int_data is True:
            dataio.volume_to_folder(self.imgs,
                                    out_p,
                                    min_digit=min_digit,
                                    typ='uint8',
                                    ext='.png')
        else:
            dataio.volume_to_folder(self.imgs, out_p, min_digit=min_digit)
示例#6
0
    def _init_output(self):
        """ Creates the output HDF5 file and the empty, resizable datasets
        the generated data will be appended to. """

        if os.path.isfile(self.output_path):
            raise IOError('File {} already exists'.format(self.output_path))
        if not dataio.valid_volume_path(self.output_path):
            raise ValueError('Given output path is not a valid HDF5 file')

        # The handle stays open while output is produced. Remember to close
        # it at the end
        self.h5_file = h5py.File(self.output_path, 'w')

        # Datasets start empty and are resized on demand: the first axis is
        # unbounded so any number of samples can be appended
        empty_shape = (0, self.get_sections(), self.dims, self.conf.height,
                       self.conf.width)
        unbounded_shape = (None, self.get_sections(), self.dims,
                           self.conf.height, self.conf.width)

        # Groups for data, references and (optionally) labels
        self.h5_file.create_dataset(DATA_TAG,
                                    empty_shape,
                                    chunks=True,
                                    compression='gzip',
                                    maxshape=unbounded_shape)
        self.h5_file.create_dataset(REF_TAG, (0, ),
                                    chunks=True,
                                    compression='gzip',
                                    maxshape=(None, ))
        if self.store_label() is True:
            self.h5_file.create_dataset(LABEL_TAG, (0, ),
                                        chunks=True,
                                        compression='gzip',
                                        maxshape=(None, ))

        # Channel names are variable-length strings; 'cpositions' maps each
        # name in 'clabels' to its channel position
        str_dtype = h5py.special_dtype(vlen=str)
        channel_names = np.asarray(list(self.channel_map.keys()),
                                   dtype=object)
        self.h5_file.create_dataset('clabels',
                                    data=channel_names,
                                    dtype=str_dtype)
        self.h5_file.create_dataset('cpositions',
                                    data=np.asarray(
                                        list(self.channel_map.values())))

        # Configuration metadata travels with the file as root attributes
        for attr_name in ('height', 'width', 'padding', 'normalise'):
            self.h5_file.attrs.create(attr_name,
                                      data=getattr(self.conf, attr_name))