Exemplo n.º 1
0
def ImageShardTransferTask(
  src_path: str,
  dst_path: str,
  shape: ShapeType,
  offset: ShapeType,
  mip: int = 0,
  fill_missing: bool = False,
  translate: ShapeType = (0, 0, 0),
  agglomerate: bool = False,
  timestamp: Optional[int] = None,
):
  """
  Build one shard file of a sharded image volume from a
  CloudVolume readable source and upload it to the destination.
  Downsamples are not generated.

  The requested region (offset, offset + shape) is clamped to the
  destination bounds at `mip` and then expanded to the destination
  chunk grid. The source is read from that region shifted back by
  `translate`.

  The sharded specification can be read here:
  Shard Container:
  https://github.com/google/neuroglancer/blob/056a3548abffc3c76c93c7a906f1603ce02b5fa3/src/neuroglancer/datasource/precomputed/sharded.md
  Sharded Images:
  https://github.com/google/neuroglancer/blob/056a3548abffc3c76c93c7a906f1603ce02b5fa3/src/neuroglancer/datasource/precomputed/volume.md#sharded-chunk-storage

  src_path: cloudpath of the volume to read from
  dst_path: cloudpath of the volume the shard is written to
  shape: size of the region to transfer
  offset: minimum corner of the region in destination coordinates
  mip: resolution level used for both volumes
  fill_missing: forwarded to both CloudVolume instances
  translate: displacement between source and destination coordinates
    (source = destination - translate)
  agglomerate, timestamp: forwarded to the source download call
  """
  region_shape = Vec(*shape)
  region_origin = Vec(*offset)
  mip = int(mip)
  fill_missing = bool(fill_missing)
  shift = Vec(*translate)

  source = CloudVolume(
    src_path, fill_missing=fill_missing,
    mip=mip, bounded=False
  )
  destination = CloudVolume(
    dst_path, fill_missing=fill_missing,
    mip=mip, compress=None
  )
  dest_meta = destination.meta

  # Clamp to the destination, then snap outward to the chunk grid.
  write_bbox = Bbox.clamp(
    Bbox(region_origin, region_origin + region_shape),
    dest_meta.bounds(mip)
  )
  write_bbox = write_bbox.expand_to_chunk_size(
    dest_meta.chunk_size(mip),
    offset=dest_meta.voxel_offset(mip)
  )
  read_bbox = write_bbox - shift

  cutout = source.download(
    read_bbox, agglomerate=agglomerate, timestamp=timestamp
  )
  shard_name, shard_payload = destination.image.make_shard(
    cutout, write_bbox, mip, progress=False
  )
  # Drop the reference to the cutout before uploading.
  del cutout

  shard_dir = dest_meta.join(destination.cloudpath, dest_meta.key(mip))
  CloudFiles(shard_dir).put(shard_name, shard_payload)
Exemplo n.º 2
0
def MeshSpatialIndex(
  cloudpath:str,
  shape:Tuple[int,int,int],
  offset:Tuple[int,int,int],
  mip:int = 0,
  fill_missing:bool=False,
  compress:Optional[Union[str,bool]] = 'gzip',
  mesh_dir:Optional[str] = None
) -> None:
  """
  The main way to add a spatial index is to use the MeshTask,
  but old datasets or broken datasets may need it to be
  reconstituted. An alternative use is to create the spatial index
  over a different area size than the mesh task.

  Downloads the labels inside (offset, offset + shape), clamped to
  the volume bounds, computes the bounding box of every label present
  and writes them as a single JSON ".spatial" file in the mesh
  directory.

  cloudpath: path of the segmentation layer
  shape: size of the region to index, in voxels
  offset: minimum corner of the region, in voxels
  mip: resolution level to read the segmentation from
  fill_missing: forwarded to CloudVolume
  compress: compression passed to the JSON upload
  mesh_dir: destination directory; when falsy, the "mesh" entry
    of the info file is used (KeyError if it is absent)
  """
  cv = CloudVolume(
    cloudpath, mip=mip,
    bounded=False, fill_missing=fill_missing
  )
  cf = CloudFiles(cloudpath)

  bounds = Bbox(Vec(*offset), Vec(*shape) + Vec(*offset))
  bounds = Bbox.clamp(bounds, cv.bounds)

  data_bounds = bounds.clone()
  data_bounds.maxpt += 1 # match typical Marching Cubes overlap

  precision = cv.mesh.spatial_index.precision
  resolution = cv.resolution

  if not mesh_dir:
    mesh_dir = cv.info["mesh"]

  # remap: old img -> img
  img, remap = cv.download(data_bounds, renumber=True)
  img = img[...,0]
  slcs = find_objects(img)
  del img
  reverse_map = { v:k for k,v in remap.items() } # img -> old img

  bboxes = {}
  # find_objects returns one entry per label 1..N (index i <-> label i+1),
  # with None for labels that do not occur.
  for label, slc in enumerate(slcs):
    if slc is None:
      continue
    mesh_bounds = Bbox.from_slices(slc)
    # The slices are relative to the downloaded cutout, whose origin is
    # the *clamped* minimum corner. Using the raw `offset` here would
    # misplace every box whenever clamping moved that corner.
    mesh_bounds += data_bounds.minpt
    mesh_bounds *= Vec(*resolution, dtype=np.float32)
    bboxes[str(reverse_map[label+1])] = \
      mesh_bounds.astype(resolution.dtype).to_list()

  # The file name encodes the indexed region scaled by the resolution.
  bounds = bounds.astype(resolution.dtype) * resolution
  cf.put_json(
    f"{mesh_dir}/{bounds.to_filename(precision)}.spatial",
    bboxes,
    compress=compress,
    cache_control=False,
  )
Exemplo n.º 3
0
def TransferTask(
        src_path,
        dest_path,
        mip,
        shape,
        offset,
        translate=(0, 0, 0),  # change of origin
        fill_missing=False,
        skip_first=False,
        skip_downsamples=False,
        delete_black_uploads=False,
        background_color=0,
        sparse=False,
        axis='z',
        agglomerate=False,
        timestamp=None,
        compress='gzip',
        factor=None):
    """
    Copy one cutout from the source volume into the destination volume.

    The region (offset, offset + shape) is clamped to the destination
    bounds; the source is read from that region shifted back by
    `translate`. The cutout is then either written directly
    (skip_downsamples) or handed to downsample_and_upload.

    src_path / dest_path: cloudpaths of the two volumes
    mip: resolution level used for both volumes
    shape, offset: size and minimum corner of the region to transfer
    translate: displacement between source and destination coordinates
    fill_missing: forwarded to both CloudVolume instances
    delete_black_uploads, background_color, compress: forwarded to the
      destination CloudVolume
    agglomerate, timestamp: forwarded to the source download call
    skip_first, sparse, axis, factor: forwarded to downsample_and_upload
    """
    shape = Vec(*shape)
    origin = Vec(*offset)
    fill_missing = bool(fill_missing)
    shift = Vec(*translate)
    delete_black_uploads = bool(delete_black_uploads)
    sparse = bool(sparse)
    skip_first = bool(skip_first)
    skip_downsamples = bool(skip_downsamples)

    source = CloudVolume(
        src_path,
        fill_missing=fill_missing,
        mip=mip,
        bounded=False,
    )
    destination = CloudVolume(
        dest_path,
        fill_missing=fill_missing,
        mip=mip,
        delete_black_uploads=delete_black_uploads,
        background_color=background_color,
        compress=compress,
    )

    write_region = Bbox.clamp(Bbox(origin, shape + origin),
                              destination.bounds)
    read_region = write_region - shift
    cutout = source.download(
        read_region,
        agglomerate=agglomerate,
        timestamp=timestamp,
    )

    if skip_downsamples:
        # Plain copy at this mip only.
        destination[write_region] = cutout
        return

    downsample_and_upload(
        cutout,
        write_region,
        destination,
        shape,
        mip=mip,
        skip_first=skip_first,
        sparse=sparse,
        axis=axis,
        factor=factor,
    )
Exemplo n.º 4
0
class MeshTask(RegisteredTask):
    """Mesh the labels of one bounding box and upload the resulting fragments."""

    def __init__(self, shape, offset, layer_path, **kwargs):
        """
        Convert all labels in the specified bounding box into meshes
        via marching cubes and quadratic edge collapse
        (github.com/seung-lab/zmesh).

        Required:
          shape: (sx,sy,sz) size of task
          offset: (x,y,z) offset from (0,0,0)
          layer_path: neuroglancer/cloudvolume dataset path

        Optional:
          lod: (uint: 0) level of detail to record these meshes at
          mip: (uint: 0) level of the resolution pyramid to download
            segmentation from
          simplification_factor: (uint: 100) try to reduce the number of
            triangles in the mesh by this factor (but constrained by
            max_simplification_error)
          max_simplification_error: (40) The maximum physical distance that
            simplification is allowed to move a triangle vertex by.
          mesh_dir: which subdirectory to write the meshes to (overrides
            info file location)
          remap_table: agglomerate segmentation before meshing using
            { orig_id: new_id }
          generate_manifests: (bool: False) if it is known that the meshes
            generated by this task will not be cropped by the bounding box,
            avoid needing to run a separate MeshManifestTask pass by
            generating manifests on the spot.

          These two options are used to allow sufficient overlap for
          trivial mesh stitching between adjacent tasks.

            low_padding: (uint: 0) expand the bounding box by this many
              pixels by subtracting this padding from the minimum point of
              the bounding box on all axes.
            high_padding: (uint: 1) expand the bounding box by this many
              pixels adding this padding to the maximum point of the
              bounding box on all axes.

          parallel_download: (uint: 1) number of processes to use during
            the segmentation download
          cache_control: (str: None) specify the cache-control header when
            uploading mesh files
          dust_threshold: (uint: None) don't bother meshing labels strictly
            smaller than this number of voxels.
          encoding: (str) 'precomputed' (default) or 'draco'
          draco_compression_level: (uint: 1) only applies to draco encoding
          draco_create_metadata: (bool: False) only applies to draco encoding
          progress: (bool: False) show progress bars for meshing
          object_ids: (list of ints) if specified, only mesh these ids
          fill_missing: (bool: False) replace missing segmentation files
            with zeros instead of erroring
          spatial_index: (bool: False) generate a JSON spatial index of
            which meshes are available in a given bounding box.
          sharded: (bool: False) If True, upload all meshes together as a
            single mapbuffer fragment file.
          timestamp: (int: None) (graphene only) use the segmentation
            existing at this UNIX timestamp.
          agglomerate: (bool: True) forwarded to the segmentation download;
            only applies to graphene volumes.
          stop_layer: (int: 2) forwarded to the segmentation download;
            only applies to graphene volumes.
          compress: (str: 'gzip') compression used when uploading
            'precomputed' encoded meshes and the spatial index (draco
            meshes are uploaded without additional compression).
          closed_dataset_edges: (bool: True) add a black border on the
            sides of the cutout that touch the dataset boundary so that
            mesh faces are closed there.

        Raises:
          ValueError: if `encoding` is not one of the supported encodings.
        """
        super(MeshTask, self).__init__(shape, offset, layer_path, **kwargs)
        self.shape = Vec(*shape)
        self.offset = Vec(*offset)
        self.layer_path = layer_path

        # Every recognised option with the value used when it is omitted.
        option_defaults = {
            'cache_control': None,
            'draco_compression_level': 1,
            'draco_create_metadata': False,
            'dust_threshold': None,
            'encoding': 'precomputed',
            'fill_missing': False,
            'generate_manifests': False,
            'high_padding': 1,
            'low_padding': 0,
            'lod': 0,
            'max_simplification_error': 40,
            'simplification_factor': 100,
            'mesh_dir': None,
            'mip': 0,
            'object_ids': None,
            'parallel_download': 1,
            'progress': False,
            'remap_table': None,
            'spatial_index': False,
            'sharded': False,
            'timestamp': None,
            'agglomerate': True,
            'stop_layer': 2,
            'compress': 'gzip',
            'closed_dataset_edges': True,
        }
        self.options = {
            name: kwargs.get(name, default)
            for name, default in option_defaults.items()
        }

        supported_encodings = ['precomputed', 'draco']
        if self.options['encoding'] not in supported_encodings:
            raise ValueError(
                'Encoding {} is not supported. Options: {}'.format(
                    self.options['encoding'], ', '.join(supported_encodings)))

        # Upload compression per encoding: draco output is uploaded as-is.
        self._encoding_to_compression_dict = {
            'precomputed': self.options['compress'],
            'draco': False,
        }

    def execute(self):
        """Download the segmentation, mesh every label and upload the results."""
        self._volume = CloudVolume(self.layer_path,
                                   self.options['mip'],
                                   bounded=False,
                                   parallel=self.options['parallel_download'],
                                   fill_missing=self.options['fill_missing'])
        self._bounds = Bbox(self.offset, self.shape + self.offset)
        self._bounds = Bbox.clamp(self._bounds, self._volume.bounds)

        self.progress = bool(self.options['progress'])

        self._mesher = zmesh.Mesher(self._volume.resolution)

        # Marching cubes loves its 1vx overlaps.
        # This avoids lines appearing between
        # adjacent chunks.
        data_bounds = self._bounds.clone()
        data_bounds.minpt -= self.options['low_padding']
        data_bounds.maxpt += self.options['high_padding']

        self._mesh_dir = self.get_mesh_dir()

        if self.options['encoding'] == 'draco':
            self.draco_encoding_settings = draco_encoding_settings(
                shape=(self.shape + self.options['low_padding'] +
                       self.options['high_padding']),
                offset=self.offset,
                resolution=self._volume.resolution,
                compression_level=self.options["draco_compression_level"],
                create_metadata=self.options['draco_create_metadata'],
                uses_new_draco_bin_size=False,
            )

        # chunk_position includes the overlap specified by low_padding/high_padding
        # agglomerate, timestamp, stop_layer only applies to graphene volumes,
        # no-op for precomputed
        data = self._volume.download(data_bounds,
                                     agglomerate=self.options['agglomerate'],
                                     timestamp=self.options['timestamp'],
                                     stop_layer=self.options['stop_layer'])

        # Nothing to mesh: still publish an (empty) spatial index if requested.
        if not np.any(data):
            if self.options['spatial_index']:
                self._upload_spatial_index(self._bounds, {})
            return

        left_offset = Vec(0, 0, 0)
        if self.options["closed_dataset_edges"]:
            data, left_offset = self._handle_dataset_boundary(
                data, data_bounds)

        data = self._remove_dust(data, self.options['dust_threshold'])
        data = self._remap(data)

        if self.options['object_ids']:
            data = fastremap.mask_except(data,
                                         self.options['object_ids'],
                                         in_place=True)

        # Renumber the labels before meshing; keep the inverse map so the
        # original ids can be restored when naming the outputs.
        data, renumbermap = fastremap.renumber(data, in_place=True)
        renumbermap = {v: k for k, v in renumbermap.items()}

        self._mesher.mesh(data[..., 0].T)
        del data

        self.compute_meshes(renumbermap, left_offset)

    def _handle_dataset_boundary(self, data, bbox):
        """
        This logic is used to add a black border along sides
        of the image that touch the dataset boundary which
        results in the closure of the mesh faces on that side.

        Returns the (possibly enlarged) image and the per-axis number of
        voxels inserted on the low side, which callers must subtract
        when converting mesh vertices back to dataset coordinates.
        """
        volume_bounds = self._volume.bounds
        touches_low = np.any(bbox.minpt == volume_bounds.minpt)
        touches_high = np.any(bbox.maxpt == volume_bounds.maxpt)
        if not touches_low and not touches_high:
            return data, Vec(0, 0, 0)

        shape = Vec(*data.shape, dtype=np.int64)
        offset = Vec(0, 0, 0, 0)
        for i in range(3):
            if bbox.minpt[i] == self._volume.voxel_offset[i]:
                offset[i] += 1
                shape[i] += 1
            if bbox.maxpt[i] == volume_bounds.maxpt[i]:
                shape[i] += 1

        slices = (
            slice(offset.x, offset.x + data.shape[0]),
            slice(offset.y, offset.y + data.shape[1]),
            slice(offset.z, offset.z + data.shape[2]),
        )

        # The array starts out all zero, so every voxel outside `slices`
        # (the added border) is already black after the copy below.
        mirror_data = np.zeros(shape, dtype=data.dtype, order="F")
        mirror_data[slices] = data

        return mirror_data, offset[:3]

    def get_mesh_dir(self):
        """Return the mesh directory from the options or, failing that, the info file.

        Raises:
          ValueError: if neither source provides a mesh directory.
        """
        if self.options['mesh_dir'] is not None:
            return self.options['mesh_dir']
        elif 'mesh' in self._volume.info:
            return self._volume.info['mesh']
        else:
            raise ValueError(
                "The mesh destination is not present in the info file.")

    def _remove_dust(self, data, dust_threshold):
        """Mask out labels with strictly fewer than `dust_threshold` voxels.

        A falsy threshold (None or 0) leaves the data untouched.
        """
        if dust_threshold:
            threshold = int(dust_threshold)
            segids, pxct = fastremap.unique(data, return_counts=True)
            dust_segids = [
                sid for sid, ct in zip(segids, pxct) if ct < threshold
            ]
            data = fastremap.mask(data, dust_segids, in_place=True)

        return data

    def _remap(self, data):
        """Apply `remap_table` to the labels; labels not in the table are masked.

        The stored table is normalised to int keys and values and gains
        a 0 -> 0 entry.
        """
        if self.options['remap_table'] is None:
            return data

        self.options['remap_table'] = {
            int(k): int(v)
            for k, v in self.options['remap_table'].items()
        }

        remap = self.options['remap_table']
        remap[0] = 0

        data = fastremap.mask_except(data, list(remap.keys()), in_place=True)
        return fastremap.remap(data, remap, in_place=True)

    def compute_meshes(self, renumbermap, offset):
        """Extract every mesh from the mesher and upload it.

        renumbermap: maps the mesher's (renumbered) ids to original ids
        offset: low-side border offset returned by _handle_dataset_boundary
        """
        bounding_boxes = {}
        meshes = {}

        for obj_id in tqdm(self._mesher.ids(),
                           disable=(not self.progress),
                           desc="Mesh"):
            remapped_id = renumbermap[obj_id]
            mesh_binary, mesh_bounds = self._create_mesh(obj_id, offset)
            bounding_boxes[remapped_id] = mesh_bounds.to_list()
            meshes[remapped_id] = mesh_binary

        if self.options['sharded']:
            self._upload_batch(meshes, self._bounds)
        else:
            self._upload_individuals(meshes,
                                     self.options['generate_manifests'])

        if self.options['spatial_index']:
            self._upload_spatial_index(self._bounds, bounding_boxes)

    def _upload_batch(self, meshes, bbox):
        """Upload all meshes of this task as a single MapBuffer ".frags" file."""
        cf = CloudFiles(self.layer_path, progress=self.options['progress'])

        mbuf = MapBuffer(meshes, compress="br")

        cf.put(
            f"{self._mesh_dir}/{bbox.to_filename()}.frags",
            content=mbuf.tobytes(),
            compress=None,
            content_type="application/x.mapbuffer",
            cache_control=False,
        )

    def _upload_individuals(self, mesh_binaries, generate_manifests):
        """Upload each mesh as its own fragment file, optionally with manifests.

        Fragment files are named "<segid>:<lod>:<bounds>"; a manifest
        "<segid>:<lod>" lists that single fragment.
        """
        cf = CloudFiles(self.layer_path)

        content_type = "model/mesh"
        if self.options["encoding"] == "draco":
            content_type = "model/x.draco"

        lod = self.options['lod']
        bounds_name = self._bounds.to_filename()

        cf.puts(
            ((f"{self._mesh_dir}/{segid}:{lod}:{bounds_name}", mesh_binary)
             for segid, mesh_binary in mesh_binaries.items()),
            compress=self._encoding_to_compression_dict[
                self.options['encoding']],
            cache_control=self.options['cache_control'],
            content_type=content_type,
        )

        if generate_manifests:
            # Only the segment ids are needed here, not the binaries.
            cf.put_jsons(
                ((f"{self._mesh_dir}/{segid}:{lod}", {
                    "fragments": [f"{segid}:{lod}:{bounds_name}"]
                }) for segid in mesh_binaries),
                compress=None,
                cache_control=self.options['cache_control'],
            )

    def _create_mesh(self, obj_id, left_bound_offset):
        """Extract, position and encode the mesh of one label.

        Returns:
          (mesh_binary, mesh_bounds): the encoded mesh and the bounding
          box of its vertices after translation.

        Raises:
          ValueError: if the configured encoding is not supported.
        """
        mesh = self._mesher.get_mesh(
            obj_id,
            simplification_factor=self.options['simplification_factor'],
            max_simplification_error=self.options['max_simplification_error'],
            voxel_centered=True,
        )

        # Free the mesher's copy as soon as the mesh has been extracted.
        self._mesher.erase(obj_id)

        resolution = self._volume.resolution
        # Origin of the downloaded cutout (task bounds minus low padding),
        # corrected for any border added by _handle_dataset_boundary.
        offset = (self._bounds.minpt - self.options['low_padding']).astype(
            np.float32)
        mesh.vertices[:] += (offset - left_bound_offset) * resolution

        mesh_bounds = Bbox(np.amin(mesh.vertices, axis=0),
                           np.amax(mesh.vertices, axis=0))

        encoding = self.options['encoding']
        if encoding == 'draco':
            mesh_binary = DracoPy.encode(mesh.vertices, mesh.faces,
                                         **self.draco_encoding_settings)
        elif encoding == 'precomputed':
            mesh_binary = mesh.to_precomputed()
        else:
            # Guard against options being altered after __init__ validation.
            raise ValueError(
                'Encoding {} is not supported.'.format(encoding))

        return mesh_binary, mesh_bounds

    def _upload_spatial_index(self, bbox, mesh_bboxes):
        """Write the ".spatial" JSON file mapping labels to mesh bounding boxes.

        The file name encodes `bbox` scaled by the volume resolution.
        """
        cf = CloudFiles(self.layer_path, progress=self.options['progress'])
        precision = self._volume.mesh.spatial_index.precision
        resolution = self._volume.resolution

        bbox = bbox.astype(resolution.dtype) * resolution

        cf.put_json(
            f"{self._mesh_dir}/{bbox.to_filename(precision)}.spatial",
            mesh_bboxes,
            compress=self.options['compress'],
            cache_control=False,
        )
Exemplo n.º 5
0
class NeuroglancerSession:
    """Utility class which pulls and pushes data.

    Arguments:
        url: Precomputed path, either a file URI or a url URI.
        mip: Resolution level to pull and push data at. Defaults to 0, the highest resolution.
        url_segments: Precomputed path to segmentation data. Optional, default None.

    Attributes:
        url: CloudVolumePrecomputedPath to image data.
        url_segments: CloudVolumePrecomputedPath to segmentation data. If None is passed,
            url+"_segments" is tried automatically; stays None when that volume is unavailable.
        cv (CloudVolumePrecomputed): CloudVolume object for image data.
        cv_segments (CloudVolumePrecomputed): CloudVolume object for segmentation data, or None.
        mip: Resolution level.
        chunk_size: The chunk size of the volume at the specified mip, given as (x, y, z).
        scales: The resolution of the volume at the specified mip, given as (x, y, z).

    Note:
        Annotation channels are not supported: `pull_bounds_seg` and `push`
        raise NotImplementedError and no `cv_annotations` attribute is set.
    """
    def __init__(
        self,
        url: str,  #  = "s3://open-neurodata/brainlit/brain1"
        mip: int = 0,
        url_segments: Optional[str] = None,
    ):
        check_precomputed(url)
        check_type(mip, (int, np.integer))
        self.url = url
        self.cv = CloudVolume(url, parallel=False)
        if mip < 0 or mip >= len(self.cv.scales):
            raise ValueError(
                f"{mip} should be between 0 and {len(self.cv.scales)}.")
        self.mip = mip
        self.chunk_size = self.cv.scales[self.mip]["chunk_sizes"][0]
        self.scales = self.cv.scales[self.mip]["resolution"]

        self.url_segments = url_segments
        if url_segments is None:
            # Default is to look for a sibling volume named url + "_segments".
            default_segments_url = url + "_segments"
            try:
                self.cv_segments = CloudVolume(default_segments_url,
                                               parallel=False)
                self.url_segments = default_segments_url
            except InfoUnavailableError:
                # Report the URL that was actually tried; self.url_segments
                # is still None at this point.
                warnings.warn(
                    UserWarning(
                        f"Segmentation volume not found at {default_segments_url}, defaulting to None."
                    ))
                self.cv_segments = None
        else:
            check_precomputed(url_segments)
            self.cv_segments = CloudVolume(url_segments, parallel=False)

    def _get_voxel(self, seg_id: int, v_id: int) -> Tuple[int, int, int]:
        """Gets coordinates of segment vertex, in voxel space.

        Arguments:
            seg_id: The id of the segment to use.
            v_id: The id of the vertex to use from the given segment.

        Returns:
            voxel: The voxel coordinates in (x, y, z) voxel space, as an
                integer NumPy array.

        Raises:
            ValueError: If there is no segmentation volume or v_id is out of range.
        """
        check_type(seg_id, (int, np.integer))
        check_type(v_id, (int, np.integer))
        if self.cv_segments is None:
            raise ValueError("Cannot get voxel without segmentation data")
        seg = self.cv_segments.skeleton.get(seg_id).vertices
        if v_id < 0 or v_id >= len(seg):
            raise ValueError(f"{v_id} should be between 0 and {len(seg)}.")

        vertex = seg[v_id]
        # Divide the vertex coordinates by the resolution at this mip to
        # obtain voxel indices.
        voxel = np.round(
            np.divide(
                vertex,
                self.cv_segments.scales[self.mip]["resolution"])).astype(int)
        return voxel

    def set_url_segments(self, seg_url: str):
        """Sets the url_segments and cv_segments attributes.

        Arguments:
            seg_url: CloudvolumePrecomputedPath to segmentation data.
        """
        check_precomputed(seg_url)

        self.url_segments = seg_url
        self.cv_segments = CloudVolume(self.url_segments, parallel=False)

    def get_segments(self,
                     seg_id: int,
                     bbox: Optional[Bounds] = None) -> nx.Graph:
        """Get a graph of a segmentation annotation within a bounding box.

        Arguments:
            seg_id: The segment to pull.
            bbox: The bounding box object, default None. If None, uses entire volume.

        Returns:
            G: A networkx subgraph from the specified segment and bounding box.

        Raises:
            ValueError: If there is no segmentation volume.
        """
        check_type(seg_id, (int, np.integer))
        if self.cv_segments is None:
            raise ValueError("Cannot get segments without segmentation data.")

        df = read_s3(self.url_segments, seg_id, self.mip)
        G = df_to_graph(df)
        if bbox is not None:
            if isinstance(bbox, Bbox):
                bbox = bbox.to_list()
            check_iterable_type(bbox, (int, np.integer))
            check_iterable_nonnegative(bbox)
            G = get_sub_neuron(G, [bbox[:3], bbox[3:]])
        return G

    def create_tubes(self,
                     seg_id: Union[int, float],
                     bbox: Bounds,
                     radius: Optional[int] = None):
        """Creates voxel-wise foreground/background labels associated with a particular neuron trace,
        within a given bounding box of voxel coordinates.

        Arguments:
            seg_id: The id of the .swc file.
            bbox: The bounding box to draw tubes within.
            radius: Euclidean distance threshold used to draw tubes, default None = 1 px thick.

        Returns:
            labels: A volume within the bounding box, with 1 on tubes and 0 elsewhere.

        Raises:
            ValueError: If there is no segmentation volume or radius is not positive.
        """
        if self.cv_segments is None:
            raise ValueError("Cannot get segments without segmentation data.")
        # Accept NumPy integers too, like every other method taking a seg_id.
        check_type(seg_id, (int, np.integer))
        if radius is not None:
            # np.floating replaces the np.float alias removed from NumPy.
            check_type(radius, (int, np.integer, float, np.floating))
            if radius <= 0:
                raise ValueError("Radius must be positive.")

        G = self.get_segments(seg_id, bbox)
        paths = graph_to_paths(G)
        if isinstance(bbox, Bbox):
            bbox = bbox.to_list()
        check_iterable_type(bbox, (int, np.integer))
        check_iterable_nonnegative(bbox)
        labels = tubes_from_paths(np.subtract(bbox[3:], bbox[:3]), paths,
                                  radius)
        return labels

    def pull_voxel(self,
                   seg_id: int,
                   v_id: int,
                   radius: int = 1) -> Tuple[np.ndarray, Bbox, np.ndarray]:
        """Pull a subvolume around a specified skeleton vertex, of shape [2r+1, 2r+1, 2r+1] in voxels.

        Arguments:
            seg_id: ID of the segment to use, depends on data in s3.
            v_id: ID of the vertex to use, depends on the segment.
            radius: Radius of pulled volume around central voxel, in voxels.
                Optional, default is 1 (3x3x3 volume is pulled, centered at the vertex).

        Returns:
            img: A (2*radius+1) X (2*radius+1) X (2*radius+1) volume.
            bounds: Bounding box object which contains the bounds of the volume.
            vox_in_img: Coordinates which locate the initial point in the volume.

        Raises:
            ValueError: If radius is negative.
        """
        check_type(radius, (int, np.integer))
        if radius < 0:
            raise ValueError(f"{radius} should be nonnegative.")

        voxel = self._get_voxel(seg_id,
                                v_id)  # does type checking for seg_id and v_id
        bounds = Bbox(voxel, voxel)
        seed = bounds.to_list()
        shape = [radius] * 3
        # The upper corner is exclusive, hence the extra + 1.
        bounds = Bbox(np.subtract(seed[:3], shape),
                      np.add(np.add(seed[3:], shape), 1))
        img = self.pull_bounds_img(bounds)
        vox_in_img = voxel - np.array(bounds.to_list()[:3])
        return np.squeeze(np.array(img)), bounds, vox_in_img

    def pull_vertex_list(
        self,
        seg_id: int,
        v_id_list: List[int],
        buffer: int = 1,
        expand: bool = False,
    ) -> Tuple[np.ndarray, Bbox, List[Tuple[int, int, int]]]:
        """Pull a subvolume containing all listed vertices.

        Arguments:
            seg_id: ID of the segment to use, depends on data in s3.
            v_id_list: list of vertex IDs to use.
            buffer: Buffer around the bounding box (in voxels). Default 1, set to 0 if expand is True.
            expand: Flag whether to expand subvolume to closest set of chunks.

        Returns:
            img: The image volume containing all vertices.
            bounds: Bounding box object which contains the bounds of the volume.
            vox_in_img_list: Coordinates which locate the vertices in the volume.

        Raises:
            ValueError: If buffer is negative.
        """
        check_type(seg_id, (int, np.integer))
        check_iterable_type(v_id_list, (int, np.integer))
        check_type(buffer, (int, np.integer))
        if buffer < 0:
            raise ValueError(f"Buffer {buffer} shouild not be negative.")
        check_type(expand, bool)
        if expand:
            buffer = 0
        buffer = [buffer] * 3

        voxel_list = [self._get_voxel(seg_id, i) for i in v_id_list]
        if len(voxel_list) == 1:  # edge case of 1 vertex
            bounds = Bbox(voxel_list[0] - buffer, voxel_list[0] + buffer + 1)
        else:
            voxel_list = np.array(voxel_list)
            lower = list(np.min(voxel_list, axis=0) - buffer)
            higher = list(np.max(voxel_list, axis=0) + buffer + 1)
            bounds = Bbox(lower, higher)
        if expand:
            bounds = bounds.expand_to_chunk_size(self.chunk_size)
        vox_in_img_list = np.array(voxel_list) - bounds.to_list()[:3]
        img = self.pull_bounds_img(bounds)
        return img, bounds, vox_in_img_list

    def pull_chunk(
        self,
        seg_id: int,
        v_id: int,
        radius: int = 0,
    ) -> Tuple[np.ndarray, Bbox, Tuple[int, int, int]]:
        """Pull a subvolume around a specified skeleton vertex according to chunk size.
        Each data set has a specified chunk size, which can be found by calling self.cv.info.

        Arguments:
            seg_id: ID of the segment to use, depends on data in s3.
            v_id: ID of the vertex to use, depends on the segment.
            radius: Radius of pulled volume around central chunk, in chunks.
                Optional, default is 0 (single chunk which contains the voxel).

        Returns:
            img: The chunk-aligned volume around the vertex, grown by
                radius chunks on every side.
            bounds: Bounding box object which contains the bounds of the volume.
            vox_in_img: Coordinates which locate the initial point in the volume.

        Raises:
            ValueError: If radius is negative.
        """
        check_type(seg_id, (int, np.integer))
        check_type(v_id, (int, np.integer))
        check_type(radius, (int, np.integer))
        if radius < 0:
            raise ValueError(f"Radius of {radius} should be nonnegative.")

        voxel = self._get_voxel(seg_id, v_id)
        bounds = Bbox(voxel, voxel).expand_to_chunk_size(self.chunk_size)
        seed = bounds.to_list()
        shape = [
            self.chunk_size[0] * radius,
            self.chunk_size[1] * radius,
            self.chunk_size[2] * radius,
        ]
        bounds = Bbox(np.subtract(seed[:3], shape), np.add(seed[3:], shape))
        img = self.pull_bounds_img(bounds)
        vox_in_img = voxel - np.array(bounds.to_list()[:3])
        return np.squeeze(np.array(img)), bounds, vox_in_img

    def pull_bounds_img(self, bounds: Bounds) -> np.ndarray:
        """Pull a volume around a specified bounding box. Works on image channels.

        Arguments:
            bounds: Bounding box, or tuple containing (x0, y0, z0, x1, y1, z1) bounds.

        Returns:
            img: Volume pulled according to the bounding box.
        """
        if isinstance(bounds, Bbox):
            bounds = bounds.to_list()
        check_iterable_type(bounds, (int, np.integer))
        check_iterable_nonnegative(bounds)
        img = self.cv.download(Bbox(bounds[:3], bounds[3:]), mip=self.mip)
        return np.squeeze(np.array(img))

    def pull_bounds_seg(self, bounds: Bounds) -> np.ndarray:
        """Pull a volume around a specified bounding box.
        Works on annotation channels.

        Arguments:
            bounds: Bounding box, or tuple containing (x0, y0, z0, x1, y1, z1) bounds.

        Returns:
            img: Volume pulled according to the bounding box.

        Raises:
            NotImplementedError: Always; annotation channels are not supported.
        """
        raise NotImplementedError("Annotation channels not supported.")

    def push(
        self,
        img: np.ndarray,
        bounds: Bounds,
    ):
        """Push a volume to an annotation channel.

        Arguments:
            img : Volume to push
            bounds : Bounding box or tuple containing (x0, y0, z0, x1, y1, z1) bounds.

        Raises:
            NotImplementedError: Always; annotation channels are not supported.
        """
        raise NotImplementedError("Annotation channels not supported.")
Exemplo n.º 6
0
class NeuroglancerSession:
    """
    Utility class which pulls and pushes data.

    Parameters
    ----------
    url : string
        URL of the s3 bucket to pull from and push to.

    mip : int, optional (default=1)
        Resolution level to pull and push at. 0 is the highest resolution.

    url_segments : string, optional (default=None)
        URL of a separate volume holding the skeleton segments. When given,
        skeleton vertices are read from it instead of from `url`.

    Attributes
    ----------
    url : string
        URL of the s3 bucket to pull from and push to.

    cv : CloudVolume object
        CloudVolume object instantiated with the specified URL.

    mip : int
        Resolution level.

    chunk_size : list
        The chunk size of the volume at the specified mip, given as [x, y, z].

    scales : list
        The resolution of the volume at the specified mip, given as [x, y, z].

    url_segments : string or None
        URL of the segments volume, if one was given.

    cv_segments : CloudVolume object or None
        CloudVolume object instantiated with `url_segments`, or None when
        no segments URL was given.
    """
    def __init__(
        self,
        url="s3://mouse-light-viz/precomputed_volumes/brain1",
        mip=1,
        url_segments=None,
    ):
        self.url = url
        self.cv = CloudVolume(self.url, parallel=False)
        self.mip = mip
        self.chunk_size = self.cv.info["scales"][self.mip]["chunk_sizes"][0]
        self.scales = self.cv.scales[self.mip]["resolution"]

        self.url_segments = url_segments
        self.cv_segments = None
        if self.url_segments is not None:
            self.cv_segments = CloudVolume(self.url_segments, parallel=False)

    def _get_voxel(self, seg_id, v_id):
        """
        Convert a skeleton vertex into an integer voxel coordinate.

        The skeleton is read from `cv_segments` when it is defined and from
        `cv` otherwise. The vertex is divided by the resolution of that same
        volume at `self.mip` and rounded to the nearest integer.

        Parameters
        ----------
        seg_id : int
            ID of the segment (skeleton) to read.

        v_id : int
            Index of the vertex within the skeleton.

        Returns
        -------
        voxel : ndarray of int
            Voxel coordinate of the vertex.
        """
        segments = self.cv_segments
        source = self.cv if segments is None else segments
        vertex = source.skeleton.get(seg_id).vertices[v_id]
        if segments is None:
            resolution = self.scales
        else:
            resolution = segments.scales[self.mip]["resolution"]
        return np.round(np.divide(vertex, resolution)).astype(int)

    def get_segments(self, seg_id, bbox):
        """
        Get a graph of points within a bounding box.

        The segment is read from `url_segments`.

        Parameters
        ----------
        seg_id : int
            The segment number to pull

        bbox : Bbox object
            The bounding box object

        Returns
        -------
        G : :class:`networkx.Graph`
        """
        df = read_s3(self.url_segments, seg_id, self.mip)
        G = df_to_graph(df)
        box = bbox.to_list()
        G_sub = get_sub_neuron(G, [box[:3], box[3:]])
        return G_sub

    def pull_voxel(self, seg_id, v_id, nx=1, ny=1, nz=1):
        """
        Pull a number of voxels around a specified skeleton vertex

        Parameters
        ----------
        seg_id : int
            ID of the segment to use, depends on data in s3.

        v_id : int
            ID of the vertex to use, depends on the segment.

        nx : int, optional (default=1)
            Number of voxels to pull on either side of the seed in x.

        ny : int, optional (default=1)
            Number of voxels to pull on either side of the seed in y.

        nz : int, optional (default=1)
            Number of voxels to pull on either side of the seed in z.

        Returns
        -------
        img : ndarray
            A 2*nx+1 X 2*ny+1 X 2*nz+1 volume.

        bounds : Bbox object
            Bounding box object which contains the bounds of the volume.

        vox_in_img : ndarray
            List of coordinates which locate the initial point in the volume.
        """
        voxel = self._get_voxel(seg_id, v_id)
        radius = [nx, ny, nz]
        # The upper corner of a Bbox is exclusive, hence the extra +1.
        bounds = Bbox(np.subtract(voxel, radius),
                      np.add(np.add(voxel, radius), 1))
        img = self.cv.download(bounds, mip=self.mip)
        vox_in_img = voxel - np.array(bounds.to_list()[:3])
        return np.squeeze(np.array(img)), bounds, vox_in_img

    def pull_vertex_list(self,
                         seg_id,
                         v_id_list,
                         buffer=(0, 0, 0),
                         expand=False):
        """
        Pull a region containing all listed vertices.

        Parameters
        ----------
        seg_id : int
            ID of the segment to use, depends on data in s3.

        v_id_list : list of ints
            list of vertex IDs to use.

        buffer : sequence of ints, optional (default=(0, 0, 0))
            Buffer around the bounding box of seed vertices (on lower and higher bound).

        expand : bool, optional (default=False)
            Flag whether to expand region to closest combination of chunks.

        Returns
        -------
        img : ndarray
            The image volume containing all vertices.

        bounds : Bbox object
            Bounding box object which contains the bounds of the volume.

        vox_in_img_list : ndarray, shape nx3
            List of coordinates which locate the vertices in the volume.
        """
        voxel_list = np.array([self._get_voxel(seg_id, i) for i in v_id_list])
        margin = np.asarray(buffer)
        lower = list(np.min(voxel_list, axis=0) - margin)
        # The upper corner of a Bbox is exclusive: without the +1 the vertex
        # with the largest coordinate would fall just outside the pulled
        # region (and a single vertex would give an empty box).
        higher = list(np.max(voxel_list, axis=0) + margin + 1)
        bounds = Bbox(lower, higher)
        if expand:
            bounds = bounds.expand_to_chunk_size(self.chunk_size)
            # Expansion may move the lower corner; vertex coordinates are
            # reported relative to the corner actually pulled.
            lower = bounds.to_list()[:3]
        img = self.pull_bounds_img(bounds)
        vox_in_img_list = voxel_list - lower
        return img, bounds, vox_in_img_list

    def pull_chunk(self, seg_id, v_id, nx=0, ny=0, nz=0):
        """
        Pull a number of chunks around a specified skeleton vertex

        Parameters
        ----------
        seg_id : int
            ID of the segment to use, depends on data in s3.

        v_id : int
            ID of the vertex to use, depends on the segment.

        nx : int, optional (default=0)
            Number of chunks to pull on either side of the main chunk in x.

        ny : int, optional (default=0)
            Number of chunks to pull on either side of the main chunk in y.

        nz : int, optional (default=0)
            Number of chunks to pull on either side of the main chunk in z.

        Returns
        -------
        img : ndarray
            A chunk_size[0]*(2*nx+1) X chunk_size[1]*(2*ny+1) X
            chunk_size[2]*(2*nz+1) volume.

        bounds : Bbox object
            Bounding box object which contains the bounds of the volume.

        vox_in_img : ndarray
            List of coordinates which locate the initial point in the volume.
        """
        voxel = self._get_voxel(seg_id, v_id)
        # Start from the one-voxel box at the seed. A zero-volume box whose
        # coordinate lies exactly on a chunk boundary would remain empty
        # along that axis after expansion.
        bounds = Bbox(voxel, np.add(voxel, 1)).expand_to_chunk_size(
            self.chunk_size)
        seed = bounds.to_list()
        shape = [
            self.chunk_size[0] * nx,
            self.chunk_size[1] * ny,
            self.chunk_size[2] * nz,
        ]
        bounds = Bbox(np.subtract(seed[:3], shape), np.add(seed[3:], shape))
        img = self.cv.download(bounds, mip=self.mip)
        vox_in_img = voxel - np.array(bounds.to_list()[:3])
        return np.squeeze(np.array(img)), bounds, vox_in_img

    def pull_bounds_img(self, bounds):
        """
        Pull a volume around a specified bounding box. Works on img channels.

        Parameters
        ----------
        bounds : Bbox object
            Bounding box of the region to download.

        Returns
        -------
        img : ndarray
            Volume downloaded at `self.mip`, with singleton dimensions
            squeezed out.
        """
        img = self.cv.download(bounds, mip=self.mip)
        return np.squeeze(np.array(img))

    def pull_bounds_seg(self, bounds):
        """
        Pull a volume around a specified bounding box.
        Works on annotation channels.

        Parameters
        ----------
        bounds : Bbox object
            Bounding box of the region to read.

        Returns
        -------
        img : ndarray
            pulled volume, with singleton dimensions squeezed out.
        """
        # NOTE(review): this indexes self.cv directly and, unlike
        # pull_bounds_img, does not pass self.mip -- confirm that the mip
        # used here is the intended one.
        img = self.cv[bounds]
        return np.squeeze(np.array(img))

    def push(self, img, bounds):
        """
        Push a volume.

        Parameters
        ----------
        img : ndarray
            Volume to push; it is cast to uint64 before being written.

        bounds : Bbox object
            Bounding box of the region to write.
        """
        # NOTE(review): written through self.cv without passing self.mip --
        # confirm that the mip used here is the intended one.
        self.cv[bounds] = img.astype("uint64")

    def napari_viewer(self, img, labels=None, label_name="Segmentation"):
        """
        Open a napari viewer on an image, optionally with a labels layer.

        Parameters
        ----------
        img : ndarray
            Image to display; singleton dimensions are squeezed out first.

        labels : ndarray, optional (default=None)
            Label volume added as a labels layer when given.

        label_name : string, optional (default="Segmentation")
            Name of the labels layer.

        Returns
        -------
        viewer : napari viewer object
        """
        viewer = napari.view_image(np.squeeze(np.array(img)))
        if labels is not None:
            viewer.add_labels(labels, name=label_name)
        return viewer
Exemplo n.º 7
0
class BigBrainVolume:
    """
    Access to an image volume served in neuroglancer precomputed format.

    On construction the translation is read from the site's
    'transform.json', the 'info' file is loaded, a CloudVolume is opened on
    the site, and the physical bounding box of the nonzero data is
    estimated from the last scale.

    TODO use siibra requests cache
    """

    # Gigabyte size that is considered feasible for ad-hoc downloads of
    # BigBrain data. This is used to avoid accidental huge downloads.
    gbyte_feasible = 0.5

    @staticmethod
    def switch_xy(X):
        """
        Switch x/y coordinates on a 4-row vector or matrix.
        Note that direction doesn't matter here since the inverse is the same.
        """
        return np.dot(np.identity(4)[[1, 0, 2, 3], :], X)

    def __init__(self, ngsite, fill_missing=True):
        """
        Parameters:
        -----------
        ngsite : str
            base url of neuroglancer http location
        fill_missing : Boolean (default: True)
            passed through to CloudVolume
        """
        with requests.get(ngsite + '/transform.json') as r:
            # last column of the transform matrix holds the translation (nm)
            self._translation_nm = np.array(json.loads(r.content))[:, -1]
        with requests.get(ngsite + '/info') as r:
            self.info = json.loads(r.content)
        self.volume = CloudVolume(ngsite,
                                  fill_missing=fill_missing,
                                  progress=False)
        self.ngsite = ngsite
        self.nbits = np.iinfo(self.volume.info['data_type']).bits
        # maps resolution in micron -> mip index and uncompressed size
        self.resolutions_available = {
            np.min(v['resolution']) / 1000: {
                'mip': i,
                'GBytes': np.prod(v['size']) * self.nbits / (8 * 1024**3)
            }
            for i, v in enumerate(self.volume.scales)
        }
        self.helptext = "\n".join([
            "{:7.0f} micron {:10.4f} GByte".format(k, v['GBytes'])
            for k, v in self.resolutions_available.items()
        ])
        # Computed last: _load_data prints self.helptext when it refuses a
        # download, so the help text has to exist before data is loaded.
        self.bbox_phys = self._bbox_phys()

    def largest_feasible_resolution(self):
        """
        Returns the highest resolution in micrometer (i.e. the smallest
        voxel size) that is available and still below the threshold of
        downloadable volume sizes.
        """
        return min([
            res for res, v in self.resolutions_available.items()
            if v['GBytes'] < self.gbyte_feasible
        ])

    def affine(self, mip, clip=False):
        """
        Builds the affine matrix that maps voxels 
        at the given mip to physical space in mm.

        Parameters:
        -----------
        mip : int
            index of the scale to use
        clip : Boolean, or Bbox
            If true, clip by computing the bounding box from nonempty pixels
            if False, get the complete data of the selected mip
            If Bbox, clip by this bounding box

        Returns:
        --------
        4x4 numpy array
        """

        # correct clipping offset, if needed
        voxelshift = np.identity(4)
        if isinstance(clip, bool):
            if clip:
                voxelshift[:3, -1] = self._clipcoords(mip)[:3, 0]
        elif isinstance(clip, Bbox):
            voxelshift[:3, -1] = clip.minpt

        # retrieve the pixel resolution
        resolution_nm = self.info['scales'][mip]['resolution']

        # build affine matrix in nm physical space
        affine = np.identity(4)
        for i in range(3):
            affine[i, i] = resolution_nm[i]
            affine[i, -1] = self._translation_nm[i]

        # warp from nm to mm
        affine[:3, :] /= 1000000.

        return np.dot(affine, voxelshift)

    def _clipcoords(self, mip):
        """
        Compute clip coordinates in voxels for the given mip
        from the pre-computed physical bounding box coordinates.
        Column 0 holds the lower corner, column 1 the upper corner.
        """
        logger.debug(
            "Computing bounding box coordinates at mip {}".format(mip))
        phys2vox = np.linalg.inv(self.affine(mip))
        clipcoords = np.dot(phys2vox, self.bbox_phys).astype('int')
        # clip bounding box coordinates to actual shape of the mip
        clipcoords[:, 0] = np.maximum(clipcoords[:, 0], 0)
        clipcoords[:, 1] = np.minimum(clipcoords[:, 1],
                                      self.volume.mip_shape(mip))
        return clipcoords

    def _load_data(self, mip, clip=False, force=False):
        """
        Actually load image data.
        NOTE: this function caches chunks as numpy arrays (*.npy) to the
        CACHEDIR defined in the retrieval module.
        
        Parameters:
        -----------
        mip : int
            index of the scale to load
        clip : Boolean, or Bbox
            If true, clip by computing the bounding box from nonempty pixels
            if False, get the complete data of the selected mip
            If Bbox, clip by this bounding box
        force : Boolean (default: False)
            if true, will start downloads even if they exceed the download
            threshold set in the gbyte_feasible member variable.

        Raises:
        -------
        RuntimeError
            if the request exceeds the threshold and force is not set
        """
        if isinstance(clip, bool):
            if clip:
                clipcoords = self._clipcoords(mip)
                bbox = Bbox(clipcoords[:3, 0], clipcoords[:3, 1])
            else:
                bbox = Bbox([0, 0, 0], self.volume.mip_shape(mip))
        elif isinstance(clip, Bbox):
            # make sure the bounding box is integer, some are not
            bbox = Bbox(
                np.array(clip.minpt).astype('int'),
                np.array(clip.maxpt).astype('int'))
        else:
            bbox = Bbox([0, 0, 0], self.volume.mip_shape(mip))
        gbytes = bbox.volume() * self.nbits / (8 * 1024**3)
        # compare against the same (instance-overridable) limit that the
        # error message reports
        if not force and gbytes > self.gbyte_feasible:
            # TODO would better do an estimate of the actual data size
            logger.error(
                "Data request is too large (would result in an ~{:.2f} GByte download, the limit is {})."
                .format(gbytes, self.gbyte_feasible))
            print(self.helptext)
            raise RuntimeError(
                "The requested resolution is too high to provide a feasible download, but you can override this behavior with the 'force' parameter."
            )
        cachefile = retrieval.cachefile("{}{}{}".format(
            self.ngsite, bbox.serialize(), str(mip)).encode('utf8'),
                                        suffix='npy')
        if os.path.exists(cachefile):
            return np.load(cachefile)
        data = np.array(self.volume.download(bbox=bbox, mip=mip))
        np.save(cachefile, data)
        return data

    def determine_mip(self, resolution=None):
        """
        Given a resolution in micrometer, try to determine the mip that can
        be used to move on. Without a resolution, the largest feasible one
        is chosen. Returns None (after logging an error and printing the
        available resolutions) if the resolution is not available.
        """
        if resolution is None:
            maxres = self.largest_feasible_resolution()
            logger.info(
                'Using the largest feasible resolution of {} micron'.format(
                    maxres))
            return self.resolutions_available[maxres]['mip']
        if resolution in self.resolutions_available:
            return self.resolutions_available[resolution]['mip']
        logger.error(
            'The requested resolution ({} micron) is not available. Choose one of:'
            .format(resolution))
        print(self.helptext)
        return None

    def _require_mip(self, resolution):
        """
        Like determine_mip, but raise ValueError instead of returning None.
        """
        mip = self.determine_mip(resolution)
        # mip 0 is a valid index but falsy, so None has to be tested
        # explicitly.
        if mip is None:
            raise ValueError(
                "Invalid image resolution for this neuroglancer precomputed tile source."
            )
        return mip

    def build_image(self,
                    resolution,
                    clip=True,
                    transform=lambda I: I,
                    force=False):
        """
        Compute and return a spatial image for the given resolution.
        
        Parameters:
        -----------
        resolution : number or None
            resolution in micrometer; None selects the largest feasible one
        clip : Boolean, or Bbox
            If true, clip by computing the bounding box from nonempty pixels
            if False, get the complete data of the selected mip
            If Bbox, clip by this bounding box
        transform : callable
            applied to the loaded array before the image is built
        force : Boolean (default: False)
            If true, will start downloads even if they exceed the download
            threshold set in the gbyte_feasible member variable.

        Raises:
        -------
        ValueError
            if the resolution is not available
        """
        mip = self._require_mip(resolution)
        return nib.Nifti1Image(transform(self._load_data(mip, clip, force)),
                               affine=self.affine(mip, clip))

    def _enclosing_chunkgrid(self, mip, bbox_phys):
        """
        Produce grid points representing the chunks of the mip 
        which enclose a given bounding box. The bounding box is given in
        physical coordinates, but the grid is returned in voxel spaces of the
        given mip.
        """

        # some helper functions to produce the smallest range on a grid
        # enclosing another range
        def cfloor(x, s):
            return int(np.floor(x / s) * s)

        def cceil(x, s):
            # +1 so that np.arange includes the upper grid point
            return int(np.ceil(x / s) * s) + 1

        def crange(x0, x1, s):
            return np.arange(cfloor(x0, s), cceil(x1, s), s)

        # project the bounding box to the voxel grid of the selected mip
        bb = np.dot(np.linalg.inv(self.affine(mip)), bbox_phys)

        # compute the enclosing chunk grid
        chunksizes = self.volume.scales[mip]['chunk_sizes'][0]
        x, y, z = [crange(bb[i][0], bb[i][1], chunksizes[i]) for i in range(3)]
        xx, yy, zz = np.meshgrid(x, y, z)
        # last row of ones makes the points homogeneous coordinates
        return np.vstack(
            [xx.ravel(),
             yy.ravel(),
             zz.ravel(),
             zz.ravel() * 0 + 1])

    def _bbox_phys(self):
        """
        Estimates the bounding box of the nonzero values 
        in the data volume, in physical coordinates. 
        Estimation is done from the lowest resolution for 
        efficiency, so it is not fully accurate.
        """
        # mip -1 addresses the last entry of the scales list
        volume = self._load_data(-1, clip=False)
        affine = self.affine(-1, clip=False)
        bbox_vox = bbox3d(volume)
        return np.dot(affine, bbox_vox)
Exemplo n.º 8
0
class MeshTask(RegisteredTask):
    def __init__(self, shape, offset, layer_path, **kwargs):
        """
        Convert all labels in the specified bounding box into meshes
        via marching cubes and quadratic edge collapse (github.com/seung-lab/zmesh).

        Required:
          shape: (sx,sy,sz) size of task
          offset: (x,y,z) offset from (0,0,0)
          layer_path: neuroglancer/cloudvolume dataset path

        Optional:
          lod: (uint) level of detail to record these meshes at
          mip: (uint) level of the resolution pyramid to download segmentation from
          simplification_factor: (uint) try to reduce the number of triangles in the
            mesh by this factor (but constrained by max_simplification_error)
          max_simplification_error: The maximum physical distance that
            simplification is allowed to move a triangle vertex by.
          mesh_dir: which subdirectory to write the meshes to (overrides info file location)
          remap_table: agglomerate segmentation before meshing using { orig_id: new_id }
          generate_manifests: (bool) if it is known that the meshes generated by this
            task will not be cropped by the bounding box, avoid needing to run a separate
            MeshManifestTask pass by generating manifests on the spot.

          These two options are used to allow sufficient overlap for trivial mesh stitching
          between adjacent tasks.

            low_padding: (uint) expand the bounding box by this many pixels by subtracting
              this padding from the minimum point of the bounding box on all axes.
            high_padding: (uint) expand the bounding box by this many pixels adding
              this padding to the maximum point of the bounding box on all axes.

          parallel_download: (uint: 1) number of processes to use during the segmentation download
          cache_control: (str: None) specify the cache-control header when uploading mesh files
          dust_threshold: (uint: None) don't bother meshing labels strictly smaller than this number of voxels.
          encoding: (str) 'precomputed' (default) or 'draco'
          draco_compression_level: (uint: 1) only applies to draco encoding
          draco_create_metadata: (bool: False) only applies to draco encoding
          progress: (bool: False) show progress bars for meshing
          object_ids: (list of ints) if specified, only mesh these ids
          fill_missing: (bool: False) replace missing segmentation files with zeros instead of erroring
          spatial_index: (bool: False) generate a JSON spatial index of which meshes are available in
            a given bounding box.
          sharded: (bool: False) If True, upload all meshes together as a single pickled
            fragment file.
          timestamp: (int: None) (graphene only) use the segmentation existing at this
            UNIX timestamp.
          agglomerate: (bool: True) forwarded to the segmentation download
            (graphene only).
          stop_layer: (int: 2) forwarded to the segmentation download
            (graphene only).
          compress: (str: 'gzip') compression used when uploading precomputed
            meshes, batched fragment files and spatial index files.

        Raises ValueError if the encoding is not one of the supported ones.
        """
        super(MeshTask, self).__init__(shape, offset, layer_path, **kwargs)
        self.shape = Vec(*shape)
        self.offset = Vec(*offset)
        self.layer_path = layer_path

        # (option name, value used when the caller did not supply one)
        option_defaults = (
            ('cache_control', None),
            ('draco_compression_level', 1),
            ('draco_create_metadata', False),
            ('dust_threshold', None),
            ('encoding', 'precomputed'),
            ('fill_missing', False),
            ('generate_manifests', False),
            ('high_padding', 1),
            ('low_padding', 0),
            ('lod', 0),
            ('max_simplification_error', 40),
            ('simplification_factor', 100),
            ('mesh_dir', None),
            ('mip', 0),
            ('object_ids', None),
            ('parallel_download', 1),
            ('progress', False),
            ('remap_table', None),
            ('spatial_index', False),
            ('sharded', False),
            ('timestamp', None),
            ('agglomerate', True),
            ('stop_layer', 2),
            ('compress', 'gzip'),
        )
        self.options = {
            name: kwargs.get(name, fallback)
            for name, fallback in option_defaults
        }

        supported_encodings = ['precomputed', 'draco']
        chosen_encoding = self.options['encoding']
        if chosen_encoding not in supported_encodings:
            raise ValueError(
                'Encoding {} is not supported. Options: {}'.format(
                    chosen_encoding, ', '.join(supported_encodings)))

        # draco output is uploaded without additional compression
        self._encoding_to_compression_dict = {
            'precomputed': self.options['compress'],
            'draco': False,
        }

    def execute(self):
        """
        Download the (padded) segmentation for this task, filter and
        renumber the labels, then mesh and upload them. Returns early
        without uploading anything if the downloaded data is all zero.
        """
        opts = self.options
        self._volume = CloudVolume(
            self.layer_path,
            opts['mip'],
            bounded=False,
            parallel=opts['parallel_download'],
            fill_missing=opts['fill_missing'],
        )
        requested = Bbox(self.offset, self.shape + self.offset)
        self._bounds = Bbox.clamp(requested, self._volume.bounds)

        self.progress = bool(opts['progress'])

        self._mesher = zmesh.Mesher(self._volume.resolution)

        # Marching cubes loves its 1vx overlaps.
        # This avoids lines appearing between
        # adjacent chunks.
        padded_bounds = self._bounds.clone()
        padded_bounds.minpt -= opts['low_padding']
        padded_bounds.maxpt += opts['high_padding']

        self._mesh_dir = self.get_mesh_dir()

        if opts['encoding'] == 'draco':
            self.draco_encoding_settings = (
                self._compute_draco_encoding_settings())

        # chunk_position includes the overlap specified by low_padding/high_padding
        # agglomerate, timestamp, stop_layer only applies to graphene volumes,
        # no-op for precomputed
        labels = self._volume.download(
            padded_bounds,
            agglomerate=opts['agglomerate'],
            timestamp=opts['timestamp'],
            stop_layer=opts['stop_layer'],
        )

        if not np.any(labels):
            return

        labels = self._remove_dust(labels, opts['dust_threshold'])
        labels = self._remap(labels)

        wanted_ids = opts['object_ids']
        if wanted_ids:
            labels = fastremap.mask_except(labels, wanted_ids, in_place=True)

        labels, renumbermap = fastremap.renumber(labels, in_place=True)
        # invert the mapping so renumbered ids lead back to the original ids
        original_ids = {new: old for old, new in renumbermap.items()}
        self.compute_meshes(labels, original_ids)

    def get_mesh_dir(self):
        """
        Return the mesh directory: the 'mesh_dir' option if set, otherwise
        the 'mesh' entry of the volume's info. Raises ValueError if neither
        is available.
        """
        configured = self.options['mesh_dir']
        if configured is not None:
            return configured

        info = self._volume.info
        if 'mesh' in info:
            return info['mesh']

        raise ValueError(
            "The mesh destination is not present in the info file.")

    def _compute_draco_encoding_settings(self):
        """Build the keyword arguments passed to the draco encoder."""
        resolution = self._volume.resolution
        padded_shape = (self.shape + self.options['low_padding'] +
                        self.options['high_padding'])
        min_quantization_range = max(padded_shape * resolution)
        max_draco_bin_size = np.floor(min(resolution) / np.sqrt(2))

        bits, quantization_range, bin_size = \
            calculate_draco_quantization_bits_and_range(
                min_quantization_range, max_draco_bin_size)

        # origin is the task offset rounded down to a multiple of the bin size
        origin = self.offset - (self.offset % bin_size)

        return {
            'quantization_bits': bits,
            'compression_level': self.options['draco_compression_level'],
            'quantization_range': quantization_range,
            'quantization_origin': origin,
            'create_metadata': self.options['draco_create_metadata']
        }

    def _remove_dust(self, data, dust_threshold):
        """
        Mask out labels with fewer than dust_threshold voxels. A falsy
        threshold leaves the data untouched.
        """
        if not dust_threshold:
            return data

        segids, pxct = fastremap.unique(data, return_counts=True)
        dust_segids = [
            sid for sid, ct in zip(segids, pxct)
            if ct < int(dust_threshold)
        ]
        return fastremap.mask(data, dust_segids, in_place=True)

    def _remap(self, data):
        """
        Apply the 'remap_table' option to the labels, if one was given.
        Labels missing from the table are masked out first. The stored
        table is replaced by an int-keyed copy that also maps 0 to 0.
        """
        table = self.options['remap_table']
        if table is None:
            return data

        remap = {int(k): int(v) for k, v in table.items()}
        remap[0] = 0
        self.options['remap_table'] = remap

        data = fastremap.mask_except(data, list(remap.keys()), in_place=True)
        return fastremap.remap(data, remap, in_place=True)

    def compute_meshes(self, data, renumbermap):
        """
        Mesh every label in data and upload the results.

        renumbermap maps the label values found in data back to the ids
        the meshes are stored under.
        """
        data = data[:, :, :, 0].T
        self._mesher.mesh(data)
        del data

        bounding_boxes = {}
        meshes = {}

        label_iter = tqdm(self._mesher.ids(),
                          disable=(not self.progress),
                          desc="Mesh")
        for obj_id in label_iter:
            remapped_id = renumbermap[obj_id]
            mesh_binary, mesh_bounds = self._create_mesh(obj_id)
            bounding_boxes[remapped_id] = mesh_bounds.to_list()
            meshes[remapped_id] = mesh_binary

        if self.options['sharded']:
            self._upload_batch(meshes, self._bounds)
        else:
            self._upload_individuals(meshes,
                                     self.options['generate_manifests'])

        if self.options['spatial_index']:
            self._upload_spatial_index(self._bounds, bounding_boxes)

    def _upload_batch(self, meshes, bbox):
        """Upload all meshes of this task as one pickled '.frags' file."""
        frags_path = "{}/{}.frags".format(self._mesh_dir, bbox.to_filename())
        with SimpleStorage(self.layer_path,
                           progress=self.options['progress']) as stor:
            # Create mesh batch for postprocessing later
            stor.put_file(
                file_path=frags_path,
                content=pickle.dumps(meshes),
                compress=self.options['compress'],
                content_type="application/python-pickle",
                cache_control=False,
            )

    def _upload_individuals(self, mesh_binaries, generate_manifests):
        """
        Upload each mesh as its own fragment file and, if requested, a
        manifest per segment that lists just that fragment.
        """
        with Storage(self.layer_path) as storage:
            for segid, mesh_binary in mesh_binaries.items():
                compression = self._encoding_to_compression_dict[
                    self.options['encoding']]
                storage.put_file(
                    file_path='{}/{}:{}:{}'.format(
                        self._mesh_dir, segid, self.options['lod'],
                        self._bounds.to_filename()),
                    content=mesh_binary,
                    compress=compression,
                    cache_control=self.options['cache_control'],
                )

                if not generate_manifests:
                    continue

                fragments = [
                    '{}:{}:{}'.format(segid, self.options['lod'],
                                      self._bounds.to_filename())
                ]
                storage.put_file(
                    file_path='{}/{}:{}'.format(self._mesh_dir, segid,
                                                self.options['lod']),
                    content=json.dumps({"fragments": fragments}),
                    content_type='application/json',
                    cache_control=self.options['cache_control'],
                )

    def _create_mesh(self, obj_id):
        """
        Extract, simplify and encode the mesh of one label.

        Returns the encoded mesh and the bounding box of its vertices after
        they have been shifted by the task's padded origin.
        """
        mesh = self._mesher.get_mesh(
            obj_id,
            simplification_factor=self.options['simplification_factor'],
            max_simplification_error=self.options['max_simplification_error'],
        )

        self._mesher.erase(obj_id)

        # shift vertices by the origin of the padded download, scaled by
        # the volume resolution
        origin = self._bounds.minpt - self.options['low_padding']
        mesh.vertices[:] += origin * self._volume.resolution

        mesh_bounds = Bbox(np.amin(mesh.vertices, axis=0),
                           np.amax(mesh.vertices, axis=0))

        encoding = self.options['encoding']
        if encoding == 'draco':
            mesh_binary = DracoPy.encode_mesh_to_buffer(
                mesh.vertices.flatten('C'), mesh.faces.flatten('C'),
                **self.draco_encoding_settings)
        elif encoding == 'precomputed':
            mesh_binary = mesh.to_precomputed()

        return mesh_binary, mesh_bounds

    def _upload_spatial_index(self, bbox, mesh_bboxes):
        """Upload the per-mesh bounding boxes as a '.spatial' JSON file."""
        index_path = "{}/{}.spatial".format(self._mesh_dir,
                                            bbox.to_filename())
        with SimpleStorage(self.layer_path,
                           progress=self.options['progress']) as stor:
            stor.put_file(
                file_path=index_path,
                content=jsonify(mesh_bboxes).encode('utf8'),
                compress=self.options['compress'],
                content_type="application/json",
                cache_control=False,
            )
Exemplo n.º 9
0
def ImageShardDownsampleTask(
  src_path: str,
  shape: ShapeType,
  offset: ShapeType,
  mip: int = 0,
  fill_missing: bool = False,
  sparse: bool = False,
  agglomerate: bool = False,
  timestamp: Optional[int] = None,
  factor: ShapeType = (2,2,1)
):
  """
  Generate a single downsample level for a shard.
  Shards are usually hundreds of megabytes to several
  gigabytes of data, so it is usually unrealistic from a
  memory perspective to make more than one mip at a time.

  The source volume is read at `mip` one z-slab at a time,
  each slab is downsampled once by `factor`, and the assembled
  image is written as a single shard file under the `mip + 1`
  key of the same volume.

  src_path: cloudpath of the volume to read from and write to
  shape: size of the region to process at `mip`
  offset: minimum corner of the region at `mip`
  mip: source resolution level; output goes to `mip + 1`
  fill_missing: forwarded to CloudVolume
  sparse: forwarded to the tinybrain downsampling function
  agglomerate, timestamp: forwarded to the download call
  factor: per-axis downsampling factor (x, y, z)

  Returns None. Nothing is uploaded if the destination
  bounding box at `mip + 1` is subvoxel.
  """
  shape = Vec(*shape)
  offset = Vec(*offset)
  mip = int(mip)
  fill_missing = bool(fill_missing)

  src_vol = CloudVolume(
    src_path, fill_missing=fill_missing, 
    mip=mip, bounded=False, progress=False
  )
  chunk_size = src_vol.meta.chunk_size(mip)

  # Region to read at `mip`: clamped to the volume and chunk aligned.
  bbox = Bbox(offset, offset + shape)
  bbox = Bbox.clamp(bbox, src_vol.meta.bounds(mip))
  bbox = bbox.expand_to_chunk_size(
    chunk_size, offset=src_vol.meta.voxel_offset(mip)
  )

  # Region to write at `mip + 1`: one shard starting at the
  # downsampled offset, clamped to the volume and chunk aligned.
  shard_shape = igneous.shards.image_shard_shape_from_spec(
    src_vol.scales[mip + 1]["sharding"], 
    src_vol.meta.volume_size(mip + 1), 
    src_vol.meta.chunk_size(mip + 1)
  )
  upper_offset = offset // Vec(*factor)
  shape_bbox = Bbox(upper_offset, upper_offset + shard_shape)
  shape_bbox = shape_bbox.astype(np.int64)
  shape_bbox = Bbox.clamp(shape_bbox, src_vol.meta.bounds(mip + 1))
  shape_bbox = shape_bbox.expand_to_chunk_size(src_vol.meta.chunk_size(mip + 1))

  if shape_bbox.subvoxel():
    return

  # The trailing 1 is the channel axis of the output buffer.
  shard_shape = list(shape_bbox.size3()) + [ 1 ]

  output_img = np.zeros(shard_shape, dtype=src_vol.dtype)

  # Each source slab must be factor.z chunks thick so that, once
  # downsampled along z, it fills exactly one chunk-thick slab of
  # the output. For a z factor of 1 this is a plain chunk-thick slab.
  src_slab_dz = chunk_size.z * int(factor[2])
  nz = int(math.ceil(bbox.dz / src_slab_dz))

  dsfn = tinybrain.downsample_with_averaging
  if src_vol.layer_type == "segmentation":
    dsfn = tinybrain.downsample_segmentation

  zbox = bbox.clone()
  zbox.maxpt.z = zbox.minpt.z + src_slab_dz
  for z in range(nz):
    img = src_vol.download(
      zbox, agglomerate=agglomerate, timestamp=timestamp
    )
    (ds_img,) = dsfn(img, factor, num_mips=1, sparse=sparse)
    # NOTE(review): no cropping is applied to ds_img here, so its
    # shape has to match the destination slab. If tinybrain rounds
    # an axis up by one voxel this assignment will fail -- confirm
    # whether a crop is needed.
    output_img[:,:,(z*chunk_size.z):(z+1)*chunk_size.z] = ds_img

    # Free the slab buffers before downloading the next one.
    del img
    del ds_img
    zbox.minpt.z += src_slab_dz
    zbox.maxpt.z += src_slab_dz

  (filename, shard) = src_vol.image.make_shard(
    output_img, shape_bbox, (mip + 1), progress=False
  )
  basepath = src_vol.meta.join(
    src_vol.cloudpath, src_vol.meta.key(mip + 1)
  )
  CloudFiles(basepath).put(filename, shard)
Exemplo n.º 10
0
def TransferTask(
  src_path, dest_path,
  mip, shape, offset,
  translate=(0,0,0),
  fill_missing=False,
  skip_first=False,
  skip_downsamples=False,
  delete_black_uploads=False,
  background_color=0,
  sparse=False,
  axis='z',
  agglomerate=False,
  timestamp=None,
  compress='gzip',
  factor=None
):
  """
  Copy a cutout of an image volume from src_path to dest_path.

  The destination cutout is Bbox(offset, offset + shape) clamped
  to the destination bounds; the source cutout is that box
  shifted back by `translate`. Depending on the flags the data is
  either copied file-for-file, written directly, or passed to
  downsample_and_upload. For graphene, the agglomerate flag can be
  used to generate proofread segmentation.
  """
  # Normalize geometry arguments to vectors and flags to bools.
  shape = Vec(*shape)
  offset = Vec(*offset)
  translate = Vec(*translate)

  fill_missing = bool(fill_missing)
  delete_black_uploads = bool(delete_black_uploads)
  sparse = bool(sparse)
  skip_first = bool(skip_first)
  skip_downsamples = bool(skip_downsamples)

  source = CloudVolume(
    src_path,
    fill_missing=fill_missing,
    mip=mip,
    bounded=False,
  )
  destination = CloudVolume(
    dest_path,
    fill_missing=fill_missing,
    mip=mip,
    delete_black_uploads=delete_black_uploads,
    background_color=background_color,
    compress=compress,
  )

  dst_bbox = Bbox.clamp(Bbox(offset, shape + offset), destination.bounds)

  # A file-level copy is only possible when nothing about the
  # data has to change between source and destination.
  is_plain_copy = (
    skip_downsamples
    and agglomerate == False
    and source.scale == destination.scale
    and source.dtype == destination.dtype
    and np.all(translate == (0,0,0))
  )
  if is_plain_copy:
    # Most efficient path: files are copied, possibly without
    # even being decompressed.
    source.image.transfer_to(dest_path, dst_bbox, mip, compress=compress)
    return

  image = source.download(
    dst_bbox - translate,
    agglomerate=agglomerate,
    timestamp=timestamp,
  )

  if skip_downsamples:
    destination[dst_bbox] = image
    return

  downsample_and_upload(
    image, dst_bbox, destination, shape,
    mip=mip,
    skip_first=skip_first,
    sparse=sparse,
    axis=axis,
    factor=factor,
  )