def transform_stream(self, stream):
    """Label connected components of each mask and yield one output per region.

    For every incoming object, the mask is labeled, per-region properties are
    computed, regions outside the configured area bounds are dropped, and one
    output object is yielded per surviving region. Optionally warns when a
    mask contains no regions at all.
    """
    with closing_if_closable(stream):
        for obj in stream:
            mask, image = self.prepare_input(obj, ("mask", "image"))
            labels, nlabels = skimage.measure.label(mask, return_num=True)
            # Labels are 1-based; enumerate from 1 to match them.
            for label, bbox in enumerate(ndi.find_objects(labels, nlabels), start=1):
                if bbox is None:
                    # Label without a bounding box (e.g. removed): skip.
                    continue
                if self.padding:
                    bbox = self._enlarge_slice(bbox, self.padding)
                props = RegionProperties(bbox, label, labels, image, True)
                area = props.area
                if self.min_area is not None and area < self.min_area:
                    continue
                if self.max_area is not None and area > self.max_area:
                    continue
                yield self.prepare_output(obj.copy(), props)
            # Optionally warn when the mask contained no labeled regions.
            if nlabels == 0 and self.warn_empty is not False:
                warn_empty = self.prepare_input(obj, "warn_empty")
                if not isinstance(warn_empty, str):
                    warn_empty = "Image"
                warnings.warn(
                    f"{warn_empty} did not contain any objects.")
def transform_stream(self, stream):
    """Read EcoTaxa archives and yield one output per indexed image.

    Each index file ("*ecotaxa_*") inside the archive is parsed as a
    tab-separated table; for every row, the referenced image is loaded and
    yielded together with the row's metadata.
    """
    with closing_if_closable(stream):
        for obj in stream:
            archive_fn, img_rank = self.prepare_input(
                obj, ("archive_fn", "img_rank"))
            with Archive(archive_fn) as archive:
                for index_fn in archive.find("*ecotaxa_*"):
                    # Image paths in the index are relative to the index file.
                    index_base = os.path.dirname(index_fn)
                    with archive.read_member(index_fn) as index_fp:
                        dataframe = pd.read_csv(
                            index_fp, sep="\t", low_memory=False)
                    dataframe = self._fix_types(dataframe)
                    for _, row in dataframe.iterrows():
                        # NOTE(review): os.path.join uses "\\" on Windows;
                        # confirm Archive.read_member accepts that.
                        image_fn = os.path.join(
                            index_base, row["img_file_name"])
                        with archive.read_member(image_fn) as image_fp:
                            image = np.array(PIL.Image.open(image_fp))
                        yield self.prepare_output(
                            obj.copy(), image, row.to_dict())
def transform_stream(self, stream):
    """Read a FlowCam .lst file and yield one FlowCamObject per row.

    Rows are grouped by their collage file so that each collage image (and
    its "_bin" mask counterpart) is loaded only once per run of rows.
    """
    with closing_if_closable(stream):
        for obj in stream:
            # Convert to str to allow Path objects
            lst_fn = str(self.prepare_input(obj, "lst_fn"))
            root_path, lst_name = os.path.split(lst_fn)
            lst_name = os.path.splitext(lst_name)[0]
            reader = _LstReader(lst_fn)
            # groupby assumes the reader emits rows ordered by collage_file.
            key = operator.itemgetter("collage_file")
            for collage_file, rows in itertools.groupby(reader, key):
                # Load image collage
                collage = np.array(
                    PIL.Image.open(os.path.join(root_path, collage_file)))
                # Load the matching binary-mask collage ("<base>_bin<ext>").
                base, ext = os.path.splitext(collage_file)
                collage_bin_fn = os.path.join(
                    root_path, "{}_bin{}".format(base, ext))
                collage_bin = np.array(
                    PIL.Image.open(collage_bin_fn)).astype(bool)
                for row in rows:
                    yield self.prepare_output(
                        obj.copy(),
                        FlowCamObject(row, lst_name, collage, collage_bin),
                    )
def transform_stream(self, stream):
    """Label connected components of each mask and yield one output per region."""
    with closing_if_closable(stream):
        for obj in stream:
            mask, image = self.prepare_input(obj, ("mask", "image"))
            labels, nlabels = skimage.measure.label(mask, return_num=True)
            # Labels are 1-based; enumerate from 1 to match them.
            for label, bbox in enumerate(ndi.find_objects(labels, nlabels), start=1):
                if bbox is None:
                    # Label without a bounding box: skip.
                    continue
                if self.padding:
                    bbox = self._enlarge_slice(bbox, self.padding)
            # NOTE: relies on skimage's private RegionProperties constructor.
                props = skimage.measure._regionprops.RegionProperties(  # pylint: disable=protected-access
                    bbox, label, labels, image, True)
                area = props.area
                if self.min_area is not None and area < self.min_area:
                    continue
                if self.max_area is not None and area > self.max_area:
                    continue
                yield self.prepare_output(obj.copy(), props)
def transform_stream(self, stream):
    """Read EcoTaxa ZIP archives and yield one output per indexed image.

    Each index file ("ecotaxa_*") inside the ZIP is parsed as a tab-separated
    table; for every row, the referenced image member is loaded and yielded
    together with the row's metadata.
    """
    with closing_if_closable(stream):
        for obj in stream:
            archive_fn, img_rank = self.prepare_input(
                obj, ("archive_fn", "img_rank"))
            with zipfile.ZipFile(archive_fn, mode="r") as zip_file:
                # NOTE(review): this only matches top-level index files; the
                # Archive-based reader uses "*ecotaxa_*" — confirm intent.
                index_names = fnmatch.filter(zip_file.namelist(), "ecotaxa_*")
                for index_name in index_names:
                    # Image paths in the index are relative to the index file.
                    index_base = os.path.dirname(index_name)
                    with zip_file.open(index_name) as index_fp:
                        dataframe = self._pd.read_csv(index_fp, sep="\t")
                    dataframe = self._fix_types(dataframe)
                    for _, row in dataframe.iterrows():
                        # ZIP member names always use "/", so build the path
                        # portably instead of with os.path.join (which would
                        # insert "\\" on Windows and fail the member lookup).
                        image_fn = "/".join(
                            part
                            for part in (index_base, row["img_file_name"])
                            if part)
                        with zip_file.open(image_fn) as image_fp:
                            image = np.array(PIL.Image.open(image_fp))
                        yield self.prepare_output(
                            obj.copy(), image, row.to_dict())
def transform_stream(self, stream):
    """Decode a video file and yield one output per frame.

    Frames are read with an indexed PyAV reader; self.kwargs is forwarded
    to the reader's constructor.
    """
    with closing_if_closable(stream):
        for obj in stream:
            video_path = self.prepare_input(obj, "path")
            for frame in pims.PyAVReaderIndexed(video_path, **self.kwargs):
                yield self.prepare_output(obj.copy(), frame)
def transform_stream(self, stream):
    """Expand a glob pattern and yield one output per matching path."""
    with closing_if_closable(stream):
        for obj in stream:
            pathname, recursive = self.prepare_input(
                obj, ("pathname", "recursive"))
            # Convert to str to allow Path objects in Python 3.5
            pattern = str(pathname)
            # iglob yields lazily, so huge match sets stay streamed.
            for match in glob.iglob(pattern, recursive=recursive):
                yield self.prepare_output(obj.copy(), match)
def transform_stream(self, stream):
    """Walk a directory tree and yield one output per file with an allowed extension."""
    with closing_if_closable(stream):
        for obj in stream:
            # Convert to str to allow Path objects in Python 3.5
            tree_root = str(self.prepare_input(obj, "root"))
            for dirpath, _, filenames in os.walk(tree_root):
                for fn in filenames:
                    # Skip non-allowed extensions
                    if os.path.splitext(fn)[1] not in self.extensions:
                        continue
                    yield self.prepare_output(
                        obj.copy(), os.path.join(dirpath, fn))
def transform_stream(self, stream):
    """Read a Bio-Formats file and yield one output per frame and series.

    If `series` is None, all series in the file are read; otherwise only the
    requested one. Each yielded output carries the frame and its series index.
    """
    with closing_if_closable(stream):
        for obj in stream:
            path, meta, series, kwargs = self.prepare_input(
                obj, ("path", "meta", "series", "kwargs"))
            reader = pims.bioformats.BioformatsReader(
                path, meta=meta, **kwargs)
            # None selects every series the file contains.
            selected = (
                range(reader.size_series) if series is None else [series])
            for s in selected:
                reader.series = s
                for frame in reader:
                    yield self.prepare_output(obj.copy(), frame, s)
def transform_stream(self, stream):
    """Write each object's images and metadata into an EcoTaxa ZIP archive.

    Images are encoded (format inferred from the filename extension) and
    stored in the ZIP; one metadata row is accumulated per image. After the
    stream is exhausted, the metadata table is written as a tab-separated
    index file with EcoTaxa type codes in a second header row.
    """
    pil_extensions = PIL.Image.registered_extensions()
    with closing_if_closable(stream), zipfile.ZipFile(
            self.archive_fn, mode="w") as zip_file:
        rows = []
        n_objects = 0
        for obj in stream:
            fnames_images, meta = self.prepare_input(
                obj, ("fnames_images", "meta"))
            for img_rank, (fname, img) in enumerate(fnames_images, start=1):
                # Pick the PIL format from the filename extension.
                pil_format = pil_extensions[os.path.splitext(fname)[1]]
                img_fp = io.BytesIO()
                PIL.Image.fromarray(img).save(img_fp, format=pil_format)
                zip_file.writestr(fname, img_fp.getvalue())
                rows.append({
                    **meta,
                    "img_file_name": fname,
                    "img_rank": img_rank
                })
            yield obj
            n_objects += 1
        dataframe = self._pd.DataFrame(rows)
        # Insert types into header
        type_header = [dtype_to_ecotaxa(dt) for dt in dataframe.dtypes]
        dataframe.columns = self._pd.MultiIndex.from_tuples(
            list(zip(dataframe.columns, type_header)))
        zip_file.writestr(
            self.meta_fn,
            dataframe.to_csv(sep="\t", encoding="utf-8", index=False))
        print("Wrote {:,d} objects to {}.".format(n_objects, self.archive_fn))
def transform_stream(self, stream):
    """Walk a directory tree and yield one output per file with an allowed extension.

    With self.sort, directories and files are visited in sorted order; with
    self.verbose, a per-directory file count is printed.
    """
    with closing_if_closable(stream):
        for obj in stream:
            # Convert to str to allow Path objects in Python 3.5
            tree_root = str(self.prepare_input(obj, "root"))
            for root, dirnames, filenames in os.walk(tree_root):
                if self.sort:
                    # In-place so os.walk descends in sorted order.
                    dirnames[:] = sorted(dirnames)
                    filenames = sorted(filenames)
                if self.verbose:
                    print(f"Found {len(filenames):,d} files in {root}.")
                for fn in filenames:
                    # Skip non-allowed extensions
                    if os.path.splitext(fn)[1] not in self.extensions:
                        continue
                    yield self.prepare_output(
                        obj.copy(), os.path.join(root, fn))
def transform_stream(self, stream):
    """Write each object's images and metadata into an EcoTaxa archive.

    Per object, the object/acq/process/sample metadata dicts are merged into
    one row under their EcoTaxa prefixes, each image is encoded (format
    inferred from the filename extension) and stored in the archive, and one
    metadata row per image is accumulated. After the stream is exhausted, the
    metadata table is written as a tab-separated index file with EcoTaxa type
    codes in a second header row.
    """
    pil_extensions = PIL.Image.registered_extensions()
    with closing_if_closable(stream), Archive(self.archive_fn, "w") as archive:
        rows = []
        n_objects = 0
        for obj in stream:
            (
                fnames_images,
                meta,
                object_meta,
                acq_meta,
                process_meta,
                sample_meta,
            ) = self.prepare_input(
                obj,
                (
                    "fnames_images",
                    "meta",
                    "object_meta",
                    "acq_meta",
                    "process_meta",
                    "sample_meta",
                ),
            )
            # Copy so that the caller-supplied dict is not mutated by the
            # prefixed updates below (the original updated it in place).
            meta = {} if meta is None else dict(meta)
            for prefix, extra in (
                    ("object_", object_meta),
                    ("acq_", acq_meta),
                    ("process_", process_meta),
                    ("sample_", sample_meta),
            ):
                if extra is not None:
                    meta.update((prefix + k, v) for k, v in extra.items())
            for img_rank, (fname, img) in enumerate(fnames_images, start=1):
                img_ext = os.path.splitext(fname)[1]
                pil_format = pil_extensions[img_ext]
                img = PIL.Image.fromarray(img)
                img_fp = io.BytesIO()
                try:
                    img.save(img_fp, format=pil_format)
                except Exception:
                    # Name the offending file before propagating the error.
                    # (Narrowed from a bare `except:`.)
                    print(f"Error writing {fname}")
                    raise
                archive.write_member(fname, img_fp.getvalue())
                rows.append({
                    **meta,
                    "img_file_name": fname,
                    "img_rank": img_rank
                })
            yield obj
            n_objects += 1
        dataframe = pd.DataFrame(rows)
        # Insert types into header. Guard against an empty stream:
        # MultiIndex.from_tuples raises on an empty column list.
        if len(dataframe.columns):
            type_header = [dtype_to_ecotaxa(dt) for dt in dataframe.dtypes]
            dataframe.columns = pd.MultiIndex.from_tuples(
                list(zip(dataframe.columns, type_header)))
        archive.write_member(
            self.meta_fn,
            io.BytesIO(
                dataframe.to_csv(sep="\t", encoding="utf-8",
                                 index=False).encode()),
        )
        print("Wrote {:,d} objects to {}.".format(n_objects, self.archive_fn))
def __iter__(self):
    """Yield (prepared image input, envelope) pairs for every stream object."""
    with closing_if_closable(self.stream) as stream:
        for obj in stream:
            prepared = self.node.prepare_input(obj, ("image", ))
            yield (prepared, _Envelope(obj))