class SaveImages(Module):
    # Pipeline module that writes images (plain images, masks, croppings,
    # or movie/stack frames) produced upstream to files on disk.
    module_name = "SaveImages"

    variable_revision_number = 16

    category = "File Processing"

    def create_settings(self):
        """Create the module's settings (the controls shown in the GUI)."""
        self.save_image_or_figure = Choice(
            "Select the type of image to save",
            IF_ALL,
            IF_IMAGE,
            doc="""\
The following types of images can be saved as a file on the hard drive:

-  *{IF_IMAGE}:* Any of the images produced upstream of **SaveImages**
   can be selected for saving. Outlines of objects created by other
   modules such as **Identify** modules, **Watershed**, and various
   object processing modules can also be saved with this option, but
   you must use the **OverlayOutlines** module to create them prior to
   saving images. Likewise, if you wish to save the objects themselves,
   you must use the **ConvertObjectsToImage** module to create a
   savable image.
-  *{IF_MASK}:* Relevant only if a module that produces masks has been
   used such as **Crop**, **MaskImage**, or **MaskObjects**. These
   modules create a mask of the pixels of interest in the image. Saving
   the mask will produce a binary image in which the pixels of interest
   are set to 1; all other pixels are set to 0.
-  *{IF_CROPPING}:* Relevant only if the **Crop** module is used. The
   **Crop** module also creates a cropping image which is typically the
   same size as the original image. However, since **Crop** permits
   removal of the rows and columns that are left blank, the cropping
   can be of a different size than the mask.
-  *{IF_MOVIE}:* A sequence of images can be saved as a TIFF stack.
""".format(
                **{
                    "IF_CROPPING": IF_CROPPING,
                    "IF_IMAGE": IF_IMAGE,
                    "IF_MASK": IF_MASK,
                    "IF_MOVIE": IF_MOVIE,
                }
            ),
        )

        self.image_name = ImageSubscriber(
            "Select the image to save", doc="Select the image you want to save."
        )

        self.file_name_method = Choice(
            "Select method for constructing file names",
            [FN_FROM_IMAGE, FN_SEQUENTIAL, FN_SINGLE_NAME],
            FN_FROM_IMAGE,
            doc="""\
*(Used only if saving non-movie files)*

Several choices are available for constructing the image file name:

-  *{FN_FROM_IMAGE}:* The filename will be constructed based on the
   original filename of an input image specified in **NamesAndTypes**.
   You will have the opportunity to prefix or append additional text.

   If you have metadata associated with your images, you can append
   text to the image filename using a metadata tag. This is especially
   useful if you want your output given a unique label according to the
   metadata corresponding to an image group. The name of the metadata
   to substitute can be provided for each image for each cycle using
   the **Metadata** module.
-  *{FN_SEQUENTIAL}:* Same as above, but in addition, each filename
   will have a number appended to the end that corresponds to the image
   cycle number (starting at 1).
-  *{FN_SINGLE_NAME}:* A single name will be given to the file. Since
   the filename is fixed, this file will be overwritten with each
   cycle. In this case, you would probably want to save the image on
   the last cycle (see the *Select how often to save* setting). The
   exception to this is to use a metadata tag to provide a unique label,
   as mentioned in the *{FN_FROM_IMAGE}* option.

{USING_METADATA_TAGS_REF}

{USING_METADATA_HELP_REF}
""".format(
                **{
                    "FN_FROM_IMAGE": FN_FROM_IMAGE,
                    "FN_SEQUENTIAL": FN_SEQUENTIAL,
                    "FN_SINGLE_NAME": FN_SINGLE_NAME,
                    "USING_METADATA_HELP_REF": _help.USING_METADATA_HELP_REF,
                    "USING_METADATA_TAGS_REF": _help.USING_METADATA_TAGS_REF,
                }
            ),
        )

        self.file_image_name = FileImageSubscriber(
            "Select image name for file prefix",
            "None",
            doc="""\
*(Used only when “{FN_FROM_IMAGE}” is selected for constructing the filename)*

Select an image loaded using **NamesAndTypes**. The original filename
will be used as the prefix for the output filename.""".format(
                **{"FN_FROM_IMAGE": FN_FROM_IMAGE}
            ),
        )

        self.single_file_name = Text(
            SINGLE_NAME_TEXT,
            "OrigBlue",
            metadata=True,
            doc="""\
*(Used only when “{FN_SEQUENTIAL}” or “{FN_SINGLE_NAME}” are selected
for constructing the filename)*

Specify the filename text here. If you have metadata associated with
your images, enter the filename text with the metadata tags.
{USING_METADATA_TAGS_REF}
Do not enter the file extension in this setting; it will be appended
automatically.""".format(
                **{
                    "FN_SEQUENTIAL": FN_SEQUENTIAL,
                    "FN_SINGLE_NAME": FN_SINGLE_NAME,
                    "USING_METADATA_TAGS_REF": _help.USING_METADATA_TAGS_REF,
                }
            ),
        )

        self.number_of_digits = Integer(
            "Number of digits",
            4,
            doc="""\
*(Used only when “{FN_SEQUENTIAL}” is selected for constructing the filename)*

Specify the number of digits to be used for the sequential numbering.
Zeros will be used to left-pad the digits. If the number specified here
is less than that needed to contain the number of image sets, the
latter will override the value entered.""".format(
                **{"FN_SEQUENTIAL": FN_SEQUENTIAL}
            ),
        )

        self.wants_file_name_suffix = Binary(
            "Append a suffix to the image file name?",
            False,
            doc="""\
Select "*{YES}*" to add a suffix to the image’s file name. Select
"*{NO}*" to use the image name as-is.
""".format(
                **{"NO": "No", "YES": "Yes"}
            ),
        )

        self.file_name_suffix = Text(
            "Text to append to the image name",
            "",
            metadata=True,
            doc="""\
*(Used only when constructing the filename from the image filename)*

Enter the text that should be appended to the filename specified above.
If you have metadata associated with your images, you may use metadata
tags.

{USING_METADATA_TAGS_REF}

Do not enter the file extension in this setting; it will be appended
automatically.
""".format(
                **{"USING_METADATA_TAGS_REF": _help.USING_METADATA_TAGS_REF}
            ),
        )

        self.file_format = Choice(
            "Saved file format",
            [FF_JPEG, FF_NPY, FF_PNG, FF_TIFF, FF_H5],
            value=FF_TIFF,
            doc="""\
*(Used only when saving non-movie files)*

Select the format to save the image(s).

Only *{FF_TIFF}* supports saving as 16-bit or 32-bit. *{FF_TIFF}* is a
"lossless" file format.

*{FF_PNG}* is also a "lossless" file format and it tends to produce
smaller files without losing any image data.

*{FF_JPEG}* is also small but is a "lossy" file format and should not
be used for any images that will undergo further quantitative analysis.

Select *{FF_NPY}* to save an illumination correction image generated by
**CorrectIlluminationCalculate**.

Select *{FF_H5}* to save files to be used for Ilastik pixel
classificaiton. The images should be correctly recognized as yxcz
images.""".format(
                **{
                    "FF_NPY": FF_NPY,
                    "FF_TIFF": FF_TIFF,
                    "FF_PNG": FF_PNG,
                    "FF_JPEG": FF_JPEG,
                    "FF_H5": FF_H5,
                }
            ),
        )

        self.pathname = SaveImagesDirectoryPath(
            "Output file location",
            self.file_image_name,
            doc="""\
This setting lets you choose the folder for the output files.
{IO_FOLDER_CHOICE_HELP_TEXT}

An additional option is the following:

-  *Same folder as image*: Place the output file in the same folder
   that the source image is located.

{IO_WITH_METADATA_HELP_TEXT}

If the subfolder does not exist when the pipeline is run, CellProfiler
will create it.

If you are creating nested subfolders using the sub-folder options, you
can specify the additional folders separated with slashes. For example,
“Outlines/Plate1” will create a “Plate1” folder in the “Outlines”
folder, which in turn is under the Default Input/Output Folder. The use
of a forward slash (“/”) as a folder separator will avoid ambiguity
between the various operating systems.
""".format(
                **{
                    "IO_FOLDER_CHOICE_HELP_TEXT": _help.IO_FOLDER_CHOICE_HELP_TEXT,
                    "IO_WITH_METADATA_HELP_TEXT": _help.IO_WITH_METADATA_HELP_TEXT,
                }
            ),
        )

        self.bit_depth = Choice(
            "Image bit depth",
            [BIT_DEPTH_8, BIT_DEPTH_16, BIT_DEPTH_FLOAT, BIT_DEPTH_RAW],
            doc=f"""\
Select the bit-depth at which you want to save the images.

*{BIT_DEPTH_FLOAT}* saves the image as floating-point decimals with
32-bit precision. When the input data is integer or binary type, pixel
values are scaled within the range (0, 1). Floating point data is not
rescaled.

*{BIT_DEPTH_16}* and *{BIT_DEPTH_FLOAT}* images are supported only for
TIFF formats.

Data is normally checked and transformed to ensure that it matches the
selected format's requirements. Selecting *{BIT_DEPTH_RAW}* will
attempt to automatically save to a compatible format without applying
any transformations to the data. This could be used to save integer
labels in 32-bit float format if you had more labels than the 16-bit
format can handle (without rescaling to the 0-1 range of
*{BIT_DEPTH_FLOAT}*). Note that because the data validation step is
skipped some images may fail to save if they contain unusable data.

Note: Opening exported multichannel 16-bit TIFF stacks in ImageJ may
require the BioFormats Importer plugin due to the compression method
used by CellProfiler.""",
        )

        self.tiff_compress = Binary(
            "Save with lossless compression?",
            value=True,
            doc="""\
*(Used only when saving 2D images as file type tiff)*

Choose whether or not to use lossless compression when saving images.
This will lead to smaller file sizes, but somewhat longer module
execution time. Note that the value of this setting will be ignored
when saving 3D tiff images, which have been saved by default with
compression since CellProfiler 3.1.

Do not use for multichannel tiff images created as Stacks in
GrayToColor.""",
        )

        self.stack_axis = Choice(
            "How to save the series",
            [AXIS_T, AXIS_Z],
            value=AXIS_T,
            doc="""\
*(Used only when saving movie/stack files)*

This setting determines how planes are saved into a movie/stack.

Selecting "T" will save planes as a time series.

Selecting "Z" will save planes as slices in a 3D z-axis.
""",
        )

        self.overwrite = Binary(
            "Overwrite existing files without warning?",
            False,
            doc="""\
Select "*{YES}*" to automatically overwrite a file if it already
exists. Select "*{NO}*" to be prompted for confirmation first.

If you are running the pipeline on a computing cluster, select
"*{YES}*" since you will not be able to intervene and answer the
confirmation prompt.""".format(
                **{"NO": "No", "YES": "Yes"}
            ),
        )

        self.when_to_save = Choice(
            "When to save",
            [WS_EVERY_CYCLE, WS_FIRST_CYCLE, WS_LAST_CYCLE],
            WS_EVERY_CYCLE,
            doc="""\
*(Used only when saving non-movie files)*

Specify at what point during pipeline execution to save file(s).

-  *{WS_EVERY_CYCLE}:* Useful for when the image of interest is created
   every cycle and is not dependent on results from a prior cycle.
-  *{WS_FIRST_CYCLE}:* Useful for when you are saving an aggregate
   image created on the first cycle, e.g.,
   **CorrectIlluminationCalculate** with the *All* setting used on
   images obtained directly from **NamesAndTypes**.
-  *{WS_LAST_CYCLE}:* Useful for when you are saving an aggregate image
   completed on the last cycle, e.g.,
   **CorrectIlluminationCalculate** with the *All* setting used on
   intermediate images generated during each cycle.""".format(
                **{
                    "WS_EVERY_CYCLE": WS_EVERY_CYCLE,
                    "WS_FIRST_CYCLE": WS_FIRST_CYCLE,
                    "WS_LAST_CYCLE": WS_LAST_CYCLE,
                }
            ),
        )

        self.update_file_names = Binary(
            "Record the file and path information to the saved image?",
            False,
            doc="""\
Select "*{YES}*" to store filename and pathname data for each of the
new files created via this module as a per-image measurement.

Instances in which this information may be useful include:

-  Exporting measurements to a database, allowing access to the saved
   image. If you are using the machine-learning tools or image viewer
   in CellProfiler Analyst, for example, you will want to enable this
   setting if you want the saved images to be displayed along with the
   original images.""".format(
                **{"YES": "Yes"}
            ),
        )

        self.create_subdirectories = Binary(
            "Create subfolders in the output folder?",
            False,
            doc="""\
Select "*{YES}*" to create subfolders to match the input image folder
structure.

For example, if your input images are organized into subfolders (e.g.,
for each plate, well, animal, etc.), this option allows you to mirror
some or all of that nested folder structure in the output folder.""".format(
                **{"YES": "Yes"}
            ),
        )

        self.root_dir = Directory(
            "Base image folder",
            doc="""\
*Used only if creating subfolders in the output folder*

In subfolder mode, **SaveImages** determines the folder for an output
image file by examining the path of the matching input file. You should
choose as **Base image folder** the input folder that has the structure
you'd like to mirror in the output folder.

Consider an example where your input images are stored in a nested
folder structure of "images\/experiment-name\/plate-name" (i.e., your
files are in folders for each plate, nested inside of folders for each
experiment, nested in a parent folder called "images"). If you select
the base image folder to be **images**, **SaveImages** will go to your
"Output file location" and save images in subfolders
"experiment-name\/plate-name" that corresponds to each input image. If
the base image folder chosen is one level deeper at
"images\/experiment-name", **SaveImages** will store images in
subfolders for each "plate-name" they belong to.

**Warning**: Do not select the same folder you selected for "Output
file location" as this can lead to unexpected behavior like saving in
the original input file directory. For safety, ensure "Overwrite
existing files without warning?" is set to "No" while testing this
option.
""",
        )

    def settings(self):
        """Return the settings in the order to use when saving"""
        return [
            self.save_image_or_figure,
            self.image_name,
            self.file_name_method,
            self.file_image_name,
            self.single_file_name,
            self.number_of_digits,
            self.wants_file_name_suffix,
            self.file_name_suffix,
            self.file_format,
            self.pathname,
            self.bit_depth,
            self.overwrite,
            self.when_to_save,
            self.update_file_names,
            self.create_subdirectories,
            self.root_dir,
            self.stack_axis,
            self.tiff_compress,
        ]

    def visible_settings(self):
        """Return only the settings that should be shown"""
        result = [self.save_image_or_figure, self.image_name, self.file_name_method]
        if self.file_name_method == FN_FROM_IMAGE:
            result += [self.file_image_name, self.wants_file_name_suffix]
            if self.wants_file_name_suffix:
                result.append(self.file_name_suffix)
        elif self.file_name_method == FN_SEQUENTIAL:
            self.single_file_name.text = SEQUENTIAL_NUMBER_TEXT
            # XXX - Change doc, as well!
            result.append(self.single_file_name)
            result.append(self.number_of_digits)
        elif self.file_name_method == FN_SINGLE_NAME:
            self.single_file_name.text = SINGLE_NAME_TEXT
            result.append(self.single_file_name)
        else:
            raise NotImplementedError(
                "Unhandled file name method: %s" % self.file_name_method
            )
        if self.save_image_or_figure != IF_MOVIE:
            result.append(self.file_format)
        supports_16_bit = (
            self.file_format in (FF_TIFF, FF_H5)
            and self.save_image_or_figure == IF_IMAGE
        ) or self.save_image_or_figure == IF_MOVIE
        if supports_16_bit:
            # TIFF supports 8 & 16-bit, all others are written 8-bit
            result.append(self.bit_depth)
        if self.file_format == FF_TIFF:
            result.append(self.tiff_compress)
        if self.save_image_or_figure == IF_MOVIE:
            result.append(self.stack_axis)
        result.append(self.pathname)
        result.append(self.overwrite)
        if self.save_image_or_figure != IF_MOVIE:
            result.append(self.when_to_save)
        result.append(self.update_file_names)
        if self.file_name_method == FN_FROM_IMAGE:
            result.append(self.create_subdirectories)
            if self.create_subdirectories:
                result.append(self.root_dir)
        return result

    @property
    def module_key(self):
        # Unique per-instance key used for the shared per-group dictionary.
        return "%s_%d" % (self.module_name, self.module_num)

    def prepare_group(self, workspace, grouping, image_numbers):
        """Initialize per-group state (frame bookkeeping for movie mode)."""
        d = self.get_dictionary(workspace.image_set_list)
        if self.save_image_or_figure == IF_MOVIE:
            d["N_FRAMES"] = len(image_numbers)
            d["CURRENT_FRAME"] = 0
        return True

    def prepare_to_create_batch(self, workspace, fn_alter_path):
        """Rewrite stored paths for batch (cluster) execution."""
        self.pathname.alter_for_create_batch_files(fn_alter_path)
        if self.create_subdirectories:
            self.root_dir.alter_for_create_batch_files(fn_alter_path)

    def run(self, workspace):
        """Run the module

        pipeline     - instance of cellprofiler_core.pipeline for this run
        workspace    - the workspace contains:
            image_set    - the images in the image set being processed
            object_set   - the objects (labeled masks) in this image set
            measurements - the measurements for this run
            frame        - display within this frame (or None to not display)
        """
        if self.save_image_or_figure.value in (IF_IMAGE, IF_MASK, IF_CROPPING):
            self.run_image(workspace)
        elif self.save_image_or_figure == IF_MOVIE:
            self.run_movie(workspace)
        else:
            raise NotImplementedError(
                "Saving a %s is not yet supported" % self.save_image_or_figure
            )
        workspace.display_data.filename = self.get_filename(
            workspace, make_dirs=False, check_overwrite=False
        )

    def is_aggregation_module(self):
        """SaveImages is an aggregation module when it writes movies"""
        return (
            self.save_image_or_figure == IF_MOVIE
            or self.when_to_save == WS_LAST_CYCLE
        )

    def display(self, workspace, figure):
        """Show a one-cell table reporting whether the file was written."""
        if self.show_window:
            if self.save_image_or_figure == IF_MOVIE:
                return
            figure.set_subplots((1, 1))
            outcome = (
                "Wrote %s" if workspace.display_data.wrote_image else "Did not write %s"
            )
            figure.subplot_table(0, 0, [[outcome % workspace.display_data.filename]])

    def run_image(self, workspace):
        """Handle saving an image"""
        #
        # First, check to see if we should save this image
        #
        if self.when_to_save == WS_FIRST_CYCLE:
            d = self.get_dictionary(workspace.image_set_list)
            if workspace.measurements["Image", "Group_Index",] > 1:
                # Not the first cycle of the group; record filenames only.
                workspace.display_data.wrote_image = False
                self.save_filename_measurements(workspace)
                return
            d["FIRST_IMAGE"] = False
        elif self.when_to_save == WS_LAST_CYCLE:
            # Actual write is deferred to post_group().
            workspace.display_data.wrote_image = False
            self.save_filename_measurements(workspace)
            return
        self.save_image(workspace)
        return True

    def run_movie(self, workspace):
        """Append the current image to the movie/stack file for this group."""
        out_file = self.get_filename(workspace, check_overwrite=False)
        # overwrite checks are made only for first frame.
        d = self.get_dictionary(workspace.image_set_list)
        if d["CURRENT_FRAME"] == 0 and os.path.exists(out_file):
            if not self.check_overwrite(out_file, workspace):
                d["CURRENT_FRAME"] = "Ignore"
                return
            else:
                # Have to delete the old movie before making the new one
                os.remove(out_file)
        elif d["CURRENT_FRAME"] == "Ignore":
            return

        image = workspace.image_set.get_image(self.image_name.value)
        pixels = image.pixel_data
        if self.get_bit_depth() == BIT_DEPTH_8:
            pixels = skimage.util.img_as_ubyte(pixels)
            pixel_type = bioformats.omexml.PT_UINT8
        elif self.get_bit_depth() == BIT_DEPTH_16:
            pixels = skimage.util.img_as_uint(pixels)
            pixel_type = bioformats.omexml.PT_UINT16
        elif self.get_bit_depth() == BIT_DEPTH_FLOAT:
            pixels = skimage.util.img_as_float32(pixels)
            pixel_type = bioformats.omexml.PT_FLOAT
        else:
            raise ValueError("Bit depth unsupported in movie mode")

        frames = d["N_FRAMES"]
        current_frame = d["CURRENT_FRAME"]
        d["CURRENT_FRAME"] += 1
        if self.stack_axis == AXIS_T:
            self.do_save_image(
                workspace,
                out_file,
                pixels,
                pixel_type,
                t=current_frame,
                size_t=frames,
            )
        else:
            self.do_save_image(
                workspace,
                out_file,
                pixels,
                pixel_type,
                z=current_frame,
                size_z=frames,
            )

    def post_group(self, workspace, *args):
        """Write WS_LAST_CYCLE images once the whole group has been processed."""
        if self.when_to_save == WS_LAST_CYCLE and self.save_image_or_figure != IF_MOVIE:
            try:
                self.save_image(workspace)
            except ValueError:
                raise ValueError(
                    "You have tried to save %s on the last cycle but that "
                    "cycle failed FlagImages. Please adjust the FlagImages "
                    "settings and rerun" % (self.image_name.value)
                )

    def do_save_image(
        self,
        workspace,
        filename,
        pixels,
        pixel_type,
        c=0,
        z=0,
        t=0,
        size_c=1,
        size_z=1,
        size_t=1,
        channel_names=None,
    ):
        """Save image using bioformats

        workspace - the current workspace
        filename - save to this filename
        pixels - the image to save
        pixel_type - save using this pixel type
        c - the image's channel index
        z - the image's z index
        t - the image's t index
        sizeC - # of channels in the stack
        sizeZ - # of z stacks
        sizeT - # of timepoints in the stack
        channel_names - names of the channels (make up names if not present
        """
        bioformats.formatwriter.write_image(
            filename,
            pixels,
            pixel_type,
            c=c,
            z=z,
            t=t,
            size_c=size_c,
            size_z=size_z,
            size_t=size_t,
            channel_names=channel_names,
        )

    def save_image(self, workspace):
        """Write the selected image/mask/cropping to disk in the chosen format."""
        if self.show_window:
            workspace.display_data.wrote_image = False

        filename = self.get_filename(workspace)

        if filename is None:  # failed overwrite check
            return

        image = workspace.image_set.get_image(self.image_name.value)

        volumetric_extensions = [FF_NPY, FF_TIFF, FF_H5]
        if image.volumetric and self.file_format.value not in volumetric_extensions:
            raise RuntimeError(
                "Unsupported file format {} for 3D pipeline. Use {} format "
                "when processing images as 3D.".format(
                    self.file_format.value, ", or ".join(volumetric_extensions)
                )
            )

        if self.save_image_or_figure.value == IF_IMAGE:
            pixels = image.pixel_data
        elif self.save_image_or_figure.value == IF_MASK:
            pixels = image.mask
        elif self.save_image_or_figure.value == IF_CROPPING:
            pixels = image.crop_mask

        if self.file_format == FF_NPY:
            numpy.save(filename, pixels)
        else:
            save_kwargs = {}
            if self.get_bit_depth() == BIT_DEPTH_8:
                pixels = skimage.util.img_as_ubyte(pixels)
            elif self.get_bit_depth() == BIT_DEPTH_16:
                pixels = skimage.util.img_as_uint(pixels)
            elif self.get_bit_depth() == BIT_DEPTH_FLOAT:
                pixels = skimage.util.img_as_float32(pixels)
            elif self.get_bit_depth() == BIT_DEPTH_RAW:
                # No bit depth transformation
                pass

            # skimage will save out color images (M,N,3) or (M,N,4) appropriately
            # but any more than that will need to be transposed so they conform to the
            # CYX convention rather than YXC
            # http://scikit-image.org/docs/dev/api/skimage.io.html#skimage.io.imsave
            if (
                not image.volumetric
                and len(pixels.shape) > 2
                and image.channelstack
                and self.file_format.value == FF_TIFF
            ):
                pixels = numpy.transpose(pixels, (2, 0, 1))
                save_kwargs.update({'imagej': True})

            if (
                image.volumetric or self.tiff_compress.value
            ) and self.file_format.value == FF_TIFF:
                save_kwargs.update({"compress": 6})

            if self.file_format.value == FF_H5:
                save_h5(filename, pixels, volumetric=image.volumetric)
            else:
                skimage.io.imsave(filename, pixels, **save_kwargs)

        if self.show_window:
            workspace.display_data.wrote_image = True

        if self.when_to_save != WS_LAST_CYCLE:
            self.save_filename_measurements(workspace)

    def check_overwrite(self, filename, workspace):
        """Check to see if it's legal to overwrite a file

        Throws an exception if can't overwrite and no interaction available.
        Returns False if can't overwrite, otherwise True.
        """
        if not self.overwrite.value and os.path.isfile(filename):
            try:
                return (
                    workspace.interaction_request(
                        self, workspace.measurements.image_set_number, filename
                    )
                    == "Yes"
                )
            except workspace.NoInteractionException:
                raise ValueError(
                    'SaveImages: trying to overwrite %s in headless mode, but Overwrite files is set to "No"'
                    % filename
                )
        return True

    def handle_interaction(self, image_set_number, filename):
        """handle an interaction request from check_overwrite()"""
        import wx

        dlg = wx.MessageDialog(
            wx.GetApp().TopWindow,
            "%s #%d, set #%d - Do you want to overwrite %s?"
            % (self.module_name, self.module_num, image_set_number, filename),
            "Warning: overwriting file",
            wx.YES_NO | wx.ICON_QUESTION,
        )
        result = dlg.ShowModal() == wx.ID_YES
        return "Yes" if result else "No"

    def save_filename_measurements(self, workspace):
        """Record file name, path and URL measurements for the output file."""
        if self.update_file_names.value:
            filename = self.get_filename(
                workspace, make_dirs=False, check_overwrite=False
            )
            pn, fn = os.path.split(filename)
            url = cellprofiler_core.utilities.pathname.pathname2url(filename)
            workspace.measurements.add_measurement(
                "Image", self.file_name_feature, fn,
            )
            workspace.measurements.add_measurement(
                "Image", self.path_name_feature, pn,
            )
            workspace.measurements.add_measurement(
                "Image", self.url_feature, url,
            )

    @property
    def file_name_feature(self):
        return "_".join((C_FILE_NAME, self.image_name.value))

    @property
    def path_name_feature(self):
        return "_".join((C_PATH_NAME, self.image_name.value))

    @property
    def url_feature(self):
        return "_".join((C_URL, self.image_name.value))

    @property
    def source_file_name_feature(self):
        """The file name measurement for the exemplar disk image"""
        return "_".join((C_FILE_NAME, self.file_image_name.value))

    def source_path(self, workspace):
        """The path for the image data, or its first parent with a path"""
        if self.file_name_method.value == FN_FROM_IMAGE:
            path_feature = "%s_%s" % (C_PATH_NAME, self.file_image_name.value,)
            assert workspace.measurements.has_feature("Image", path_feature), (
                "Image %s does not have a path!" % self.file_image_name.value
            )
            return workspace.measurements.get_current_image_measurement(path_feature)

        # ... otherwise, chase the cpimage hierarchy looking for an image with a path
        cur_image = workspace.image_set.get_image(self.image_name.value)
        while cur_image.path_name is None:
            cur_image = cur_image.parent_image
            # Fix: the message was previously a single broken literal with
            # the % formatting embedded inside the string.
            assert cur_image is not None, (
                "Could not determine source path for image %s"
                % self.image_name.value
            )
        return cur_image.path_name

    def get_measurement_columns(self, pipeline):
        # NOTE(review): save_filename_measurements() also records a URL
        # measurement that is not declared here — confirm whether it should
        # be added to this list.
        if self.update_file_names.value:
            return [
                ("Image", self.file_name_feature, COLTYPE_VARCHAR_FILE_NAME,),
                ("Image", self.path_name_feature, COLTYPE_VARCHAR_PATH_NAME,),
            ]
        else:
            return []

    def get_filename(self, workspace, make_dirs=True, check_overwrite=True):
        """Concoct a filename for the current image based on the user settings"""
        measurements = workspace.measurements
        if self.file_name_method == FN_SINGLE_NAME:
            filename = self.single_file_name.value
            filename = workspace.measurements.apply_metadata(filename)
        elif self.file_name_method == FN_SEQUENTIAL:
            filename = self.single_file_name.value
            filename = workspace.measurements.apply_metadata(filename)
            n_image_sets = workspace.measurements.image_set_count
            ndigits = int(numpy.ceil(numpy.log10(n_image_sets + 1)))
            ndigits = max((ndigits, self.number_of_digits.value))
            padded_num_string = str(measurements.image_set_number).zfill(ndigits)
            filename = "%s%s" % (filename, padded_num_string)
        else:
            file_name_feature = self.source_file_name_feature
            filename = measurements.get_current_measurement("Image", file_name_feature)
            filename = os.path.splitext(filename)[0]
            if self.wants_file_name_suffix:
                suffix = self.file_name_suffix.value
                suffix = workspace.measurements.apply_metadata(suffix)
                filename += suffix

        filename = "%s.%s" % (filename, self.get_file_format())
        pathname = self.pathname.get_absolute_path(measurements)
        if self.create_subdirectories:
            image_path = self.source_path(workspace)
            subdir = os.path.relpath(image_path, self.root_dir.get_absolute_path())
            pathname = os.path.join(pathname, subdir)
        if len(pathname) and not os.path.isdir(pathname) and make_dirs:
            try:
                os.makedirs(pathname)
            except:
                #
                # On cluster, this can fail if the path was created by
                # another process after this process found it did not exist.
                #
                if not os.path.isdir(pathname):
                    raise
        result = os.path.join(pathname, filename)
        if check_overwrite and not self.check_overwrite(result, workspace):
            return

        if check_overwrite and os.path.isfile(result):
            try:
                os.remove(result)
            except:
                import bioformats

                bioformats.clear_image_reader_cache()
                os.remove(result)
        return result

    def get_file_format(self):
        """Return the file format associated with the extension in self.file_format"""
        if self.save_image_or_figure == IF_MOVIE:
            return FF_TIFF

        return self.file_format.value

    def get_bit_depth(self):
        # Only TIFF/H5 image or movie saves honor the bit-depth setting;
        # everything else is written 8-bit.
        if self.save_image_or_figure in (
            IF_IMAGE,
            IF_MOVIE,
        ) and self.get_file_format() in (FF_TIFF, FF_H5):
            return self.bit_depth.value
        else:
            return BIT_DEPTH_8

    def upgrade_settings(self, setting_values, variable_revision_number, module_name):
        """Upgrade settings saved by an older revision of this module."""
        if variable_revision_number == 11:
            if setting_values[0] == "Objects":
                raise NotImplementedError(
                    "Unsupported image type: Objects. Use <i>ConvertObjectsToImage</i> to create an image."
                )

            if setting_values[10] in ("bmp", "mat"):
                raise NotImplementedError(
                    "Unsupported file format: {}".format(setting_values[10])
                )
            elif setting_values[10] == "tif":
                setting_values[10] = FF_TIFF
            elif setting_values[10] == "jpg":
                setting_values[10] = FF_JPEG

            new_setting_values = setting_values[:2]
            new_setting_values += setting_values[4:15]
            new_setting_values += setting_values[18:-1]
            setting_values = new_setting_values

            if setting_values[10] == "8":
                setting_values[10] = BIT_DEPTH_8
            elif setting_values[10] == "16":
                setting_values[10] = BIT_DEPTH_16

            variable_revision_number = 12

        if variable_revision_number == 12:
            if setting_values[10] == "64-bit floating point":
                setting_values[10] = BIT_DEPTH_FLOAT

            variable_revision_number = 13

        if variable_revision_number == 13:
            variable_revision_number = 14

        if variable_revision_number == 14:
            # Renamed "Movie" to "Movie/Stack"
            if setting_values[0] == "Movie":
                setting_values[0] = IF_MOVIE
            # Added movie save axis
            setting_values.append(AXIS_T)
            variable_revision_number = 15

        if variable_revision_number == 15:
            setting_values.append(False)
            # Fix: this was previously "variable_revision_number == 16", a
            # no-op comparison, so the upgrade to revision 16 was never
            # recorded.
            variable_revision_number = 16

        return setting_values, variable_revision_number

    def validate_module(self, pipeline):
        """Flag setting combinations that cannot work at runtime."""
        if self.save_image_or_figure in (
            IF_IMAGE,
            IF_MASK,
            IF_CROPPING,
        ) and self.when_to_save in (WS_FIRST_CYCLE, WS_EVERY_CYCLE):
            #
            # Make sure that the image name is available on every cycle
            #
            for setting in get_name_providers(pipeline, self.image_name):
                if setting.provided_attributes.get("available_on_last"):
                    #
                    # If we fell through, then you can only save on the last cycle
                    #
                    raise ValidationError(
                        "%s is only available after processing all images in an image group"
                        % self.image_name.value,
                        self.when_to_save,
                    )

        # XXX - should check that if file_name_method is
        # FN_FROM_IMAGE, that the named image actually has the
        # required path measurement

        # Make sure metadata tags exist
        if self.file_name_method == FN_SINGLE_NAME or (
            self.file_name_method == FN_FROM_IMAGE
            and self.wants_file_name_suffix.value
        ):
            text_str = (
                self.single_file_name.value
                if self.file_name_method == FN_SINGLE_NAME
                else self.file_name_suffix.value
            )
            undefined_tags = pipeline.get_undefined_metadata_tags(text_str)
            if len(undefined_tags) > 0:
                raise ValidationError(
                    "%s is not a defined metadata tag. Check the metadata specifications in your load modules"
                    % undefined_tags[0],
                    self.single_file_name
                    if self.file_name_method == FN_SINGLE_NAME
                    else self.file_name_suffix,
                )

    def volumetric(self):
        # This module can participate in 3D (volumetric) pipelines.
        return True
class ExportToACC(Module):
    # Exports per-object measurements as ACC-format text files: one header
    # file (ACC_FILE_NAME) listing the exported features plus one delimited
    # data file per image set.
    module_name = 'ExportToACC'
    category = ["File Processing", "Data Tools"]
    variable_revision_number = 2

    def create_settings(self):
        """Create the settings shown in the CellProfiler UI."""
        self.directory = Directory(
            "Output file location",
            dir_choices=[
                ABSOLUTE_FOLDER_NAME, DEFAULT_OUTPUT_FOLDER_NAME,
                DEFAULT_OUTPUT_SUBFOLDER_NAME, DEFAULT_INPUT_FOLDER_NAME,
                DEFAULT_INPUT_SUBFOLDER_NAME
            ],
            doc="""
            This setting lets you choose the folder for the output files.
            %(IO_FOLDER_CHOICE_HELP_TEXT)s
            %(IO_WITH_METADATA_HELP_TEXT)s
            """ % globals())
        self.directory.dir_choice = DEFAULT_OUTPUT_FOLDER_NAME

        self.wants_file_name_suffix = Binary(
            "Append a suffix to the file name?", False,
            doc="""
            Select *"YES"* to add a suffix to the file name.
            Select *"NO"* to use the file name as-is.
            """ % globals())

        self.file_name_suffix = Text(
            "Text to append to the file name", "", metadata=True,
            doc="""
            "*(Used only when constructing the filename from the image filename)*"
            Enter the text that should be appended to the filename specified above.
            """)

        self.wants_overwrite_without_warning = Binary(
            "Overwrite without warning?", False,
            doc="""This setting either prevents or allows overwriting of
            old .txt files by **ExportToACC** without confirmation.
            Select "*YES*" to overwrite without warning any .txt file that already exists.
            Select "*NO*" to prompt before overwriting when running CellProfiler in the GUI
            and to fail when running headless.
            """ % globals())

        self.nan_representation = Choice(
            "Representation of Nan/Inf", [NANS_AS_NANS, NANS_AS_NULLS],
            doc="""
            This setting controls the output for numeric fields if the calculated
            value is infinite (*"Inf"*) or undefined (*"NaN*"). CellProfiler will
            produce Inf or NaN values under certain rare circumstances, for instance
            when calculating the mean intensity of an object within a masked region
            of an image.

            - "*%(NANS_AS_NULLS)s:*" Output these values as empty fields.
            - "*%(NANS_AS_NANS)s:*" Output them as the strings "NaN", "Inf" or "-Inf".
            """ % globals())

        self.pick_columns = Binary(
            "Select the measurements to export", False,
            doc="""
            Select *"YES"* to provide a button that allows you to select which
            measurements you want to export. This is useful if you know exactly
            what measurements you want included in the final spreadheet(s).
            """ % globals())

        self.columns = MeasurementMultiChoice(
            "Press button to select measurements to export",
            doc="""
            "*(Used only when selecting the columns of measurements to export)*"
            This setting controls the columns to be exported. Press the button
            and check the measurements or categories to export.""")

        self.file_image_name = FileImageSubscriber(
            "Select image name for file prefix", "None",
            doc="""
            Select an image loaded using **NamesAndTypes**. The original filename
            will be used as the prefix for the output filename.""" % globals())

    def settings(self):
        """Return the settings in the order used when storing """
        result = [
            self.pick_columns,
            self.directory,
            self.columns,
            self.nan_representation,
            self.wants_file_name_suffix,
            self.file_name_suffix,
            self.wants_overwrite_without_warning,
            self.file_image_name,
        ]
        return result

    def visible_settings(self):
        """Return the settings as seen by the user"""
        result = [
            self.directory,
            self.file_image_name,
            self.wants_file_name_suffix,
        ]
        if self.wants_file_name_suffix:
            result += [self.file_name_suffix]
        result += [
            self.wants_overwrite_without_warning,
            self.nan_representation,
            self.pick_columns,
        ]
        if self.pick_columns:
            result += [self.columns]
        return result

    def validate_module(self, pipeline):
        """Test the module settings to make sure they are internally consistent."""
        # Make sure metadata tags used in the suffix exist upstream.
        if self.wants_file_name_suffix.value:
            text_str = self.file_name_suffix.value
            undefined_tags = pipeline.get_undefined_metadata_tags(text_str)
            if len(undefined_tags) > 0:
                raise ValidationError(
                    "%s is not a defined metadata tag. Check the metadata specifications in your load modules"
                    % undefined_tags[0],
                    self.file_name_suffix)

    def validate_module_warnings(self, pipeline):
        """Warn user re: Test mode """
        if pipeline.test_mode:
            raise ValidationError(
                "ExportToACC will not produce output in Test Mode",
                self.directory)

    def prepare_run(self, workspace):
        """Prepare an image set to be run.

        workspace - workspace with image set populated (at this point)

        returns False if analysis can't be done (user declined to overwrite)
        """
        return self.check_overwrite(workspace)

    def run(self, workspace):
        # all of the work is done in post_run(); here we only collect the
        # per-cycle display information.
        if self.show_window:
            image_set_number = workspace.measurements.image_set_number
            header = ["Filename"]
            columns = []
            path = self.make_image_file_name(workspace, image_set_number)
            columns.append((path,))
            workspace.display_data.header = header
            workspace.display_data.columns = columns

    def display(self, workspace, figure):
        """Show the file name(s) that will be written, in a one-cell table."""
        figure.set_subplots((1, 1,))
        if workspace.display_data.columns is None:
            figure.subplot_table(0, 0, [["Data written to acc files"]])
        elif workspace.pipeline.test_mode:
            figure.subplot_table(
                0, 0, [["Data not written to acc files in test mode"]])
        else:
            figure.subplot_table(0, 0,
                                 workspace.display_data.columns,
                                 col_labels=workspace.display_data.header)

    def run_as_data_tool(self, workspace):
        """Run the module as a data tool.

        For ExportToACC, we do the "post_run" method in order to write out
        the .txt files as if the experiment had just finished.
        """
        #
        # Set the measurements to the end of the list to mimic the state
        # at the end of the run.
        #
        m = workspace.measurements
        m.image_set_number = m.image_set_count
        self.post_run(workspace)

    def post_run(self, workspace):
        """Save measurements at end of run"""
        #
        # Don't export in test mode
        #
        #if workspace.pipeline.test_mode:
        #    return
        object_names = self.filter_object_names(
            workspace.measurements.get_object_names())
        self.run_objects(object_names, workspace)

    def should_stop_writing_measurements(self):
        """All subsequent modules should not write measurements"""
        return True

    def get_metadata_groups(self, workspace, settings_group=None):
        """Find the metadata groups that are relevant for creating the file name.

        workspace - the workspace with the image set metadata elements and
                    grouping measurements populated.
        settings_group - if saving individual objects, this is the settings
                         group that controls naming the files.
        """
        if settings_group is None or settings_group.wants_automatic_file_name:
            tags = []
        else:
            tags = find_metadata_tokens(settings_group.file_name.value)
        if self.directory.is_custom_choice:
            tags += find_metadata_tokens(self.directory.custom_path)
        metadata_groups = workspace.measurements.group_by_metadata(tags)
        return metadata_groups

    def run_objects(self, object_names, workspace, settings_group=None):
        """Create a file based on the object names.

        object_names - a sequence of object names (or Image or Experiment)
                       which tell us which objects get piled into each file
        workspace - get the images from here.
        settings_group - if present, use the settings group for naming.
        """
        metadata_groups = self.get_metadata_groups(workspace, settings_group)
        for metadata_group in metadata_groups:
            self.make_object_file(object_names, metadata_group.image_numbers,
                                  workspace, settings_group)

    def make_full_filename(self, file_name, workspace=None, image_set_number=None):
        """Convert a file name into an absolute path.

        We do a few things here:
        * apply metadata from an image set to the file name if an image set
          is specified
        * change the relative path into an absolute one using the "." and "&"
          convention
        * create any directories along the path
        """
        if image_set_number is not None and workspace is not None:
            file_name = workspace.measurements.apply_metadata(
                file_name, image_set_number)
        measurements = None if workspace is None else workspace.measurements
        path_name = self.directory.get_absolute_path(measurements,
                                                     image_set_number)
        file_name = os.path.join(path_name, file_name)
        path, fname = os.path.split(file_name)
        # exist_ok avoids the isdir/makedirs race when several workers
        # create the same output folder concurrently.
        os.makedirs(path, exist_ok=True)
        return os.path.join(path, fname)

    def extension(self):
        """Return the appropriate extension for the txt file name.

        The appropriate extension is "txt"
        """
        return "txt"

    def make_image_file_name(self, workspace, image_set_number, settings_group=None):
        """Make file name for objects measured from an image.

        :param workspace: the current workspace
        :param image_set_number: the current image set number
        :param settings_group: the settings group used to name the file
        """
        imagename = workspace.measurements.get_measurement(
            IMAGE, "FileName_" + self.file_image_name.value, image_set_number)
        filename = "%s" % os.path.splitext(imagename)[0]
        if self.wants_file_name_suffix:
            suffix = self.file_name_suffix.value
            suffix = workspace.measurements.apply_metadata(
                suffix, image_set_number)
            filename += suffix
        filename = "%s.%s" % (filename, self.extension())
        return self.make_full_filename(filename, workspace, image_set_number)

    def check_overwrite(self, workspace):
        """Make sure it's ok to overwrite any existing files before starting run.

        workspace - workspace with all image sets already populated

        returns True if ok to proceed, False if user cancels
        """
        if self.wants_overwrite_without_warning:
            return True

        files_to_check = []
        metadata_groups = self.get_metadata_groups(workspace)
        for metadata_group in metadata_groups:
            image_number = metadata_group.image_numbers[0]
            files_to_check.append(
                self.make_image_file_name(workspace, image_number))

        # BUG FIX: filter() returns an iterator on Python 3, so the original
        # len(filter(...)) raised TypeError; materialize the matches.
        files_to_overwrite = [f for f in files_to_check if os.path.isfile(f)]
        if len(files_to_overwrite) > 0:
            if get_headless():
                logger.error(
                    "ExportToACC is configured to refrain from overwriting files and the following file(s) already exist: %s"
                    % ", ".join(files_to_overwrite))
                return False
            msg = "Overwrite the following file(s)?\n" + \
                  "\n".join(files_to_overwrite)
            import wx
            result = wx.MessageBox(
                msg,
                caption="ExportToACC: Overwrite existing files",
                style=wx.YES_NO | wx.NO_DEFAULT | wx.ICON_QUESTION)
            if result != wx.YES:
                return False
        return True

    def filter_columns(self, features, object_name):
        """Restrict *features* to the user-picked columns for *object_name*."""
        if self.pick_columns:
            columns = [
                self.columns.get_measurement_feature(x)
                for x in self.columns.selections
                if self.columns.get_measurement_object(x) == object_name
            ]
            columns = set(columns)
            features = [x for x in features if x in columns]
        return features

    def filter_object_names(self, object_names):
        """Remove the per-image/per-experiment pseudo-objects (in place).

        Robust to either name being absent (the original remove() raised
        ValueError in that case).
        """
        for unwanted in ('Image', 'Experiment'):
            if unwanted in object_names:
                object_names.remove(unwanted)
        return object_names

    def make_object_file(self, object_names, image_set_numbers, workspace,
                         settings_group=None):
        """Make a file containing object measurements.

        object_names - sequence of names of the objects whose measurements
                       will be included
        image_set_numbers - the image sets whose data gets extracted
        workspace - workspace containing the measurements
        settings_group - the settings group used to choose to make the file
        """
        m = workspace.measurements
        acc_file_name = os.path.join(
            os.path.dirname(
                self.make_image_file_name(workspace, image_set_numbers[0],
                                          settings_group)),
            ACC_FILE_NAME)
        features = []
        objects_with_selected_features = []
        center_x = ("", "")
        center_y = ("", "")
        for object_name in object_names:
            if not object_name in m.get_object_names():
                continue
            rfeatures = m.get_feature_names(object_name)
            rfeatures = self.filter_columns(rfeatures, object_name)
            ofeatures = [
                x for x in rfeatures if not [y for y in REMOVE_FEAT if y in x]
            ]
            ofeatures = [(object_name, feature_name)
                         for feature_name in ofeatures]
            ofeatures.sort()
            features += ofeatures
            if ofeatures:
                objects_with_selected_features.append(object_name)
            # Haggish way to find feature to use as object coordinates.
            # BUG FIX: the original tested "not center_x", which is always
            # False for a non-empty tuple such as ("", ""), so coordinates
            # were only ever taken from an object literally named "Nuclei".
            # Test the stored object name instead ("first found wins,
            # Nuclei preferred").
            coord = [feat for feat in rfeatures if "Location_Center_" in feat]
            for feat in coord:
                if (not center_x[0] or "Nuclei" == object_name) and "Center_X" in feat:
                    center_x = (object_name, feat)
                if (not center_y[0] or "Nuclei" == object_name) and "Center_Y" in feat:
                    center_y = (object_name, feat)
        features.insert(0, center_y)
        features.insert(0, center_x)

        # Write ACC header file (one "Object_Feature" name per line).
        try:
            with open(acc_file_name, "w") as fd:
                for feat in features:
                    fd.write(feat[0] + "_" + feat[1] + "\n")
        except Exception:
            # Best-effort, as before, but no longer silent.
            logger.exception("ExportToACC: failed to write %s", acc_file_name)

        # Write one data file per image set.
        for img_number in image_set_numbers:
            try:
                file_name = self.make_image_file_name(workspace, img_number,
                                                      settings_group)
                # Guard against np.max of an empty sequence when no object
                # had any selected feature.
                counts = [
                    m.get_measurement(IMAGE, "Count_%s" % name, img_number)
                    for name in objects_with_selected_features
                ]
                counts = [c for c in counts if c is not None]
                object_count = int(np.max(counts)) if counts else 0
                # newline="" is required for csv.writer on text-mode files
                # (prevents doubled line endings on Windows).
                with open(file_name, "w", newline="") as fd:
                    writer = csv.writer(fd, delimiter=DELIMITER)
                    columns = [
                        np.repeat(img_number, object_count)
                        if feature_name == IMAGE_NUMBER
                        else np.arange(1, object_count + 1)
                        if feature_name == OBJECT_NUMBER
                        else np.repeat(
                            m.get_measurement(IMAGE, feature_name, img_number),
                            object_count)
                        if object_name == IMAGE
                        else m.get_measurement(object_name, feature_name,
                                               img_number)
                        for object_name, feature_name in features
                    ]
                    for obj_index in range(object_count):
                        row = [
                            column[obj_index]
                            if (column is not None
                                and obj_index < column.shape[0])
                            else np.nan  # np.NAN was removed in NumPy 2.0
                            for column in columns
                        ]
                        if self.nan_representation == NANS_AS_NULLS:
                            row = [
                                ""
                                if (field is None)
                                or (np.isreal(field) and not np.isfinite(field))
                                else field
                                for field in row
                            ]
                        writer.writerow(row)
            except Exception:
                logger.exception(
                    "ExportToACC: failed to write data for image set %d",
                    img_number)

    def prepare_to_create_batch(self, workspace, fn_alter_path):
        """Prepare to create a batch file.

        This function is called when CellProfiler is about to create a file
        for batch processing.

        workspace - the workspace being pickled for the batch run
        fn_alter_path - this is a function that takes a pathname on the
                        local host and returns a pathname on the remote
                        host. It handles issues such as replacing
                        backslashes and mapping mountpoints. It should be
                        called for every pathname stored in the settings or
                        legacy fields.

        ExportToACC has to convert the path to file names to
        something that can be used on the cluster.
        """
        self.directory.alter_for_create_batch_files(fn_alter_path)
        return True

    def upgrade_settings(self, setting_values, variable_revision_number,
                         module_name):
        """Adjust the setting values based on the version that saved them."""
        if variable_revision_number == 1:
            # Revision 2 inserted the suffix on/off flag and suffix text.
            setting_values = (setting_values[:4] + [NO, ""] +
                              setting_values[4:])
            variable_revision_number = 2

        # Standardize input/output directory name references
        SLOT_DIRCHOICE = 1
        directory = setting_values[SLOT_DIRCHOICE]
        directory = Directory.upgrade_setting(directory)
        setting_values = (setting_values[:SLOT_DIRCHOICE] + [directory] +
                          setting_values[SLOT_DIRCHOICE + 1:])

        return setting_values, variable_revision_number
# FilterObjects: keeps or discards objects based on measurements, CellProfiler
# Analyst rules/classifier files, or contact with the image border.
class FilterObjects(ObjectProcessing):
    module_name = "FilterObjects"

    variable_revision_number = 8

    def create_settings(self):
        # Build every UI setting; the doc strings below are runtime help
        # text rendered in the CellProfiler GUI.
        super(FilterObjects, self).create_settings()

        self.x_name.text = """Select the objects to filter"""

        self.x_name.doc = """\
Select the set of objects that you want to filter. This setting also
controls which measurement choices appear for filtering: you can only
filter based on measurements made on the object you select. Be sure
the **FilterObjects** module is downstream of the necessary **Measure**
modules. If you intend to use a measurement calculated by the
**CalculateMath** module to to filter objects, select the first operand’s
object here, because **CalculateMath** measurements are stored with the
first operand’s object."""

        self.y_name.text = """Name the output objects"""

        self.y_name.doc = "Enter a name for the collection of objects that are retained after applying the filter(s)."

        self.spacer_1 = Divider(line=False)

        self.mode = Choice(
            "Select the filtering mode",
            [MODE_MEASUREMENTS, MODE_RULES, MODE_BORDER, MODE_CLASSIFIERS],
            doc="""\
You can choose from the following options:

- *{MODE_MEASUREMENTS}*: Specify a per-object measurement made by an upstream
  module in the pipeline.
- *{MODE_BORDER}*: Remove objects touching the border of the image and/or the
  edges of an image mask.
- *{MODE_RULES}*: Use a file containing rules generated by CellProfiler
  Analyst. You will need to ensure that the measurements specified by the
  rules file are produced by upstream modules in the pipeline. This setting
  is not compatible with data processed as 3D.
- *{MODE_CLASSIFIERS}*: Use a file containing a trained classifier from
  CellProfiler Analyst. You will need to ensure that the measurements
  specified by the file are produced by upstream modules in the pipeline.
  This setting is not compatible with data processed as 3D.""".format(
                **{
                    "MODE_MEASUREMENTS": MODE_MEASUREMENTS,
                    "MODE_RULES": MODE_RULES,
                    "MODE_BORDER": MODE_BORDER,
                    "MODE_CLASSIFIERS": MODE_CLASSIFIERS,
                }
            ),
        )

        self.spacer_2 = Divider(line=False)

        # One SettingsGroup per measurement filter; count is persisted via
        # a HiddenCount so prepare_settings can rebuild the right number.
        self.measurements = []

        self.measurement_count = HiddenCount(self.measurements, "Measurement count")

        self.add_measurement(False)

        self.add_measurement_button = DoSomething(
            "", "Add another measurement", self.add_measurement
        )

        self.filter_choice = Choice(
            "Select the filtering method",
            FI_ALL,
            FI_LIMITS,
            doc="""\
*(Used only if filtering using measurements)*

There are five different ways to filter objects:

- *{FI_LIMITS}:* Keep an object if its measurement value falls within a
  range you specify.
- *{FI_MAXIMAL}:* Keep the object with the maximum value for the measurement
  of interest. If multiple objects share a maximal value, retain one object
  selected arbitrarily per image.
- *{FI_MINIMAL}:* Keep the object with the minimum value for the measurement
  of interest. If multiple objects share a minimal value, retain one object
  selected arbitrarily per image.
- *{FI_MAXIMAL_PER_OBJECT}:* This option requires you to choose a parent
  object. The parent object might contain several child objects of choice
  (for instance, mitotic spindles within a cell or FISH probe spots within a
  nucleus). Only the child object whose measurements equal the maximum
  child-measurement value among that set of child objects will be kept (for
  example, the longest spindle in each cell). You do not have to explicitly
  relate objects before using this module.
- *{FI_MINIMAL_PER_OBJECT}:* Same as *Maximal per object*, except filtering
  is based on the minimum value.""".format(
                **{
                    "FI_LIMITS": FI_LIMITS,
                    "FI_MAXIMAL": FI_MAXIMAL,
                    "FI_MINIMAL": FI_MINIMAL,
                    "FI_MAXIMAL_PER_OBJECT": FI_MAXIMAL_PER_OBJECT,
                    "FI_MINIMAL_PER_OBJECT": FI_MINIMAL_PER_OBJECT,
                }
            ),
        )

        self.per_object_assignment = Choice(
            "Assign overlapping child to",
            PO_ALL,
            doc="""\
*(Used only if filtering per object)*

A child object can overlap two parent objects and can have the
maximal/minimal measurement of all child objects in both parents. This
option controls how an overlapping maximal/minimal child affects filtering
of other children of its parents and to which parent the maximal child is
assigned. The choices are:

- *{PO_BOTH}*: The child will be assigned to both parents and all other
  children of both parents will be filtered. Only the maximal child per
  parent will be left, but if **RelateObjects** is used to relate the
  maximal child to its parent, one or the other of the overlapping parents
  will not have a child even though the excluded parent may have other
  child objects. The maximal child can still be assigned to both parents
  using a database join via the relationships table if you are using
  **ExportToDatabase** and separate object tables.
- *{PO_PARENT_WITH_MOST_OVERLAP}*: The child will be assigned to the parent
  with the most overlap and a child with a less maximal/minimal
  measurement, if available, will be assigned to other parents. Use this
  option to ensure that parents with an alternate non-overlapping child
  object are assigned some child object by a subsequent **RelateObjects**
  module.""".format(
                **{
                    "PO_BOTH": PO_BOTH,
                    "PO_PARENT_WITH_MOST_OVERLAP": PO_PARENT_WITH_MOST_OVERLAP,
                }
            ),
        )

        self.enclosing_object_name = LabelSubscriber(
            "Select the objects that contain the filtered objects",
            "None",
            doc="""\
*(Used only if a per-object filtering method is selected)*

This setting selects the container (i.e., parent) objects for the
*{FI_MAXIMAL_PER_OBJECT}* and *{FI_MINIMAL_PER_OBJECT}* filtering
choices.""".format(
                **{
                    "FI_MAXIMAL_PER_OBJECT": FI_MAXIMAL_PER_OBJECT,
                    "FI_MINIMAL_PER_OBJECT": FI_MINIMAL_PER_OBJECT,
                }
            ),
        )

        self.rules_directory = Directory(
            "Select the location of the rules or classifier file",
            doc="""\
*(Used only when filtering using {MODE_RULES} or {MODE_CLASSIFIERS})*

Select the location of the rules or classifier file that will be used for
filtering.

{IO_FOLDER_CHOICE_HELP_TEXT}
""".format(
                **{
                    "MODE_CLASSIFIERS": MODE_CLASSIFIERS,
                    "MODE_RULES": MODE_RULES,
                    "IO_FOLDER_CHOICE_HELP_TEXT": _help.IO_FOLDER_CHOICE_HELP_TEXT,
                }
            ),
        )

        self.rules_class = Choice(
            "Class number",
            choices=["1", "2"],
            choices_fn=self.get_class_choices,
            doc="""\
*(Used only when filtering using {MODE_RULES} or {MODE_CLASSIFIERS})*

Select which of the classes to keep when filtering. The CellProfiler
Analyst classifier user interface lists the names of the classes in
left-to-right order. **FilterObjects** uses the first class from
CellProfiler Analyst if you choose “1”, etc.

Please note the following:

- The object is retained if the object falls into the selected class.
- You can make multiple class selections. If you do so, the module will
  retain the object if the object falls into any of the selected
  classes.""".format(
                **{"MODE_CLASSIFIERS": MODE_CLASSIFIERS, "MODE_RULES": MODE_RULES}
            ),
        )

        def get_directory_fn():
            """Get the directory for the rules file name"""
            return self.rules_directory.get_absolute_path()

        def set_directory_fn(path):
            # Split an absolute path back into the Directory setting's
            # (choice, custom path) representation.
            dir_choice, custom_path = self.rules_directory.get_parts_from_path(path)
            self.rules_directory.join_parts(dir_choice, custom_path)

        self.rules_file_name = Filename(
            "Rules or classifier file name",
            "rules.txt",
            get_directory_fn=get_directory_fn,
            set_directory_fn=set_directory_fn,
            doc="""\
*(Used only when filtering using {MODE_RULES} or {MODE_CLASSIFIERS})*

The name of the rules or classifier file.

A rules file is a plain text file containing the complete set of rules.
Each line of the rules file should be a rule naming a measurement to be
made on the object you selected, for instance:

    IF (Nuclei_AreaShape_Area < 351.3, [0.79, -0.79], [-0.94, 0.94])

The above rule will score +0.79 for the positive category and -0.94 for the
negative category for nuclei whose area is less than 351.3 pixels and will
score the opposite for nuclei whose area is larger. The filter adds
positive and negative and keeps only objects whose positive score is higher
than the negative score.

A classifier file is a trained classifier exported from CellProfiler
Analyst. You will need to ensure that the measurements specified by the
file are produced by upstream modules in the pipeline. This setting is not
compatible with data processed as 3D.
""".format(
                **{"MODE_CLASSIFIERS": MODE_CLASSIFIERS, "MODE_RULES": MODE_RULES}
            ),
        )

        # Objects that should be relabeled in lockstep with the filtered set.
        self.additional_objects = []

        self.additional_object_count = HiddenCount(
            self.additional_objects, "Additional object count"
        )

        self.spacer_3 = Divider(line=False)

        self.additional_object_button = DoSomething(
            "Relabel additional objects to match the filtered object?",
            "Add an additional object",
            self.add_additional_object,
            doc="""\
Click this button to add an object to receive the same post-filtering labels as
the filtered object. This is useful in making sure that labeling is maintained
between related objects (e.g., primary and secondary objects) after filtering.""",
        )

    def get_class_choices(self, pipeline):
        # Returns the class labels/numbers offered by the rules_class setting.
        # NOTE(review): implicitly returns None when mode is neither
        # classifiers nor rules — callers appear to only invoke this in
        # those two modes; confirm before relying on it elsewhere.
        if self.mode == MODE_CLASSIFIERS:
            return self.get_bin_labels()
        elif self.mode == MODE_RULES:
            rules = self.get_rules()
            # Number of classes = width of the first rule's weight vector.
            nclasses = len(rules.rules[0].weights[0])
            return [str(i) for i in range(1, nclasses + 1)]

    def get_rules_class_choices(self, pipeline):
        # Like get_class_choices for rules, but falls back to two classes
        # when the rules file cannot be loaded (e.g. not chosen yet).
        try:
            rules = self.get_rules()
            nclasses = len(rules.rules[0].weights[0])
            return [str(i) for i in range(1, nclasses + 1)]
        except:
            return [str(i) for i in range(1, 3)]

    def add_measurement(self, can_delete=True):
        """Add another measurement to the filter list"""
        group = SettingsGroup()
        group.append(
            "measurement",
            Measurement(
                "Select the measurement to filter by",
                self.x_name.get_value,
                "AreaShape_Area",
                doc="""\
*(Used only if filtering using {MODE_MEASUREMENTS})*

See the **Measurements** modules help pages for more information on the
features measured.""".format(
                    **{"MODE_MEASUREMENTS": MODE_MEASUREMENTS}
                ),
            ),
        )
        group.append(
            "wants_minimum",
            Binary(
                "Filter using a minimum measurement value?",
                True,
                doc="""\
*(Used only if {FI_LIMITS} is selected for filtering method)*

Select "*{YES}*" to filter the objects based on a minimum acceptable object
measurement value. Objects which are greater than or equal to this value
will be retained.""".format(
                    **{"FI_LIMITS": FI_LIMITS, "YES": "Yes"}
                ),
            ),
        )
        group.append("min_limit", Float("Minimum value", 0))
        group.append(
            "wants_maximum",
            Binary(
                "Filter using a maximum measurement value?",
                True,
                doc="""\
*(Used only if {FI_LIMITS} is selected for filtering method)*

Select "*{YES}*" to filter the objects based on a maximum acceptable object
measurement value. Objects which are less than or equal to this value will
be retained.""".format(
                    **{"FI_LIMITS": FI_LIMITS, "YES": "Yes"}
                ),
            ),
        )
        group.append("max_limit", Float("Maximum value", 1))
        group.append("divider", Divider())
        self.measurements.append(group)
        if can_delete:
            # The first (mandatory) measurement group gets no remove button.
            group.append(
                "remover",
                RemoveSettingButton(
                    "", "Remove this measurement", self.measurements, group
                ),
            )

    def add_additional_object(self):
        """Append a (source object, relabeled name) group to additional_objects."""
        group = SettingsGroup()
        group.append(
            "object_name",
            LabelSubscriber("Select additional object to relabel", "None"),
        )
        group.append(
            "target_name",
            LabelName("Name the relabeled objects", "FilteredGreen"),
        )
        group.append(
            "remover",
            RemoveSettingButton(
                "", "Remove this additional object", self.additional_objects, group
            ),
        )
        group.append("divider", Divider(line=False))
        self.additional_objects.append(group)

    def prepare_settings(self, setting_values):
        """Make sure the # of slots for additional objects matches
        the anticipated number of additional objects"""
        additional_object_count = int(setting_values[ADDITIONAL_OBJECT_SETTING_INDEX])
        while len(self.additional_objects) > additional_object_count:
            self.remove_additional_object(self.additional_objects[-1].key)
        while len(self.additional_objects) < additional_object_count:
            self.add_additional_object()

        measurement_count = int(setting_values[MEASUREMENT_COUNT_SETTING_INDEX])
        while len(self.measurements) > measurement_count:
            del self.measurements[-1]
        while len(self.measurements) < measurement_count:
            self.add_measurement()

    def settings(self):
        # Order here defines the pipeline-file serialization order.
        settings = super(FilterObjects, self).settings()
        settings += [
            self.mode,
            self.filter_choice,
            self.enclosing_object_name,
            self.rules_directory,
            self.rules_file_name,
            self.rules_class,
            self.measurement_count,
            self.additional_object_count,
            self.per_object_assignment,
        ]

        # Repeated groups are serialized after the fixed settings.
        for x in self.measurements:
            settings += x.pipeline_settings()

        for x in self.additional_objects:
            settings += [x.object_name, x.target_name]

        return settings

    def help_settings(self):
        # Order of settings as they appear in the help documentation.
        return [
            self.x_name,
            self.y_name,
            self.mode,
            self.filter_choice,
            self.per_object_assignment,
            self.rules_directory,
            self.rules_file_name,
            self.rules_class,
            self.enclosing_object_name,
            self.additional_object_button,
        ]

    def visible_settings(self):
        # Show only the settings relevant to the chosen mode/method.
        visible_settings = super(FilterObjects, self).visible_settings()
        visible_settings += [self.spacer_2, self.mode]
        if self.mode == MODE_RULES or self.mode == MODE_CLASSIFIERS:
            visible_settings += [
                self.rules_file_name,
                self.rules_directory,
                self.rules_class,
            ]
            # Rules are addressed by number, classifier bins by name.
            self.rules_class.text = (
                "Class number" if self.mode == MODE_RULES else "Class name"
            )
            try:
                self.rules_class.test_valid(None)
            except:
                # Invalid/missing rules file: leave the choice as-is.
                pass
        elif self.mode == MODE_MEASUREMENTS:
            visible_settings += [self.spacer_1, self.filter_choice]
            if self.filter_choice in (FI_MINIMAL, FI_MAXIMAL):
                visible_settings += [
                    self.measurements[0].measurement,
                    self.measurements[0].divider,
                ]
            elif self.filter_choice in (FI_MINIMAL_PER_OBJECT, FI_MAXIMAL_PER_OBJECT):
                visible_settings += [
                    self.per_object_assignment,
                    self.measurements[0].measurement,
                    self.enclosing_object_name,
                    self.measurements[0].divider,
                ]
            elif self.filter_choice == FI_LIMITS:
                for i, group in enumerate(self.measurements):
                    visible_settings += [group.measurement, group.wants_minimum]
                    if group.wants_minimum:
                        visible_settings.append(group.min_limit)
                    visible_settings.append(group.wants_maximum)
                    if group.wants_maximum.value:
                        visible_settings.append(group.max_limit)
                    if i > 0:
                        # The first measurement group has no remove button.
                        visible_settings += [group.remover]
                    visible_settings += [group.divider]
                visible_settings += [self.add_measurement_button]
        visible_settings.append(self.spacer_3)
        for x in self.additional_objects:
            visible_settings += x.visible_settings()
        visible_settings += [self.additional_object_button]
        return visible_settings

    def validate_module(self, pipeline):
        """Make sure that the user has selected some limits when filtering"""
        if self.mode == MODE_MEASUREMENTS and self.filter_choice == FI_LIMITS:
            for group in self.measurements:
                if not (group.wants_minimum.value or group.wants_maximum.value):
                    raise ValidationError(
                        "Please enter a minimum and/or maximum limit for your measurement",
                        group.wants_minimum,
                    )
        if self.mode == MODE_RULES:
            try:
                rules = self.get_rules()
            except Exception as instance:
                logging.warning(
                    "Failed to load rules: %s", str(instance), exc_info=True
                )
                raise ValidationError(str(instance), self.rules_file_name)
            # Every measurement referenced by the rules must be produced by
            # a module upstream of this one.
            measurement_columns = pipeline.get_measurement_columns(self)
            for r in rules.rules:
                if not any(
                    [
                        mc[0] == r.object_name and mc[1] == r.feature
                        for mc in measurement_columns
                    ]
                ):
                    raise ValidationError(
                        (
                            "The rules file, %s, uses the measurement, %s "
                            "for object %s, but that measurement is not available "
                            "at this stage of the pipeline. Consider editing the "
                            "rules to match the available measurements or adding "
                            "measurement modules to produce the measurement."
                        )
                        % (self.rules_file_name, r.feature, r.object_name),
                        self.rules_file_name,
                    )
        elif self.mode == MODE_CLASSIFIERS:
            try:
                self.get_classifier()
                self.get_bin_labels()
                self.get_classifier_features()
            except IOError:
                raise ValidationError(
                    "Failed to load classifier file %s"
                    % self.rules_file_name.value,
                    self.rules_file_name,
                )
            except:
                raise ValidationError(
                    "Unable to load %s as a classifier file"
                    % self.rules_file_name.value,
                    self.rules_file_name,
                )

    def run(self, workspace):
        """Filter objects for this image set, display results"""
        src_objects = workspace.get_objects(self.x_name.value)
        # Each keep_* helper returns the (1-based) label indexes to retain.
        if self.mode == MODE_RULES:
            indexes = self.keep_by_rules(workspace, src_objects)
        elif self.mode == MODE_MEASUREMENTS:
            if self.filter_choice in (FI_MINIMAL, FI_MAXIMAL):
                indexes = self.keep_one(workspace, src_objects)
            if self.filter_choice in (FI_MINIMAL_PER_OBJECT, FI_MAXIMAL_PER_OBJECT):
                indexes = self.keep_per_object(workspace, src_objects)
            if self.filter_choice == FI_LIMITS:
                indexes = self.keep_within_limits(workspace, src_objects)
        elif self.mode == MODE_BORDER:
            indexes = self.discard_border_objects(src_objects)
        elif self.mode == MODE_CLASSIFIERS:
            indexes = self.keep_by_class(workspace, src_objects)
        else:
            raise ValueError("Unknown filter choice: %s" % self.mode.value)

        #
        # Create an array that maps label indexes to their new values
        # All labels to be deleted have a value in this array of zero
        #
        new_object_count = len(indexes)
        max_label = numpy.max(src_objects.segmented)
        label_indexes = numpy.zeros((max_label + 1,), int)
        label_indexes[indexes] = numpy.arange(1, new_object_count + 1)
        #
        # Loop over both the primary and additional objects
        #
        object_list = [(self.x_name.value, self.y_name.value)] + [
            (x.object_name.value, x.target_name.value)
            for x in self.additional_objects
        ]
        m = workspace.measurements
        for src_name, target_name in object_list:
            src_objects = workspace.get_objects(src_name)
            target_labels = src_objects.segmented.copy()
            #
            # Reindex the labels of the old source image
            #
            # Labels above max_label can occur in additional-object maps;
            # zero them so the label_indexes lookup below stays in bounds.
            target_labels[target_labels > max_label] = 0
            target_labels = label_indexes[target_labels]
            #
            # Make a new set of objects - retain the old set's unedited
            # segmentation for the new and generally try to copy stuff
            # from the old to the new.
            #
            target_objects = cellprofiler_core.object.Objects()
            target_objects.segmented = target_labels
            target_objects.unedited_segmented = src_objects.unedited_segmented
            #
            # Remove the filtered objects from the small_removed_segmented
            # if present. "small_removed_segmented" should really be
            # "filtered_removed_segmented".
            #
            small_removed = src_objects.small_removed_segmented.copy()
            small_removed[(target_labels == 0) & (src_objects.segmented != 0)] = 0
            target_objects.small_removed_segmented = small_removed
            if src_objects.has_parent_image:
                target_objects.parent_image = src_objects.parent_image
            workspace.object_set.add_objects(target_objects, target_name)

            self.add_measurements(workspace, src_name, target_name)

        if self.show_window:
            workspace.display_data.src_objects_segmented = src_objects.segmented
            workspace.display_data.target_objects_segmented = target_objects.segmented
            workspace.display_data.dimensions = src_objects.dimensions

    def display(self, workspace, figure):
        """Display what was filtered"""
        src_name = self.x_name.value
        src_objects_segmented = workspace.display_data.src_objects_segmented
        target_objects_segmented = workspace.display_data.target_objects_segmented
        dimensions = workspace.display_data.dimensions

        target_name = self.y_name.value

        # Side-by-side label images plus a before/after count table.
        figure.set_subplots((2, 2), dimensions=dimensions)

        figure.subplot_imshow_labels(
            0, 0, src_objects_segmented, title="Original: %s" % src_name
        )

        figure.subplot_imshow_labels(
            1,
            0,
            target_objects_segmented,
            title="Filtered: %s" % target_name,
            sharexy=figure.subplot(0, 0),
        )

        # Labels are sequential from 1, so the max label is the object count.
        statistics = [
            [numpy.max(src_objects_segmented)],
            [numpy.max(target_objects_segmented)],
        ]

        figure.subplot_table(
            0,
            1,
            statistics,
            row_labels=(
                "Number of objects pre-filtering",
                "Number of objects post-filtering",
            ),
        )

    def keep_one(self, workspace, src_objects):
        """Return an array containing the single object to keep

        workspace - workspace passed into Run
        src_objects - the Objects instance to be filtered
        """
        measurement = self.measurements[0].measurement.value
        src_name = self.x_name.value
        values = workspace.measurements.get_current_measurement(src_name, measurement)
        if len(values) == 0:
            return numpy.array([], int)
        # +1 converts the 0-based argmax/argmin position into a 1-based
        # object label index.
        best_idx = (
            numpy.argmax(values)
            if self.filter_choice == FI_MAXIMAL
            else numpy.argmin(values)
        ) + 1
        return numpy.array([best_idx], int)

    def keep_per_object(self, workspace, src_objects):
        """Return an array containing the best object per enclosing object

        workspace - workspace passed into Run
        src_objects - the Objects instance to be filtered
        """
        measurement = self.measurements[0].measurement.value
        src_name = self.x_name.value
        enclosing_name = self.enclosing_object_name.value
        src_objects = workspace.get_objects(src_name)
        enclosing_objects = workspace.get_objects(enclosing_name)
        enclosing_labels = enclosing_objects.segmented
        enclosing_max = enclosing_objects.count
        if enclosing_max == 0:
            return numpy.array([], int)
        enclosing_range = numpy.arange(1, enclosing_max + 1)
        #
        # Make a vector of the value of the measurement per label index.
        # We can then label each pixel in the image with the measurement
        # value for the object at that pixel.
        # For unlabeled pixels, put the minimum value if looking for the
        # maximum value and vice-versa
        #
        values = workspace.measurements.get_current_measurement(src_name, measurement)
        wants_max = self.filter_choice == FI_MAXIMAL_PER_OBJECT
        src_labels = src_objects.segmented
        src_count = src_objects.count
        if self.per_object_assignment == PO_PARENT_WITH_MOST_OVERLAP:
            #
            # Find the number of overlapping pixels in enclosing
            # and source objects
            #
            # Keep only pixels covered by BOTH a source and an enclosing
            # object, then group (src, enclosing) pairs to count overlap.
            mask = enclosing_labels * src_labels != 0
            enclosing_labels = enclosing_labels[mask]
            src_labels = src_labels[mask]
            order = numpy.lexsort((enclosing_labels, src_labels))
            src_labels = src_labels[order]
            enclosing_labels = enclosing_labels[order]
            # "firsts" marks the start of each run of identical
            # (src, enclosing) pairs; run length = overlap area in pixels.
            firsts = numpy.hstack(
                (
                    [0],
                    numpy.where(
                        (src_labels[:-1] != src_labels[1:])
                        | (enclosing_labels[:-1] != enclosing_labels[1:])
                    )[0]
                    + 1,
                    [len(src_labels)],
                )
            )
            areas = firsts[1:] - firsts[:-1]
            enclosing_labels = enclosing_labels[firsts[:-1]]
            src_labels = src_labels[firsts[:-1]]
            #
            # Re-sort by source label value and area descending
            #
            # Negate values when maximizing so that lexsort (ascending)
            # puts the best-measurement children first.
            if wants_max:
                svalues = -values
            else:
                svalues = values
            order = numpy.lexsort((-areas, svalues[src_labels - 1]))
            src_labels, enclosing_labels, areas = [
                x[order] for x in (src_labels, enclosing_labels, areas)
            ]
            firsts = numpy.hstack(
                (
                    [0],
                    numpy.where(src_labels[:-1] != src_labels[1:])[0] + 1,
                    src_labels.shape[:1],
                )
            )
            counts = firsts[1:] - firsts[:-1]
            #
            # Process them in order. The maximal or minimal child
            # will be assigned to the most overlapping parent and that
            # parent will be excluded.
# best_src_label = numpy.zeros(enclosing_max + 1, int) for idx, count in zip(firsts[:-1], counts): for i in range(count): enclosing_object_number = enclosing_labels[idx + i] if best_src_label[enclosing_object_number] == 0: best_src_label[enclosing_object_number] = src_labels[idx] break # # Remove best source labels = 0 and sort to get the list # best_src_label = best_src_label[best_src_label != 0] best_src_label.sort() return best_src_label else: tricky_values = numpy.zeros((len(values) + 1,)) tricky_values[1:] = values if wants_max: tricky_values[0] = -numpy.Inf else: tricky_values[0] = numpy.Inf src_values = tricky_values[src_labels] # # Now find the location of the best for each of the enclosing objects # fn = ( scipy.ndimage.maximum_position if wants_max else scipy.ndimage.minimum_position ) best_pos = fn(src_values, enclosing_labels, enclosing_range) best_pos = numpy.array( (best_pos,) if isinstance(best_pos, tuple) else best_pos ) best_pos = best_pos.astype(numpy.uint32) # # Get the label of the pixel at each location # indexes = src_labels[best_pos.transpose().tolist()] indexes = set(indexes) indexes = list(indexes) indexes.sort() return indexes[1:] if len(indexes) > 0 and indexes[0] == 0 else indexes def keep_within_limits(self, workspace, src_objects): """Return an array containing the indices of objects to keep workspace - workspace passed into Run src_objects - the Objects instance to be filtered """ src_name = self.x_name.value hits = None m = workspace.measurements for group in self.measurements: measurement = group.measurement.value values = m.get_current_measurement(src_name, measurement) if hits is None: hits = numpy.ones(len(values), bool) elif len(hits) < len(values): temp = numpy.ones(len(values), bool) temp[~hits] = False hits = temp low_limit = group.min_limit.value high_limit = group.max_limit.value if group.wants_minimum.value: hits[values < low_limit] = False if group.wants_maximum.value: hits[values > high_limit] = False indexes = 
numpy.argwhere(hits)[:, 0] indexes = indexes + 1 return indexes def discard_border_objects(self, src_objects): """Return an array containing the indices of objects to keep workspace - workspace passed into Run src_objects - the Objects instance to be filtered """ labels = src_objects.segmented if src_objects.has_parent_image and src_objects.parent_image.has_mask: mask = src_objects.parent_image.mask interior_pixels = scipy.ndimage.binary_erosion(mask) else: interior_pixels = scipy.ndimage.binary_erosion(numpy.ones_like(labels)) border_pixels = numpy.logical_not(interior_pixels) border_labels = set(labels[border_pixels]) if ( border_labels == {0} and src_objects.has_parent_image and src_objects.parent_image.has_mask ): # The assumption here is that, if nothing touches the border, # the mask is a large, elliptical mask that tells you where the # well is. That's the way the old Matlab code works and it's duplicated here # # The operation below gets the mask pixels that are on the border of the mask # The erosion turns all pixels touching an edge to zero. The not of this # is the border + formerly masked-out pixels. 
mask = src_objects.parent_image.mask interior_pixels = scipy.ndimage.binary_erosion(mask) border_pixels = numpy.logical_not(interior_pixels) border_labels = set(labels[border_pixels]) return list(set(labels.ravel()).difference(border_labels)) def get_rules(self): """Read the rules from a file""" rules_file = self.rules_file_name.value rules_directory = self.rules_directory.get_absolute_path() path = os.path.join(rules_directory, rules_file) if not os.path.isfile(path): raise ValidationError("No such rules file: %s" % path, self.rules_file_name) else: rules = cellprofiler.utilities.rules.Rules() rules.parse(path) return rules def load_classifier(self): """Load the classifier pickle if not cached returns classifier, bin_labels, name and features """ d = self.get_dictionary() file_ = self.rules_file_name.value directory_ = self.rules_directory.get_absolute_path() path_ = os.path.join(directory_, file_) if path_ not in d: if not os.path.isfile(path_): raise ValidationError( "No such classifier file: %s" % path_, self.rules_file_name ) else: import joblib d[path_] = joblib.load(path_) return d[path_] def get_classifier(self): return self.load_classifier()[0] def get_bin_labels(self): return self.load_classifier()[1] def get_classifier_features(self): return self.load_classifier()[3] def keep_by_rules(self, workspace, src_objects): """Keep objects according to rules workspace - workspace holding the measurements for the rules src_objects - filter these objects (uses measurement indexes instead) Open the rules file indicated by the settings and score the objects by the rules. Return the indexes of the objects that pass. 
""" rules = self.get_rules() rules_class = int(self.rules_class.value) - 1 scores = rules.score(workspace.measurements) if len(scores) > 0: is_not_nan = numpy.any(~numpy.isnan(scores), 1) best_class = numpy.argmax(scores[is_not_nan], 1).flatten() hits = numpy.zeros(scores.shape[0], bool) hits[is_not_nan] = best_class == rules_class indexes = numpy.argwhere(hits).flatten() + 1 else: indexes = numpy.array([], int) return indexes def keep_by_class(self, workspace, src_objects): """ Keep objects according to their predicted class :param workspace: workspace holding the measurements for the rules :param src_objects: filter these objects (uses measurement indexes instead) :return: indexes (base 1) of the objects that pass """ classifier = self.get_classifier() target_idx = self.get_bin_labels().index(self.rules_class.value) target_class = classifier.classes_[target_idx] features = [] for feature_name in self.get_classifier_features(): feature_name = feature_name.split("_", 1)[1] if feature_name == "x_loc": feature_name = M_LOCATION_CENTER_X elif feature_name == "y_loc": feature_name = M_LOCATION_CENTER_Y features.append(feature_name) feature_vector = numpy.column_stack( [ workspace.measurements[self.x_name.value, feature_name] for feature_name in features ] ) predicted_classes = classifier.predict(feature_vector) hits = predicted_classes == target_class indexes = numpy.argwhere(hits) + 1 return indexes.flatten() def get_measurement_columns(self, pipeline): return super(FilterObjects, self).get_measurement_columns( pipeline, additional_objects=[ (x.object_name.value, x.target_name.value) for x in self.additional_objects ], ) def prepare_to_create_batch(self, workspace, fn_alter_path): """Prepare to create a batch file This function is called when CellProfiler is about to create a file for batch processing. It will pickle the image set list's "legacy_fields" dictionary. This callback lets a module prepare for saving. 
pipeline - the pipeline to be saved image_set_list - the image set list to be saved fn_alter_path - this is a function that takes a pathname on the local host and returns a pathname on the remote host. It handles issues such as replacing backslashes and mapping mountpoints. It should be called for every pathname stored in the settings or legacy fields. """ self.rules_directory.alter_for_create_batch_files(fn_alter_path) return True def upgrade_settings(self, setting_values, variable_revision_number, module_name): if variable_revision_number == 1: # # Added CPA rules # setting_values = ( setting_values[:11] + [MODE_MEASUREMENTS, DEFAULT_INPUT_FOLDER_NAME, ".",] + setting_values[11:] ) variable_revision_number = 2 if variable_revision_number == 2: # # Forgot file name (???!!!) # setting_values = setting_values[:14] + ["rules.txt"] + setting_values[14:] variable_revision_number = 3 if variable_revision_number == 3: # # Allowed multiple measurements # Structure changed substantially. # ( target_name, object_name, measurement, filter_choice, enclosing_objects, wants_minimum, minimum_value, wants_maximum, maximum_value, wants_outlines, outlines_name, rules_or_measurements, rules_directory_choice, rules_path_name, rules_file_name, ) = setting_values[:15] additional_object_settings = setting_values[15:] additional_object_count = len(additional_object_settings) // 4 setting_values = [ target_name, object_name, rules_or_measurements, filter_choice, enclosing_objects, wants_outlines, outlines_name, rules_directory_choice, rules_path_name, rules_file_name, "1", str(additional_object_count), measurement, wants_minimum, minimum_value, wants_maximum, maximum_value, ] + additional_object_settings variable_revision_number = 4 if variable_revision_number == 4: # # Used Directory to combine directory choice & custom path # rules_directory_choice = setting_values[7] rules_path_name = setting_values[8] if rules_directory_choice == DIR_CUSTOM: rules_directory_choice = 
ABSOLUTE_FOLDER_NAME if rules_path_name.startswith("."): rules_directory_choice = DEFAULT_INPUT_SUBFOLDER_NAME elif rules_path_name.startswith("&"): rules_directory_choice = DEFAULT_OUTPUT_SUBFOLDER_NAME rules_path_name = "." + rules_path_name[1:] rules_directory = Directory.static_join_string( rules_directory_choice, rules_path_name ) setting_values = setting_values[:7] + [rules_directory] + setting_values[9:] variable_revision_number = 5 if variable_revision_number == 5: # # added rules class # setting_values = setting_values[:9] + ["1"] + setting_values[9:] variable_revision_number = 6 if variable_revision_number == 6: # # Added per-object assignment # setting_values = ( setting_values[:FIXED_SETTING_COUNT_V6] + [PO_BOTH] + setting_values[FIXED_SETTING_COUNT_V6:] ) variable_revision_number = 7 if variable_revision_number == 7: x_name = setting_values[1] y_name = setting_values[0] measurement_count = int(setting_values[10]) additional_object_count = int(setting_values[11]) n_measurement_settings = measurement_count * 5 additional_object_settings = setting_values[13 + n_measurement_settings :] additional_object_names = additional_object_settings[::4] additional_target_names = additional_object_settings[1::4] new_additional_object_settings = sum( [ [object_name, target_name] for object_name, target_name in zip( additional_object_names, additional_target_names ) ], [], ) setting_values = ( [x_name, y_name] + setting_values[2:5] + setting_values[7 : 13 + n_measurement_settings] + new_additional_object_settings ) variable_revision_number = 8 slot_directory = 5 setting_values[slot_directory] = Directory.upgrade_setting( setting_values[slot_directory] ) return setting_values, variable_revision_number