def segmentation_for_img(raw_xy, predictions_xyc, multicut_workflow):
    """
    Segment a single image by running it through the multicut workflow's
    batch export.

    Parameters
    ----------
    raw_xy : vigra.VigraArray
        Raw image; handed to the workflow under the "Raw Data" role.
    predictions_xyc : vigra.VigraArray
        Predictions for the same image; handed over under the
        "Probabilities" role.
    multicut_workflow
        Workflow object exposing ``edgeTrainingWithMulticutApplet``,
        ``dataExportApplet`` and ``batchProcessingApplet``.

    Returns
    -------
    The single exported result, re-tagged via
    ``vigra.taggedView(..., axistags='xy')``.
    """
    assert are_same_xy(raw_xy, predictions_xyc)

    # TODO: move this workflow configuration into setup_multicut?
    edge_training_op = multicut_workflow.edgeTrainingWithMulticutApplet.topLevelOperator
    assert isinstance(edge_training_op, OpEdgeTrainingWithMulticut)
    multicut_workflow.dataExportApplet.topLevelOperator.OutputAxisOrder.setValue('xy')

    # Role order matters to the batch applet, hence the OrderedDict.
    inputs_by_role = OrderedDict()
    inputs_by_role["Raw Data"] = [DatasetInfo(preloaded_array=raw_xy)]
    inputs_by_role["Probabilities"] = [DatasetInfo(preloaded_array=predictions_xyc)]

    exported = multicut_workflow.batchProcessingApplet.run_export(
        inputs_by_role, export_to_array=True)
    assert len(exported) == 1

    result_xy = vigra.taggedView(exported[0], axistags='xy')
    assert are_same_xy(result_xy, raw_xy, predictions_xyc)
    return result_xy
def test_h5_stack_via_star_file_glob_and_defined_inner_path(h5_stack_dir, empty_project_file: h5py.File):
    """A "*.h5" glob combined with an explicit inner path yields the expected nickname."""
    star_glob = os.path.join(h5_stack_dir, "*.h5")
    # Use the first internal path found by globbing inside the matched files.
    first_inner_path = DatasetInfo.globInternalPaths(star_glob, "*")[0]

    dataset_info = FilesystemDatasetInfo(
        filePath=os.path.join(star_glob, first_inner_path),
        sequence_axis="z",
        project_file=empty_project_file,
    )

    assert dataset_info.nickname == "2d_apoptotic_binary_-volume-data"
    assert dataset_info.is_under_project_file()
def test_expand_path(h5_stack_dir):
    """
    Exercise DatasetInfo.expand_path with a directory glob, an inner-path
    glob, a single file, and a pathsep-joined list of relative paths.
    """
    def expand(*args, **kwargs):
        # Normalise results to Path objects so comparisons ignore separator style.
        return [Path(p) for p in DatasetInfo.expand_path(*args, **kwargs)]

    file_names = [
        "2d_apoptotic_binary_0.h5",
        "2d_apoptotic_binary_1.h5",
        "2d_apoptotic_binary_2.h5",
    ]
    expected_file_paths = [Path(h5_stack_dir) / name for name in file_names]
    expected_dataset_paths = [file_path / "volume/data" for file_path in expected_file_paths]

    # Glob over everything in the directory.
    assert expand(os.path.join(h5_stack_dir, "*")) == expected_file_paths

    # Glob over both the external files and the internal dataset names.
    assert expand(os.path.join(h5_stack_dir, "*.h5", "vol*")) == expected_dataset_paths

    # A plain path expands to just itself.
    single_file = os.path.join(h5_stack_dir, "2d_apoptotic_binary_1.h5")
    assert expand(single_file) == expected_file_paths[1:2]

    # Relative paths joined with os.path.pathsep are resolved against cwd.
    joined_relative_paths = os.path.pathsep.join(file_names)
    assert expand(joined_relative_paths, cwd=h5_stack_dir) == expected_file_paths
def handleImportLabelsAction():
    """
    Let the user pick a label volume from disk and import it into the
    currently viewed lane.

    The user selects exactly one file; if it offers several internal
    datasets, a SubvolumeSelectionDlg asks which one to use. The chosen
    dataset is read, reordered to the axis order of ``LabelInputs`` and
    passed to ``importLabels`` on the top-level operator view.

    ``self`` is taken from the enclosing scope. Returns None in all cases;
    problems are reported to the user through QMessageBox.
    """
    fileNames = ImageFileDialog(
        self, preferences_group="DataSelection", preferences_setting="recent image"
    ).getSelectedPaths()
    fileNames = list(map(str, fileNames))

    # For now, we require a single hdf5 file
    if len(fileNames) > 1:
        QMessageBox.critical(
            self, "Too many files", "Labels must be contained in a single hdf5 volume.")
        return
    if not fileNames:
        # user cancelled
        return

    file_path = fileNames[0]
    internal_paths = DatasetInfo.getPossibleInternalPathsFor(file_path)
    if not internal_paths:
        QMessageBox.critical(
            self, "No volumes in file", "Couldn't find a suitable dataset in your hdf5 file.")
        return

    if len(internal_paths) == 1:
        internal_path = internal_paths[0]
    else:
        dlg = SubvolumeSelectionDlg(internal_paths, self)
        if dlg.exec_() == QDialog.Rejected:
            return
        selected_index = dlg.combo.currentIndex()
        internal_path = str(internal_paths[selected_index])

    path_components = PathComponents(file_path)
    path_components.internalPath = str(internal_path)

    # Pre-bind both operators so the cleanup below never hits an unbound name
    # (which would mask the real exception) if construction fails part-way.
    opReader = None
    op5 = None
    try:
        top_op = self.topLevelOperatorView
        opReader = OpInputDataReader(parent=top_op.parent)
        opReader.FilePath.setValue(path_components.totalPath())

        # Reorder the axes
        op5 = OpReorderAxes(parent=top_op.parent)
        op5.AxisOrder.setValue(top_op.LabelInputs.meta.getAxisKeys())
        op5.Input.connect(opReader.Output)

        # Finally, import the labels
        top_op.importLabels(top_op.current_view_index(), op5.Output)
    finally:
        # Tear down downstream-first, and only what was actually created.
        if op5 is not None:
            op5.cleanUp()
        if opReader is not None:
            opReader.cleanUp()
def _append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.

    Globstrings are supported, in which case the files are converted to HDF5 first.

    Parameters:
        workflow: workflow whose ``dataSelectionApplet`` receives the new lane.
        input_filepath: path, globstring or url of the input data.
        axisorder: optional axis-order string passed to ``vigra.defaultAxistags``.
    """
    import errno

    # If the filepath is a globstring, convert the stack to h5  # todo: skip this?
    tmp_dir = tempfile.mkdtemp()
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath], tmp_dir)[0]

    try:
        os.rmdir(tmp_dir)
    except OSError as e:
        # rmdir on a non-empty directory reports ENOTEMPTY (or EEXIST on some
        # platforms); the numeric value differs between operating systems, so
        # compare against the symbolic constants instead of a literal 39.
        if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
            logger.warning(
                'Temporary directory {} was populated: should be deleted'.format(tmp_dir))
        else:
            raise

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))

    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)

    workflow.handleNewLanesAdded()
def ilastik_multicut(grayscale, bounary_volume, supervoxels, ilp_path,
                     LAZYFLOW_THREADS=1, LAZYFLOW_TOTAL_RAM_MB=None,
                     logfile="/dev/null", extra_cmdline_args=None):
    """
    Open the given ilastik project headlessly and run its batch export on
    the given in-memory volumes.

    grayscale: array tagged 'zyx' and passed under the "Raw Data" role.
    bounary_volume: array tagged 'zyxc' and passed under the "Probabilities" role.
    supervoxels: array tagged 'zyx' and passed under the "Superpixels" role.
    ilp_path: project file; converted with str() before use.
    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: exported to ilastik via
        environment variables. If the RAM limit is None it defaults to a
        share of (machine RAM - 1 GB) proportional to the thread count.
    logfile: log file path; unless it is "/dev/null", the process name is
        inserted before the extension so each process writes its own file.
    extra_cmdline_args: optional list of additional ilastik command-line
        arguments. The list is not modified.

    Returns the single exported segmentation (asserted to be 3-dimensional).
    """
    print('status=multicut')
    print("Starting ilastik_multicut() ...")
    print("grayscale volume: dtype={}, shape={}".format(
        str(grayscale.dtype), grayscale.shape))
    print("boundary volume: dtype={}, shape={}".format(
        str(bounary_volume.dtype), bounary_volume.shape))
    print("supervoxels volume: dtype={}, shape={}".format(
        str(supervoxels.dtype), supervoxels.shape))

    import os
    from collections import OrderedDict
    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo

    print("ilastik_multicut(): Done with imports")

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use.
        # psutil reports bytes, so convert to MB before doing the arithmetic.
        machine_ram_mb = psutil.virtual_memory().total // 1024**2
        machine_ram_mb -= 1024  # Leave 1 GB RAM for the OS.

        # Floor division keeps the value an integer on Python 3 as well.
        LAZYFLOW_TOTAL_RAM_MB = (
            LAZYFLOW_THREADS * machine_ram_mb // multiprocessing.cpu_count())

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Build a fresh list: appending in place would alter the caller's list
    # (or a shared default) and accumulate the flag across calls.
    extra_cmdline_args = list(extra_cmdline_args or []) + ['--output_axis_order=zyx']

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
        extra_cmdline_args)
    args.headless = True
    args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = str(ilp_path)
    args.readonly = True

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(uuid.uuid1())

    # To avoid conflicts between processes, give each process its own logfile to write to.
    if logfile != "/dev/null":
        base, ext = os.path.splitext(logfile)
        logfile = base + '.' + args.process_name + ext

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    print("ilastik_multicut(): Creating shell...")

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args, extra_workflow_cmdline_args)

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.multicutWorkflow import MulticutWorkflow
    #assert isinstance(shell.workflow, MulticutWorkflow)

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See MulticutWorkflow.ROLE_NAMES)
    raw_data_array = vigra.taggedView(grayscale, 'zyx')
    probabilities_array = vigra.taggedView(bounary_volume, 'zyxc')
    superpixels_array = vigra.taggedView(supervoxels, 'zyx')

    role_data_dict = OrderedDict([
        ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)]),
        ("Probabilities", [DatasetInfo(preloaded_array=probabilities_array)]),
        ("Superpixels", [DatasetInfo(preloaded_array=superpixels_array)]),
    ])

    print("ilastik_multicut(): Starting export...")

    # Run the export via the BatchProcessingApplet
    segmentation_list = shell.workflow.batchProcessingApplet.run_export(
        role_data_dict, export_to_array=True)
    assert len(segmentation_list) == 1
    segmentation = segmentation_list[0]

    assert segmentation.ndim == 3
    print('status=multicut finished')
    return segmentation
# Tagging the data this way ensures that ilastik interprets the axes correctly.
input_data1 = vigra.taggedView(input_data1, 'yxc')
input_data2 = vigra.taggedView(input_data2, 'yxc')

# In case you're curious about which label class is which,
# let's read the label names from the project file.
label_names = opPixelClassification.LabelNames.value
label_colors = opPixelClassification.LabelColors.value
probability_colors = opPixelClassification.PmapColors.value

# Single-argument print() gives the same space-separated output on
# Python 2 and Python 3 (a bare print statement is a SyntaxError on 3).
print(" ".join(str(v) for v in (label_names, label_colors, probability_colors)))

# Construct an OrderedDict of role-names -> DatasetInfos
# (See PixelClassificationWorkflow.ROLE_NAMES)
role_data_dict = OrderedDict([("Raw Data", [
    DatasetInfo(preloaded_array=input_data1),
    DatasetInfo(preloaded_array=input_data2),
])])

## Note: If you want to pull your data from disk instead of in-memory, just provide filepaths like so:
# role_data_dict = OrderedDict([ ("Raw Data", [ '/path/to/input-file-1.png',
#                                               '/path/to/input-file-2.h5/mydata' ]) ])

# Run the export via the BatchProcessingApplet
# Note: If you don't provide export_to_array, then the results will
#       be exported to disk according to your project's DataExport settings.
#       In that case, run_export() returns None.
predictions = shell.workflow.batchProcessingApplet.run_export(
    role_data_dict, export_to_array=True)

print("Computed {} result arrays:".format(len(predictions)))
def classify_pixel(input_data, classifier, threads=8, ram=4000):
    """
    Runs a pre-trained ilastik classifier on a volume of data
    Adapted from Stuart Berg's example here:
    https://github.com/ilastik/ilastik/blob/master/examples/example_python_client.py

    Arguments:
        input_data: data to be classified - 3D numpy array (tagged 'xyz' below)
        classifier: ilastik trained/classified file
        threads: number of thread to use for classifying input data
        ram: RAM to use in MB

    Returns:
        The results of the batch export for ``input_data`` after
        ``np.squeeze`` (singleton dimensions removed).
    """
    import os
    from collections import OrderedDict

    import numpy as np
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo
    from ilastik.workflows.pixelClassification import PixelClassificationWorkflow

    # Before we start ilastik, prepare these environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(threads)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(ram)

    # Set the command-line arguments directly into argparse.Namespace object
    # Provide your project file, and don't forget to specify headless.
    args = ilastik_main.parser.parse_args([])
    args.headless = True
    args.project = classifier

    # Instantiate the 'shell', (an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args)
    assert isinstance(shell.workflow, PixelClassificationWorkflow)

    # Obtain the training operator
    opPixelClassification = shell.workflow.pcApplet.topLevelOperator

    # Sanity checks
    assert len(opPixelClassification.InputImages) > 0
    assert opPixelClassification.Classifier.ready()

    print("input_data.shape", input_data.shape)

    # Tagging the data ensures that ilastik interprets the axes correctly.
    input_data = vigra.taggedView(input_data, 'xyz')

    # In case you're curious about which label class is which,
    # let's read the label names from the project file.
    label_names = opPixelClassification.LabelNames.value
    label_colors = opPixelClassification.LabelColors.value
    probability_colors = opPixelClassification.PmapColors.value

    print("label_names, label_colors, probability_colors",
          label_names, label_colors, probability_colors)

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See PixelClassificationWorkflow.ROLE_NAMES)
    role_data_dict = OrderedDict([("Raw Data", [DatasetInfo(preloaded_array=input_data)])])

    # Run the export via the BatchProcessingApplet.
    # export_to_array=True asks for the results as in-memory arrays
    # instead of writing them according to the project's DataExport settings.
    predictions = shell.workflow.batchProcessingApplet.run_export(
        role_data_dict, export_to_array=True)
    predictions = np.squeeze(predictions)
    print("predictions.dtype, predictions.shape", predictions.dtype, predictions.shape)
    print("DONE.")
    return predictions
def run_ilastik_stage(stage_num, ilp_path, input_vol, mask, output_path,
                      LAZYFLOW_THREADS=1, LAZYFLOW_TOTAL_RAM_MB=None,
                      logfile="/dev/null", extra_cmdline_args=None):
    """
    Open the given ilastik project headlessly and export its results for
    ``input_vol`` to an hdf5 file.

    stage_num: appended to the process name used in log messages and in
        the per-process logfile name.
    ilp_path: path to the project file.
    input_vol: either a str (used as the file path of the "Raw Data"
        input) or an array, which is tagged 'zyx'.
    mask: optional array tagged 'zyx' and passed as "Prediction Mask";
        None means no mask.
    output_path: set as the export's OutputFilenameFormat; the data is
        written with internal path 'predictions'.
    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: exported to ilastik via
        environment variables. If the RAM limit is None it defaults to a
        share of (machine RAM - 1 GB) proportional to the thread count.
    logfile: log file path; unless it is "/dev/null", the process name is
        inserted before the extension.
    extra_cmdline_args: optional list of additional ilastik command-line
        arguments.

    Returns None. Asserts that exactly one path was exported and that it
    equals ``output_path + '/predictions'``.
    """
    import os
    from collections import OrderedDict
    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use.
        # psutil reports bytes, so convert to MB before doing the arithmetic.
        machine_ram_mb = psutil.virtual_memory().total // 1024**2
        machine_ram_mb -= 1024  # Leave 1 GB RAM for the OS.

        # Floor division keeps the value an integer on Python 3 as well.
        LAZYFLOW_TOTAL_RAM_MB = (
            LAZYFLOW_THREADS * machine_ram_mb // multiprocessing.cpu_count())

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    # Always hand argparse a real list: passing None would make it read sys.argv.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
        list(extra_cmdline_args or []))
    args.headless = True
    args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = ilp_path
    args.readonly = True

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(uuid.uuid1()) + "-" + str(stage_num)

    # To avoid conflicts between processes, give each process its own logfile to write to.
    if logfile != "/dev/null":
        base, ext = os.path.splitext(logfile)
        logfile = base + '.' + args.process_name + ext

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args, extra_workflow_cmdline_args)

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.pixelClassification import PixelClassificationWorkflow
    #assert isinstance(shell.workflow, PixelClassificationWorkflow)

    opInteractiveExport = shell.workflow.batchProcessingApplet.dataExportApplet.topLevelOperator.getLane(0)
    opInteractiveExport.OutputFilenameFormat.setValue(output_path)
    opInteractiveExport.OutputInternalPath.setValue('predictions')
    opInteractiveExport.OutputFormat.setValue('hdf5')

    # NOTE(review): num_channels is not used below — presumably a leftover
    # from the array-export variant of this function; confirm these reads
    # have no needed side effect before removing them.
    selected_result = opInteractiveExport.InputSelection.value
    num_channels = opInteractiveExport.Inputs[selected_result].meta.shape[-1]

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See PixelClassificationWorkflow.ROLE_NAMES)
    if isinstance(input_vol, str):
        role_data_dict = OrderedDict([("Raw Data", [DatasetInfo(filepath=input_vol)])])
    else:
        # If given raw data, we assume it's grayscale, zyx order (stage 1)
        raw_data_array = vigra.taggedView(input_vol, 'zyx')
        role_data_dict = OrderedDict([
            ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)]),
        ])

    if mask is not None:
        # If there's a mask, we might be able to save some computation time.
        mask = vigra.taggedView(mask, 'zyx')
        role_data_dict["Prediction Mask"] = [DatasetInfo(preloaded_array=mask)]

    # Run the export via the BatchProcessingApplet
    export_paths = shell.workflow.batchProcessingApplet.run_export(
        role_data_dict, export_to_array=False)

    assert len(export_paths) == 1
    assert export_paths[0] == output_path + '/predictions', \
        "Output path was {}".format(export_paths[0])
def classify_pixel_hdf(hdf_data_set_name, classifier, threads, ram):
    """
    Interface function to ilastik pixel classification.

    Runs a pre-trained ilastik classifier on a volume stored in an hdf5 file.
    Adapted from Stuart Berg's example here:
    https://github.com/ilastik/ilastik/blob/master/examples/example_python_client.py

    Parameters:
        hdf_data_set_name: identifies the dataset to classify; it is passed
            straight to DatasetInfo and then tagged with 'zyx' axes
        classifier: ilastik trained/classified file (the project to load)
        threads: number of threads to use for classifying the input data
        ram: RAM to use in MB

    Returns:
        Whatever the batch export returns when run with export_to_array=False.
        NOTE(review): the variable name suggests the exported hdf5 dataset
        path(s) — confirm against BatchProcessingApplet.run_export.
    """
    # ilastik picks its resource limits up from the environment, so these
    # must be in place before the shell is created.
    for env_key, env_value in (("LAZYFLOW_THREADS", threads),
                               ("LAZYFLOW_TOTAL_RAM_MB", ram)):
        os.environ[env_key] = str(env_value)

    # Fill in an argparse.Namespace by hand instead of parsing a command line:
    # headless mode plus the project file is all that is needed.
    cli_args = ilastik_main.parser.parse_args([])
    cli_args.headless = True
    cli_args.project = classifier

    # Creating the shell (an ilastik.shell.HeadlessShell) also loads the
    # project file into shell.projectManager.
    headless_shell = ilastik_main.main(cli_args)
    assert isinstance(headless_shell.workflow, PixelClassificationWorkflow)

    # The training operator carries the classifier and the label metadata.
    training_op = headless_shell.workflow.pcApplet.topLevelOperator

    # Sanity checks
    assert len(training_op.InputImages) > 0
    assert training_op.Classifier.ready()

    # Report which label class is which, as stored in the project file.
    names = training_op.LabelNames.value
    colors = training_op.LabelColors.value
    pmap_colors = training_op.PmapColors.value
    print("label_names, label_colors, probability_colors",
          names, colors, pmap_colors)

    # Describe the input volume: slices, rows and columns.
    volume_info = DatasetInfo(hdf_data_set_name)
    volume_info.axistags = vigra.defaultAxistags('zyx'.encode('ascii'))

    # Role name -> list of DatasetInfos (see PixelClassificationWorkflow.ROLE_NAMES).
    inputs_by_role = OrderedDict()
    inputs_by_role["Raw Data"] = [volume_info]

    # With export_to_array=False the results are exported to disk according
    # to the project's DataExport settings.
    export_result = headless_shell.workflow.batchProcessingApplet.run_export(
        inputs_by_role, export_to_array=False)

    print("DONE WITH CLASSIFICATION.")
    return export_result
def test_create_nickname_for_single_file_does_not_contain_extension(h5_colon_path_stack):
    """The nickname built from a single expanded path drops the file extension."""
    first_path_only = DatasetInfo.expand_path(h5_colon_path_stack)[0:1]
    assert DatasetInfo.create_nickname(first_path_only) == "2d_apoptotic_binary_0"
def test_create_nickname(h5_colon_path_stack):
    """The nickname built from the whole expanded stack is the expected shared prefix."""
    all_paths = DatasetInfo.expand_path(h5_colon_path_stack)
    assert DatasetInfo.create_nickname(all_paths) == "2d_apoptotic_binary_"
def test_create_nickname_with_internal_paths(h5_colon_path_stack_with_inner_paths):
    """With inner paths present, the nickname also carries the dash-joined inner path."""
    all_paths = DatasetInfo.expand_path(h5_colon_path_stack_with_inner_paths)
    assert DatasetInfo.create_nickname(all_paths) == "2d_apoptotic_binary_-volume-data"
def classify_pixel_hdf(hdf_data_set_name, classifier, threads, ram):
    """
    Runs a pre-trained ilastik classifier on a volume of data given in an hdf5 file
    Adapted from Stuart Berg's example here:
    https://github.com/ilastik/ilastik/blob/master/examples/example_python_client.py

    Parameters:
        hdf_data_set_name: identifies the data to be classified; handed to
            DatasetInfo and tagged with 'tyx' axes
        classifier: ilastik trained/classified file
        threads: number of thread to use for classifying input data
        ram: RAM to use in MB

    Returns:
        The value returned by the batch export (run with export_to_array=False).
        NOTE(review): presumably the exported hdf5 dataset path(s) — confirm
        against BatchProcessingApplet.run_export.
    """
    # Environment must be configured before ilastik starts.
    os.environ["LAZYFLOW_THREADS"] = str(threads)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(ram)

    # Build the argparse.Namespace directly: headless, with our project file.
    namespace = ilastik_main.parser.parse_args([])
    namespace.headless = True
    namespace.project = classifier

    # Instantiating the shell (an ilastik.shell.HeadlessShell) loads the
    # project file into shell.projectManager as well.
    ilastik_shell = ilastik_main.main(namespace)
    assert isinstance(ilastik_shell.workflow, PixelClassificationWorkflow)

    # Training operator of the pixel classification applet.
    pc_operator = ilastik_shell.workflow.pcApplet.topLevelOperator

    # Sanity checks
    assert len(pc_operator.InputImages) > 0
    assert pc_operator.Classifier.ready()

    # Print the label metadata stored in the project, for reference.
    print("label_names, label_colors, probability_colors",
          pc_operator.LabelNames.value,
          pc_operator.LabelColors.value,
          pc_operator.PmapColors.value)

    # One DatasetInfo for the "Raw Data" role
    # (see PixelClassificationWorkflow.ROLE_NAMES).
    raw_info = DatasetInfo(hdf_data_set_name)
    raw_info.axistags = vigra.defaultAxistags('tyx'.encode('ascii'))
    roles = OrderedDict((("Raw Data", [raw_info]),))

    # export_to_array=False: results go to disk according to the project's
    # DataExport settings rather than being returned as arrays.
    batch_applet = ilastik_shell.workflow.batchProcessingApplet
    exported = batch_applet.run_export(roles, export_to_array=False)

    print("DONE WITH CLASSIFICATION.")
    return exported
def ilastik_predict_with_array(gray_vol, mask, ilp_path, selected_channels=None, normalize=True,
                               LAZYFLOW_THREADS=1, LAZYFLOW_TOTAL_RAM_MB=None,
                               logfile="/dev/null", extra_cmdline_args=None):
    """
    Using ilastik's python API, open the given project file and run a prediction on the given raw data array.

    Other than the project file, nothing is read or written using the hard disk.

    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary".
         'None' can be given, which means "predict everything".

    ilp_path: Path to the project file.  ilastik also accepts a url to a DVID key-value, which will be downloaded and opened as an ilp

    selected_channels: A list of channel indexes to select and return from the prediction results.
                       'None' can also be given, which means "return all prediction channels".
                       You may also pass a *nested* list, in which case groups of channels can be
                       combined (summed) into their respective output channels.
                       For example: selected_channels=[0,3,[2,4],7] means the output will have 4 channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).

    normalize: Renormalize all outputs so the channels sum to 1 everywhere.
               That is, (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels will be simply given a value of 1/N in all channels.

    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Passed to ilastik via environment variables.
               If the RAM limit is None it defaults to a share of (machine RAM - 1 GB)
               proportional to the thread count.

    logfile: log file path; unless it is "/dev/null", the process name is inserted before the extension.

    extra_cmdline_args: optional list of additional ilastik command-line arguments.

    Returns the selected (and optionally normalized) prediction channels.
    """
    print("ilastik_predict_with_array(): Starting with raw data: dtype={}, shape={}".format(
        str(gray_vol.dtype), gray_vol.shape))

    import os
    from collections import OrderedDict
    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo
    from lazyflow.operators.cacheMemoryManager import CacheMemoryManager

    import logging
    logging.getLogger(__name__).info('status=ilastik prediction')

    print("ilastik_predict_with_array(): Done with imports")

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use.
        # psutil reports bytes, so convert to MB before doing the arithmetic.
        machine_ram_mb = psutil.virtual_memory().total // 1024**2
        machine_ram_mb -= 1024  # Leave 1 GB RAM for the OS.

        # Floor division keeps the value an integer on Python 3 as well.
        LAZYFLOW_TOTAL_RAM_MB = (
            LAZYFLOW_THREADS * machine_ram_mb // multiprocessing.cpu_count())

    # Before we start ilastik, prepare the environment variable settings.
    lazyflow_env_keys = ("LAZYFLOW_THREADS",
                         "LAZYFLOW_TOTAL_RAM_MB",
                         "LAZYFLOW_STATUS_MONITOR_SECONDS")
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    # Always hand argparse a real list: passing None would make it read sys.argv.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
        list(extra_cmdline_args or []))
    args.headless = True
    args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = str(ilp_path)
    args.readonly = True

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(uuid.uuid1())

    # To avoid conflicts between processes, give each process its own logfile to write to.
    if logfile != "/dev/null":
        base, ext = os.path.splitext(logfile)
        logfile = base + '.' + args.process_name + ext

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    print("ilastik_predict_with_array(): Creating shell...")

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args, extra_workflow_cmdline_args)

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.pixelClassification import PixelClassificationWorkflow
    #assert isinstance(shell.workflow, PixelClassificationWorkflow)

    # From here on the shell is live: make sure the cache monitor thread and
    # the environment are cleaned up even when validation or the export fails.
    try:
        # Construct an OrderedDict of role-names -> DatasetInfos
        # (See PixelClassificationWorkflow.ROLE_NAMES)
        raw_data_array = vigra.taggedView(gray_vol, 'zyx')
        role_data_dict = OrderedDict([
            ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)]),
        ])

        if mask is not None:
            # If there's a mask, we might be able to save some computation time.
            mask = vigra.taggedView(mask, 'zyx')
            role_data_dict["Prediction Mask"] = [DatasetInfo(preloaded_array=mask)]

        print("ilastik_predict_with_array(): Starting export...")

        # Sanity checks
        opInteractiveExport = shell.workflow.batchProcessingApplet.dataExportApplet.topLevelOperator.getLane(0)
        selected_result = opInteractiveExport.InputSelection.value
        num_channels = opInteractiveExport.Inputs[selected_result].meta.shape[-1]

        # For convenience, verify the selected channels before we run the export.
        if selected_channels:
            assert isinstance(selected_channels, list)
            for selection in selected_channels:
                if isinstance(selection, list):
                    assert all(c < num_channels for c in selection), \
                        "Selected channels ({}) exceed number of prediction classes ({})"\
                        .format( selected_channels, num_channels )
                else:
                    assert selection < num_channels, \
                        "Selected channels ({}) exceed number of prediction classes ({})"\
                        .format( selected_channels, num_channels )

        # Run the export via the BatchProcessingApplet
        prediction_list = shell.workflow.batchProcessingApplet.run_export(
            role_data_dict, export_to_array=True)
        assert len(prediction_list) == 1
        predictions = prediction_list[0]

        assert predictions.shape[-1] == num_channels
        selected_predictions = select_channels(predictions, selected_channels)

        if normalize:
            normalize_channels_in_place(selected_predictions)
    finally:
        # Cleanup: kill cache monitor thread
        CacheMemoryManager().stop()
        CacheMemoryManager.instance = None

        # Cleanup environment (pop tolerates a key that is already gone)
        for env_key in lazyflow_env_keys:
            os.environ.pop(env_key, None)

    logging.getLogger(__name__).info('status=ilastik prediction finished')
    return selected_predictions