def test_load_objects():
    """Load a labels image as objects via LoadData and verify the
    segmentation and the per-object measurements.

    Writes a small random label matrix to a temporary .png, builds a
    one-row LoadData CSV referencing it, runs the module, and checks
    that the loaded objects match the written labels.
    """
    # build a reproducible random label matrix and save it as a .png
    r = numpy.random.RandomState()
    r.seed(1101)
    labels = r.randint(0, 10, size=(30, 20)).astype(numpy.uint8)
    handle, name = tempfile.mkstemp(".png")
    bioformats.write_image(name, labels, bioformats.PT_UINT8)
    os.close(handle)
    png_path, png_file = os.path.split(name)
    sbs_dir = os.path.join(tests.modules.example_images_directory(), "ExampleSBSImages")
    # one header row + one data row: objects file/path columns plus a DNA image
    csv_text = """%s_%s,%s_%s,%s_DNA,%s_DNA
%s,%s,Channel2-01-A-01.tif,%s
""" % (
        cellprofiler.measurement.C_OBJECTS_FILE_NAME,
        OBJECTS_NAME,
        cellprofiler.measurement.C_OBJECTS_PATH_NAME,
        OBJECTS_NAME,
        cellprofiler.measurement.C_FILE_NAME,
        cellprofiler.measurement.C_PATH_NAME,
        png_file,
        png_path,
        sbs_dir,
    )
    pipeline, module, csv_name = make_pipeline(csv_text)
    assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
    assert isinstance(module, cellprofiler.modules.loaddata.LoadData)
    module.wants_images.value = True
    try:
        image_set_list = cellprofiler.image.ImageSetList()
        measurements = cellprofiler.measurement.Measurements()
        workspace = cellprofiler.workspace.Workspace(pipeline, module, None, None, measurements, image_set_list)
        pipeline.prepare_run(workspace)
        key_names, g = pipeline.get_groupings(workspace)
        assert len(g) == 1
        module.prepare_group(workspace, g[0][0], g[0][1])
        # image numbers are 1-based; image-set indices are 0-based
        image_set = image_set_list.get_image_set(g[0][1][0] - 1)
        object_set = cellprofiler.object.ObjectSet()
        workspace = cellprofiler.workspace.Workspace(pipeline, module, image_set, object_set, measurements, image_set_list)
        module.run(workspace)
        objects = object_set.get_objects(OBJECTS_NAME)
        # loaded segmentation must equal the labels that were written out
        assert numpy.all(objects.segmented == labels)
        # labels drawn from 0..9 with 0 as background -> nine objects expected
        assert (measurements.get_current_image_measurement(
            cellprofiler.measurement.FF_COUNT % OBJECTS_NAME) == 9)
        for feature in (
            cellprofiler.measurement.M_LOCATION_CENTER_X,
            cellprofiler.measurement.M_LOCATION_CENTER_Y,
            cellprofiler.measurement.M_NUMBER_OBJECT_NUMBER,
        ):
            # one value per object for each per-object feature
            value = measurements.get_current_measurement(OBJECTS_NAME, feature)
            assert len(value) == 9
    finally:
        # always release the bioformats reader and remove the temp files
        bioformats.formatreader.clear_image_reader_cache()
        os.remove(name)
        os.remove(csv_name)
def test_group_on_two(): groups, workspace = make_image_sets( ( ("Plate", ("P-12345", "P-23456")), ("Well", ("A01", "A02", "A03")), ("Site", ("1", "2", "3", "4")), ), ( ( "DNA", "Wavelength", "1", cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor. CT_GRAYSCALE, ), ( "GFP", "Wavelength", "1", cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor. CT_GRAYSCALE, ), ), ) groups.wants_groups.value = True groups.grouping_metadata[0].metadata_choice.value = "Plate" groups.add_grouping_metadata() groups.grouping_metadata[1].metadata_choice.value = "Site" assert groups.prepare_run(workspace) m = workspace.measurements assert isinstance(m, cellprofiler.measurement.Measurements) image_numbers = m.get_image_numbers() pipeline = workspace.pipeline assert isinstance(pipeline, cellprofiler.pipeline.Pipeline) key_list, groupings = pipeline.get_groupings(workspace) assert len(key_list) == 2 assert key_list[0] == "Metadata_Plate" assert key_list[1] == "Metadata_Site" assert len(groupings) == 8 idx = 0 for plate in ("P-12345", "P-23456"): for site in ("1", "2", "3", "4"): grouping, image_set_list = groupings[idx] idx += 1 assert grouping["Metadata_Plate"] == plate assert grouping["Metadata_Site"] == site assert len(image_set_list) == 3 ftr = "_".join((cellprofiler.measurement.C_FILE_NAME, "DNA")) for image_number in image_set_list: file_name = m[cellprofiler.measurement.IMAGE, ftr, image_number] p, w, s, rest = file_name.split("_") assert p == plate assert s == site
def test_11_01_load_objects(self):
    """Legacy copy of test_load_objects: load a labels image as objects via
    LoadData and verify the segmentation and per-object measurements.

    Converted from unittest ``self.assertEqual``/``assertTrue`` calls to
    plain asserts for consistency with the pytest-style tests in this file.
    """
    # build a reproducible random label matrix and save it as a .png
    r = numpy.random.RandomState()
    r.seed(1101)
    labels = r.randint(0, 10, size=(30, 20)).astype(numpy.uint8)
    handle, name = tempfile.mkstemp(".png")
    bioformats.write_image(name, labels, bioformats.PT_UINT8)
    os.close(handle)
    png_path, png_file = os.path.split(name)
    sbs_dir = os.path.join(tests.modules.example_images_directory(),
                           "ExampleSBSImages")
    # one header row + one data row: objects file/path columns plus a DNA image
    csv_text = """%s_%s,%s_%s,%s_DNA,%s_DNA
%s,%s,Channel2-01-A-01.tif,%s
""" % (cellprofiler.measurement.C_OBJECTS_FILE_NAME, OBJECTS_NAME,
       cellprofiler.measurement.C_OBJECTS_PATH_NAME, OBJECTS_NAME,
       cellprofiler.measurement.C_FILE_NAME,
       cellprofiler.measurement.C_PATH_NAME,
       png_file, png_path, sbs_dir)
    pipeline, module, csv_name = self.make_pipeline(csv_text)
    assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
    assert isinstance(module, cellprofiler.modules.loaddata.LoadData)
    module.wants_images.value = True
    try:
        image_set_list = cellprofiler.image.ImageSetList()
        measurements = cellprofiler.measurement.Measurements()
        workspace = cellprofiler.workspace.Workspace(
            pipeline, module, None, None, measurements, image_set_list)
        pipeline.prepare_run(workspace)
        key_names, g = pipeline.get_groupings(workspace)
        assert len(g) == 1
        module.prepare_group(workspace, g[0][0], g[0][1])
        # image numbers are 1-based; image-set indices are 0-based
        image_set = image_set_list.get_image_set(g[0][1][0] - 1)
        object_set = cellprofiler.object.ObjectSet()
        workspace = cellprofiler.workspace.Workspace(
            pipeline, module, image_set, object_set, measurements,
            image_set_list)
        module.run(workspace)
        objects = object_set.get_objects(OBJECTS_NAME)
        # loaded segmentation must equal the labels that were written out
        assert numpy.all(objects.segmented == labels)
        # labels drawn from 0..9 with 0 as background -> nine objects expected
        assert measurements.get_current_image_measurement(
            cellprofiler.measurement.FF_COUNT % OBJECTS_NAME) == 9
        for feature in (cellprofiler.measurement.M_LOCATION_CENTER_X,
                        cellprofiler.measurement.M_LOCATION_CENTER_Y,
                        cellprofiler.measurement.M_NUMBER_OBJECT_NUMBER):
            # one value per object for each per-object feature
            value = measurements.get_current_measurement(
                OBJECTS_NAME, feature)
            assert len(value) == 9
    finally:
        # always release the bioformats reader and remove the temp files
        bioformats.formatreader.clear_image_reader_cache()
        os.remove(name)
        os.remove(csv_name)
def test_group_on_one(): groups = cellprofiler.modules.groups.Groups() groups, workspace = make_image_sets( ( ("Plate", ("P-12345", "P-23456")), ("Well", ("A01", "A02", "A03")), ("Site", ("1", "2", "3", "4")), ), ( ( "DNA", "Wavelength", "1", cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor.CT_GRAYSCALE, ), ( "GFP", "Wavelength", "1", cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor.CT_GRAYSCALE, ), ), ) groups.wants_groups.value = True groups.grouping_metadata[0].metadata_choice.value = "Plate" groups.prepare_run(workspace) m = workspace.measurements assert isinstance(m, cellprofiler.measurement.Measurements) image_numbers = m.get_image_numbers() assert len(image_numbers) == 24 numpy.testing.assert_array_equal( numpy.hstack([numpy.ones(12, int), numpy.ones(12, int) * 2]), m[ cellprofiler.measurement.IMAGE, cellprofiler.measurement.GROUP_NUMBER, image_numbers, ], ) numpy.testing.assert_array_equal( numpy.hstack([numpy.arange(1, 13)] * 2), m[ cellprofiler.measurement.IMAGE, cellprofiler.measurement.GROUP_INDEX, image_numbers, ], ) pipeline = workspace.pipeline assert isinstance(pipeline, cellprofiler.pipeline.Pipeline) key_list, groupings = pipeline.get_groupings(workspace) assert len(key_list) == 1 assert key_list[0] == "Metadata_Plate" assert len(groupings) == 2 for group_number, plate, (grouping, image_set_list) in zip( (1, 2), ("P-12345", "P-23456"), groupings ): assert grouping == dict(Metadata_Plate=plate) assert len(image_set_list) == 3 * 4 assert list(image_set_list) == list( range((group_number - 1) * 12 + 1, group_number * 12 + 1) ) for image_number in range(1 + (group_number - 1) * 12, 1 + group_number * 12): for image_name in ("DNA", "GFP"): ftr = "_".join((cellprofiler.measurement.C_FILE_NAME, image_name)) assert m[cellprofiler.measurement.IMAGE, ftr, image_number].startswith( plate )
def make_jobs(args):
    """Write LSF job scripts that run a CellProfiler pipeline in batches.

    Splits the pipeline's image sets into jobs (one per pipeline group when
    groupings exist, otherwise fixed-size batches), creates the cluster
    output/log/job directories next to the input folder, and writes one
    ``job_N.sh`` bsub script per batch.

    Parameters: ``args`` must provide xvfb, software, script,
    script_arguments, memory, queue, host_group and batch_size attributes.
    Returns ``(job_dir, number_of_jobs)``.

    Fixes vs. the original: ``sys.sydout.flush()`` typo (crashed the
    exception listener), Python-2-only ``print``/``file()`` constructs,
    deprecated ``inspect.getargspec``, and ``make_command`` being redefined
    on every loop iteration.
    """
    # how to make this nicer?
    xvfb = args.xvfb  # NOTE(review): unused here — presumably consumed elsewhere; confirm
    software = args.software
    script = args.script
    script_arguments = args.script_arguments
    memory = args.memory
    queue = args.queue
    host_group = args.host_group
    num_jobs_max = 1000  # cap used to derive a batch size when none is given

    print('')
    print('make_cellprofiler_jobs_LSF:')
    print('')

    #
    # determine files to analyze
    #
    CELLPROFILERDIR = '/g/software/linux/pack/cellprofiler-2.0.11047/lib'
    sys.path.insert(0, CELLPROFILERDIR)

    # try importing cellprofiler modules (only available once the path above is added)
    global cellprofiler
    import cellprofiler
    import cellprofiler.pipeline
    import cellprofiler.workspace
    import cellprofiler.measurements
    import cellprofiler.cpimage

    # LOAD PIPELINE
    pipeline_file = script
    if not os.path.isfile(pipeline_file):
        raise Exception("-- ERROR: pipeline file not found")

    cp_plugin_directory = None
    if 'CP_PLUGIN_DIRECTORY' in os.environ:
        cp_plugin_directory = os.environ['CP_PLUGIN_DIRECTORY']

    pipeline = cellprofiler.pipeline.Pipeline()

    def error_callback(event, caller):
        # Log pipeline load/run exceptions instead of letting them pass silently.
        if (isinstance(event, cellprofiler.pipeline.LoadExceptionEvent)
                or isinstance(event, cellprofiler.pipeline.RunExceptionEvent)):
            sys.stdout.write("Handling exception: {}\n".format(event))
            sys.stdout.write(traceback.format_exc())
            sys.stdout.flush()  # was sys.sydout.flush() — AttributeError in the handler

    pipeline.add_listener(error_callback)
    # pipeline.remove_listener(error_callback)

    # GET NUMBER OF IMAGES AND GROUPS
    # Check whether we have a new or an old version of CellProfiler:
    # new versions take a Workspace in prepare_run, old ones do not.
    import inspect
    # getfullargspec replaces the removed getargspec; element [0] is still the arg list
    argspec = inspect.getfullargspec(pipeline.prepare_run)
    if argspec[0][1] == 'workspace' or len(argspec[0]) == 2:
        print('New CellProfiler version')
        new_version = True
    else:
        print('Old CellProfiler version')
        new_version = False

    if new_version:
        # this should work for plain pipeline files ...
        try:
            pipeline.load(pipeline_file)
            image_set_list = cellprofiler.cpimage.ImageSetList()
            measurements = cellprofiler.measurements.Measurements()
            workspace = cellprofiler.workspace.Workspace(
                pipeline, None, None, None, measurements, image_set_list
            )
            grouping_argument = workspace
            result = pipeline.prepare_run(workspace)
            grouping_keys, groups = pipeline.get_groupings(grouping_argument)
            pipeline.prepare_group(grouping_argument, groups[0][0], groups[0][1])
            num_sets = image_set_list.count()
        except Exception:
            traceback.print_exc()
            # ... and this should work for files created with
            # the CreateBatchFile module.
            # NOTE(review): the original raised 'Unable to load pipeline file'
            # here, which made this fallback unreachable; it now runs as the
            # comments intended — confirm against Batch_data pipelines.
            measurements = cellprofiler.measurements.load_measurements(
                pipeline_file
            )
            print('Obtaining list of image sets...this can take a while...')
            image_set_list = measurements.get_image_numbers()
            grouping_keys = []
            num_sets = len(image_set_list)
    else:
        try:
            pipeline.load(pipeline_file)
        except Exception:
            traceback.print_exc()
            raise Exception('Unable to load pipeline file:', pipeline_file)
        # old API: prepare_run takes no workspace and returns the image-set list
        workspace = None
        grouping_argument = workspace
        print('Obtaining list of image sets...this can take a while...')
        result = pipeline.prepare_run(workspace)
        if not result:
            raise Exception("Failed to prepare running the pipeline")
        grouping_argument = result
        image_set_list = result
        grouping_keys, groups = pipeline.get_groupings(grouping_argument)
        num_sets = image_set_list.count()

    print("Image sets: {}".format(num_sets))
    if num_sets == 0:
        print('No image sets to process...finished')
        sys.exit(0)

    # GET IMAGE PATH
    input_dir = None
    # could be also an directory with image files if one does not use Batch_data.mat....
    if input_dir is None:
        loadimage_module_name = 'LoadImages'
        cp_modules = pipeline.modules()
        loadimage_module = None
        for module in cp_modules:
            if module.module_name == loadimage_module_name:
                loadimage_module = module
                break
        if loadimage_module:
            # the location setting encodes "<choice>|<path>"; keep the path part
            input_dir = str(loadimage_module.location).partition('|')[2]
            print("Image path: {}".format(input_dir))
        else:
            print('-- WARNING: The LoadImage module is not used in this'
                  ' pipeline. Default input folder is undefined')
            # print('-- ERROR: Could not load the image module!')
            # sys.exit(1)

    # CREATE BATCHES
    jobStartImages = []
    jobEndImages = []
    jobLengths = []
    if len(grouping_keys) > 0:
        # one job per pipeline group
        print('Using groupings to assign the jobs to {} groups.'.format(
            len(groups)))
        for group in groups:
            jobStartImages.append(group[1][0])
            jobEndImages.append(group[1][-1])
            jobLengths.append(len(group[1]))
        print('Starting images:')
        print(jobStartImages)
    else:
        print("No groupings assigned => "
              "images will be randomly assigned to the jobs.")
        if int(args.batch_size) > 0:
            batch_size = int(args.batch_size)
        else:
            # cap the job count at num_jobs_max, but at least 4 sets per job
            batch_size = max(4, int(num_sets / float(num_jobs_max)) + 1)
        jobStartImages = list(range(1, num_sets + 1, batch_size))
        for x in jobStartImages:
            jobEndImages.append(x + batch_size - 1)
        jobEndImages[-1] = num_sets  # the last batch may be short

    #
    # create directories
    #
    input_dir = input_dir.rstrip(os.path.sep)  # remove trailing slash if exists
    output_dir = input_dir + '--cluster'
    print('Cluster directory: {}'.format(output_dir))
    log_dir = os.path.join(output_dir, "log")   # contains information about job status
    job_dir = os.path.join(output_dir, "jobs")  # contains the actual job scripts
    ensure_empty_dir(output_dir)
    ensure_empty_dir(log_dir)
    ensure_empty_dir(job_dir)

    def make_command(software, script, script_arguments):
        # Build the headless CellProfiler command line (-c console, -b no
        # build check, -r run, -p pipeline file).
        cmd = [software, "-c -b -r", "-p {}".format(script), script_arguments]
        return ' '.join(cmd)

    #
    # write the job files
    #
    for iJob in range(len(jobStartImages)):
        # choose the image subset for this job, clamped to the total count
        start = jobStartImages[iJob]
        end = jobEndImages[iJob]
        if end > num_sets:
            end = num_sets

        script_name = os.path.join(job_dir, "job_{}.sh".format(iJob + 1))
        with open(script_name, "w") as script_file:
            # information to LSF
            txt = ['#!/bin/bash',
                   '#BSUB -oo "{}/job_{}--out.txt"'.format(log_dir, iJob + 1),
                   '#BSUB -eo "{}/job_{}--err.txt"'.format(log_dir, iJob + 1),
                   '#BSUB -M {}'.format(memory),
                   '#BSUB -R select[mem>{}] -R rusage[mem={}]'.format(memory, memory),
                   '#BSUB -R span[hosts=1]']
            script_file.write('\n'.join(txt) + '\n')
            if queue:
                script_file.write('#BSUB -q {}\n'.format(queue))
            if host_group:
                script_file.write('#BSUB -m {}\n'.format(host_group))
            script_file.write('echo "starting job"\n')
            # marker file so job start can be detected from the outside
            script_file.write('touch "{}/job_{}--started.touch"\n'.format(log_dir, iJob + 1))
            # do not produce core dumps
            script_file.write('ulimit -c 0\n')
            txt = ['echo "hostname:"', 'hostname',
                   'echo "number of cores:"', 'nproc',
                   'echo "total RAM:"', 'head -1 /proc/meminfo']
            script_file.write('\n'.join(txt) + '\n')
            # the actual CellProfiler invocation for this image range
            # using software without quotation as it does not work with
            cmd = make_command(software, script, "-f {} -l {}".format(start, end))
            script_file.write(cmd + '\n')
            # last line of the script, shown as the job name by LSF
            script_file.write('echo "job finished"\n')
        # make script executable
        os.system('chmod a+x "{}"'.format(script_name))

    return job_dir, len(jobStartImages)