Example #1
def test_load_objects():
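    """Test loading object label matrices through LoadData"""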
    r = numpy.random.RandomState()
    r.seed(1101)
    labels = r.randint(0, 10, size=(30, 20)).astype(numpy.uint8)
    handle, name = tempfile.mkstemp(".png")
    bioformats.write_image(name, labels, bioformats.PT_UINT8)
    os.close(handle)
    png_path, png_file = os.path.split(name)
    sbs_dir = os.path.join(tests.modules.example_images_directory(),
                           "ExampleSBSImages")
    csv_text = """%s_%s,%s_%s,%s_DNA,%s_DNA
%s,%s,Channel2-01-A-01.tif,%s
""" % (
        cellprofiler.measurement.C_OBJECTS_FILE_NAME,
        OBJECTS_NAME,
        cellprofiler.measurement.C_OBJECTS_PATH_NAME,
        OBJECTS_NAME,
        cellprofiler.measurement.C_FILE_NAME,
        cellprofiler.measurement.C_PATH_NAME,
        png_file,
        png_path,
        sbs_dir,
    )
    pipeline, module, csv_name = make_pipeline(csv_text)
    assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
    assert isinstance(module, cellprofiler.modules.loaddata.LoadData)
    module.wants_images.value = True
    try:
        image_set_list = cellprofiler.image.ImageSetList()
        measurements = cellprofiler.measurement.Measurements()
        workspace = cellprofiler.workspace.Workspace(pipeline, module, None,
                                                     None, measurements,
                                                     image_set_list)
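        # prepare_run lets LoadData read the CSV and build the image set list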
        pipeline.prepare_run(workspace)
        key_names, g = pipeline.get_groupings(workspace)
        assert len(g) == 1
        module.prepare_group(workspace, g[0][0], g[0][1])
        image_set = image_set_list.get_image_set(g[0][1][0] - 1)
        object_set = cellprofiler.object.ObjectSet()
        workspace = cellprofiler.workspace.Workspace(pipeline, module,
                                                     image_set, object_set,
                                                     measurements,
                                                     image_set_list)
        module.run(workspace)
        objects = object_set.get_objects(OBJECTS_NAME)
        assert numpy.all(objects.segmented == labels)
        assert (measurements.get_current_image_measurement(
            cellprofiler.measurement.FF_COUNT % OBJECTS_NAME) == 9)
        for feature in (
                cellprofiler.measurement.M_LOCATION_CENTER_X,
                cellprofiler.measurement.M_LOCATION_CENTER_Y,
                cellprofiler.measurement.M_NUMBER_OBJECT_NUMBER,
        ):
            value = measurements.get_current_measurement(OBJECTS_NAME, feature)
            assert len(value) == 9
    finally:
        bioformats.formatreader.clear_image_reader_cache()
        os.remove(name)
        os.remove(csv_name)
Example #2
def test_subtract():
    """Test correction by subtraction"""
    numpy.random.seed(0)
    image = numpy.random.uniform(size=(10, 10)).astype(numpy.float32)
    illum = numpy.random.uniform(size=(10, 10)).astype(numpy.float32)
    expected = image - illum
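    # the module is expected to clip negative differences to zero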
    expected[expected < 0] = 0
    pipeline = cellprofiler.pipeline.Pipeline()
    pipeline.add_listener(error_callback)
    input_module = cellprofiler.modules.injectimage.InjectImage(
        "InputImage", image)
    input_module.set_module_num(1)
    pipeline.add_module(input_module)
    illum_module = cellprofiler.modules.injectimage.InjectImage(
        "IllumImage", illum)
    illum_module.set_module_num(2)
    pipeline.add_module(illum_module)
    module = cellprofiler.modules.correctilluminationapply.CorrectIlluminationApply()
    module.set_module_num(3)
    pipeline.add_module(module)
    image = module.images[0]
    image.image_name.value = "InputImage"
    image.illum_correct_function_image_name.value = "IllumImage"
    image.corrected_image_name.value = "OutputImage"
    image.divide_or_subtract.value = (
        cellprofiler.modules.correctilluminationapply.DOS_SUBTRACT)
    image.rescale_option = cellprofiler.modules.correctilluminationapply.RE_NONE
    measurements = cellprofiler.measurement.Measurements()
    image_set_list = cellprofiler.image.ImageSetList()
    workspace = cellprofiler.workspace.Workspace(pipeline, None, None, None,
                                                 measurements, image_set_list)
    pipeline.prepare_run(workspace)
    input_module.prepare_group(workspace, {}, [1])
    illum_module.prepare_group(workspace, {}, [1])
    module.prepare_group(workspace, {}, [1])
    image_set = image_set_list.get_image_set(0)
    object_set = cellprofiler.object.ObjectSet()
    workspace = cellprofiler.workspace.Workspace(pipeline, input_module,
                                                 image_set, object_set,
                                                 measurements, image_set_list)
    input_module.run(workspace)
    illum_module.run(workspace)
    module.run(workspace)
    output_image = workspace.image_set.get_image("OutputImage")
    assert numpy.all(output_image.pixel_data == expected)
    def test_11_01_load_objects(self):
        r = numpy.random.RandomState()
        r.seed(1101)
        labels = r.randint(0, 10, size=(30, 20)).astype(numpy.uint8)
        handle, name = tempfile.mkstemp(".png")
        bioformats.write_image(name, labels, bioformats.PT_UINT8)
        os.close(handle)
        png_path, png_file = os.path.split(name)
        sbs_dir = os.path.join(tests.modules.example_images_directory(), "ExampleSBSImages")
        csv_text = """%s_%s,%s_%s,%s_DNA,%s_DNA
%s,%s,Channel2-01-A-01.tif,%s
""" % (cellprofiler.measurement.C_OBJECTS_FILE_NAME, OBJECTS_NAME,
       cellprofiler.measurement.C_OBJECTS_PATH_NAME, OBJECTS_NAME,
       cellprofiler.measurement.C_FILE_NAME, cellprofiler.measurement.C_PATH_NAME,
       png_file, png_path, sbs_dir)
        pipeline, module, csv_name = self.make_pipeline(csv_text)
        assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
        assert isinstance(module, cellprofiler.modules.loaddata.LoadData)
        module.wants_images.value = True
        try:
            image_set_list = cellprofiler.image.ImageSetList()
            measurements = cellprofiler.measurement.Measurements()
            workspace = cellprofiler.workspace.Workspace(
                    pipeline, module, None, None, measurements, image_set_list)
            pipeline.prepare_run(workspace)
            key_names, g = pipeline.get_groupings(workspace)
            self.assertEqual(len(g), 1)
            module.prepare_group(workspace, g[0][0], g[0][1])
            image_set = image_set_list.get_image_set(g[0][1][0] - 1)
            object_set = cellprofiler.object.ObjectSet()
            workspace = cellprofiler.workspace.Workspace(pipeline, module, image_set,
                                                         object_set, measurements, image_set_list)
            module.run(workspace)
            objects = object_set.get_objects(OBJECTS_NAME)
            self.assertTrue(numpy.all(objects.segmented == labels))
            self.assertEqual(measurements.get_current_image_measurement(
                cellprofiler.measurement.FF_COUNT % OBJECTS_NAME), 9)
            for feature in (cellprofiler.measurement.M_LOCATION_CENTER_X,
                            cellprofiler.measurement.M_LOCATION_CENTER_Y,
                            cellprofiler.measurement.M_NUMBER_OBJECT_NUMBER):
                value = measurements.get_current_measurement(
                        OBJECTS_NAME, feature)
                self.assertEqual(len(value), 9)
        finally:
            bioformats.formatreader.clear_image_reader_cache()
            os.remove(name)
            os.remove(csv_name)
def test_prepare_run():
    # regression test for issue #673 and #1161
    #
    # If LoadSingleImage appears first, pathname data does not show
    # up in the measurements.
    #
    tests.modules.maybe_download_sbs()
    folder = "ExampleSBSImages"
    path = os.path.join(tests.modules.example_images_directory(), folder)
    filename = "Channel1-01-A-01.tif"
    pipeline = cellprofiler.pipeline.Pipeline()
    lsi = cellprofiler.modules.loadsingleimage.LoadSingleImage()
    lsi.set_module_num(1)
    lsi.directory.dir_choice = cellprofiler.setting.ABSOLUTE_FOLDER_NAME
    lsi.directory.custom_path = path
    lsi.file_settings[0].image_name.value = get_image_name(0)
    lsi.file_settings[0].file_name.value = filename
    pipeline.add_module(lsi)
    li = cellprofiler.modules.loadimages.LoadImages()
    li.set_module_num(2)
    pipeline.add_module(li)
    li.match_method.value = cellprofiler.modules.loadimages.MS_EXACT_MATCH
    li.location.dir_choice = cellprofiler.setting.ABSOLUTE_FOLDER_NAME
    li.location.custom_path = path
    li.images[0].common_text.value = "Channel2-"
    m = cellprofiler.measurement.Measurements()
    workspace = cellprofiler.workspace.Workspace(
        pipeline,
        lsi,
        m,
        cellprofiler.object.ObjectSet(),
        m,
        cellprofiler.image.ImageSetList(),
    )
    assert pipeline.prepare_run(workspace)
    assert m.image_set_count > 1
    pipeline.prepare_group(workspace, {}, m.get_image_numbers())
    #
    # Skip to the second image set
    #
    m.next_image_set(2)
    lsi.run(workspace)
    #
    # Are the measurements populated?
    #
    m_file = "_".join(
        (cellprofiler.measurement.C_FILE_NAME, get_image_name(0)))
    assert m[cellprofiler.measurement.IMAGE, m_file, 2] == filename
    #
    # Can we retrieve the image?
    #
    pixel_data = m.get_image(get_image_name(0)).pixel_data
    assert not numpy.isscalar(pixel_data)
    def test_02_03_prepare_run(self):
        # regression test for issue #673 and #1161
        #
        # If LoadSingleImage appears first, pathname data does not show
        # up in the measurements.
        #
        tests.modules.maybe_download_sbs()
        folder = "ExampleSBSImages"
        path = os.path.join(tests.modules.example_images_directory(), folder)
        filename = "Channel1-01-A-01.tif"
        pipeline = cellprofiler.pipeline.Pipeline()
        lsi = cellprofiler.modules.loadsingleimage.LoadSingleImage()
        lsi.module_num = 1
        lsi.directory.dir_choice = cellprofiler.setting.ABSOLUTE_FOLDER_NAME
        lsi.directory.custom_path = path
        lsi.file_settings[0].image_name.value = self.get_image_name(0)
        lsi.file_settings[0].file_name.value = filename
        pipeline.add_module(lsi)
        li = cellprofiler.modules.loadimages.LoadImages()
        li.module_num = 2
        pipeline.add_module(li)
        li.match_method.value = cellprofiler.modules.loadimages.MS_EXACT_MATCH
        li.location.dir_choice = cellprofiler.setting.ABSOLUTE_FOLDER_NAME
        li.location.custom_path = path
        li.images[0].common_text.value = "Channel2-"
        m = cellprofiler.measurement.Measurements()
        workspace = cellprofiler.workspace.Workspace(
            pipeline, lsi, m, cellprofiler.object.ObjectSet(), m,
            cellprofiler.image.ImageSetList())
        self.assertTrue(pipeline.prepare_run(workspace))
        self.assertGreater(m.image_set_count, 1)
        pipeline.prepare_group(workspace, {}, m.get_image_numbers())
        #
        # Skip to the second image set
        #
        m.next_image_set(2)
        lsi.run(workspace)
        #
        # Are the measurements populated?
        #
        m_file = "_".join((cellprofiler.measurement.C_FILE_NAME, self.get_image_name(0)))
        self.assertEqual(m[cellprofiler.measurement.IMAGE, m_file, 2], filename)
        #
        # Can we retrieve the image?
        #
        pixel_data = m.get_image(self.get_image_name(0)).pixel_data
        self.assertFalse(numpy.isscalar(pixel_data))
def make_jobs(args):
    
    # how to make this nicer?
    xvfb = args.xvfb
    software = args.software
    script = args.script
    script_arguments = args.script_arguments
    memory = args.memory
    queue = args.queue
    host_group = args.host_group    
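    # upper bound on the number of jobs created when batching without groupings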
    num_jobs_max = 1000
    
    print ''
    print 'make_cellprofiler_jobs_LSF:'
    print ''
                

    #
    # determine files to analyze
    #
    
    CELLPROFILERDIR = '/g/software/linux/pack/cellprofiler-2.0.11047/lib'
    sys.path.insert(0, CELLPROFILERDIR)
    # try importing cellprofiler modules
    global cellprofiler
    import cellprofiler
    import cellprofiler.pipeline
    import cellprofiler.workspace
    import cellprofiler.measurements
    import cellprofiler.cpimage

    # LOAD PIPELINE
    pipeline_file = script
    if not os.path.isfile(pipeline_file):
        raise Exception("-- ERROR: pipeline file not found")

    cp_plugin_directory = None
    if 'CP_PLUGIN_DIRECTORY' in os.environ:
        cp_plugin_directory = os.environ['CP_PLUGIN_DIRECTORY']

    pipeline = cellprofiler.pipeline.Pipeline()

    def error_callback(caller, event):
        # pipeline listeners are called as listener(pipeline, event)
        if (isinstance(event, cellprofiler.pipeline.LoadExceptionEvent) or
                isinstance(event, cellprofiler.pipeline.RunExceptionEvent)):
            sys.stdout.write("Handling exception: {}\n".format(event))
            sys.stdout.write(traceback.format_exc())
            sys.stdout.flush()
    pipeline.add_listener(error_callback)

    # pipeline.remove_listener(error_callback)

    # GET NUMBER OF IMAGES AND GROUPS

    # check whether we are running a new or an old version of CellProfiler

    import inspect
    argspec = inspect.getargspec(pipeline.prepare_run)
    # the new-style prepare_run() takes a single workspace argument
    if len(argspec[0]) == 2 or argspec[0][1] == 'workspace':
        print 'New CellProfiler version'
        new_version = True
    else:
        print 'Old CellProfiler version'
        new_version = False

    if new_version:
        # this should work for plain pipeline files ...
        try:
            pipeline.load(pipeline_file)
            image_set_list = cellprofiler.cpimage.ImageSetList()
            measurements = cellprofiler.measurements.Measurements()
            workspace = cellprofiler.workspace.Workspace(
                pipeline, None, None, None,
                measurements, image_set_list
            )
            grouping_argument = workspace
            result = pipeline.prepare_run(workspace)
            grouping_keys, groups = pipeline.get_groupings(
                grouping_argument
            )
            pipeline.prepare_group(
                grouping_argument, groups[0][0], groups[0][1])
            num_sets = image_set_list.count()
        except:
            import traceback
            traceback.print_exc()
            # ... and this should work for files created with
            # the CreateBatchFile module
            try:
                measurements = cellprofiler.measurements.load_measurements(
                    pipeline_file
                )
            except:
                raise Exception('Unable to load pipeline file:', pipeline_file)
            print 'Obtaining list of image sets...this can take a while...'
            image_set_list = measurements.get_image_numbers()
            grouping_keys = []
            num_sets = len(image_set_list)
    else:
        try:
            pipeline.load(pipeline_file)
        except:
            import traceback
            traceback.print_exc()
            raise Exception('Unable to load pipeline file:', pipeline_file)

        workspace = None

        print 'Obtaining list of image sets...this can take a while...'
        # old-style prepare_run() returns the image set list itself
        result = pipeline.prepare_run(workspace)
        if not result:
            raise Exception("Failed to prepare running the pipeline")

        image_set_list = result
        grouping_keys, groups = pipeline.get_groupings(image_set_list)

        num_sets = image_set_list.count()

    print("Image sets: {}".format(num_sets))
    if num_sets == 0:
        print 'No image sets to process...finished'
        sys.exit(0)

    # GET IMAGE PATH
    input_dir = None  # could also be a directory with image files if Batch_data.mat is not used
    if input_dir is None:
        loadimage_module_name = 'LoadImages'
        cp_modules = pipeline.modules()
        loadimage_module = None
        for module in cp_modules:
            if module.module_name == loadimage_module_name:
                loadimage_module = module
                break
        if loadimage_module:
            input_dir = str(loadimage_module.location).partition('|')[2]
            print("Image path: {}".format(input_dir))
        else:
            print '-- WARNING: The LoadImages module is not used in this' \
                  ' pipeline. The default input folder is undefined.'
            #print('-- ERROR: Could not load the image module!')
            #sys.exit(1)

    # CREATE BATCHES
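    # One job per group if the pipeline defines groupings; otherwise the image
    # sets are split into consecutive batches of batch_size images each.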
    jobStartImages = []
    jobEndImages = []
    jobLengths = []

    if len(grouping_keys) > 0:
        print('Using groupings to assign the jobs to {} groups.'.format(
            len(groups)))
        for group in groups:
            #print 'group length',len(group[1])
            #print group[1][1]
            jobStartImages.append(group[1][0])
            jobEndImages.append(group[1][-1])
            jobLengths.append(len(group[1]))
        #batch_size_max = max(jobLengths)
        print 'Starting images:'
        print jobStartImages
    else:
        print "No groupings assigned => " \
              "images will be randomly assigned to the jobs."
        if int(args.batch_size) > 0:
            batch_size = int(args.batch_size)
        else:
            batch_size = max(4, int(num_sets / float(num_jobs_max)) + 1)
        #batch_size = 4 #int(round(num_sets/num_jobs_max)+1)
        jobStartImages = range(1, num_sets + 1, batch_size)
        for x in jobStartImages:
            jobEndImages.append(x + batch_size - 1)
        jobEndImages[-1] = num_sets
        #batch_size_max = batch_size
    
    #
    # create directories
    #
    input_dir = input_dir.rstrip(os.path.sep) # remove trailing slash if exists
    output_dir = input_dir + '--cluster'
    print('Cluster directory: {}'.format(output_dir))
   
    log_dir = os.path.join(output_dir, "log")  # contains information about job status
    job_dir = os.path.join(output_dir, "jobs") # contains the actual job scripts

    # create directories
    ensure_empty_dir(output_dir)
    ensure_empty_dir(log_dir)
    ensure_empty_dir(job_dir)

    
    #
    # write the job files
    #
    
    for iJob in range(0, len(jobStartImages)):
        
        # choose the image subset for this job
        start = jobStartImages[iJob]
        end = jobEndImages[iJob]
        if end > num_sets:
            end = num_sets
   
        # write the jobs to files
        script_name = "job_{}.sh".format(iJob + 1)
        script_name = os.path.join(job_dir, script_name)
        script_file = open(script_name, "w")

        # information to LSF
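        # -oo/-eo overwrite the stdout/stderr log files, -M and the -R strings
        # request memory, and span[hosts=1] keeps the job on a single host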
        txt = ['#!/bin/bash',
                '#BSUB -oo "{}/job_{}--out.txt"'.format(log_dir,iJob+1),
                '#BSUB -eo "{}/job_{}--err.txt"'.format(log_dir,iJob+1),
                '#BSUB -M {}'.format(memory),
                '#BSUB -R select[mem>{}] -R rusage[mem={}]'.format(memory,memory),
                '#BSUB -R span[hosts=1]'
                ]
        txt = '\n'.join(txt)
        txt = txt + '\n'
        script_file.write(txt)


        if queue:
            script_file.write(
                '#BSUB -q {}\n'.format(queue)
            )

        if host_group:
            script_file.write(
                '#BSUB -m {}\n'.format(host_group)
            )

        
        script_file.write(
            'echo "starting job"\n'
        )        

        # write a file to be able to check from the outside that the job has started
        script_file.write(
            'touch "{}/job_{}--started.touch"\n'.format(log_dir,iJob+1)
        )

        # do not produce core dumps  
        script_file.write(
            'ulimit -c 0\n'
        )        
                
        txt = ['echo "hostname:"',
               'hostname',
               'echo "number of cores:"',
               'nproc',
               'echo "total RAM:"',
               'head -1 /proc/meminfo'
               ]
        txt = '\n'.join(txt)
        txt = txt + '\n'
        script_file.write(txt)

        # generate the actual command      
        def make_command(software, script, script_arguments):
            cmd = [
               software,
               "-c -b -r",
               "-p {}".format(script),
               script_arguments
            ]
            return ' '.join(cmd)

        script_arguments = "-f {} -l {}".format(start, end)
        
        # the software path is used without quotation marks, as quoting does not work here
        cmd = make_command(software, script, script_arguments)
        script_file.write(cmd + '\n')

        script_file.write(
            'echo "job finished"\n'
        )        
        
        # this is the last line in the script, because this will be displayed as the job name by LSF
        '''
        script_file.write(
            'echo "# job {}"\n'.format(iJob)
        )  
        '''
        script_file.close()
        
        # make script executable
        os.system('chmod a+x "{}"'.format(script_name))

    return job_dir, len(jobStartImages)
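
# Note: each generated script embeds its #BSUB directives, so it is typically
# submitted to LSF with something like `bsub < jobs/job_1.sh` (assuming bsub
# is available on the submission host).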