Example #1
    def interface(self, 
                  start_signal,
                  image_set_start=1, 
                  image_set_end=None,
                  overwrite=True):
        '''Top-half thread for running an analysis.  Sets up grouping for jobs,
        deals with returned measurements, reports status periodically.

        start_signal - signal this semaphore when jobs are ready.
        image_set_start - beginning image set number to process
        image_set_end - last image set number to process
        overwrite - whether to recompute image sets that already have data in initial_measurements.
        '''
        posted_analysis_started = False
        acknowledged_thread_start = False
        measurements = None
        workspace = None
        try:
            # listen for pipeline events, and pass them upstream
            self.pipeline.add_listener(lambda pipe, evt: self.post_event(evt))
            
            initial_measurements = None
            if self.output_path is None:
                # Caller wants a temporary measurements file.
                fd, filename = tempfile.mkstemp(".h5")
                try:
                    fd = os.fdopen(fd, "wb")
                    fd.write(self.initial_measurements_buf)
                    fd.close()
                    initial_measurements = cpmeas.Measurements(
                        filename=filename, mode="r")
                    measurements = cpmeas.Measurements(
                        image_set_start = None,
                        copy = initial_measurements,
                        mode = "a")
                finally:
                    if initial_measurements is not None:
                        initial_measurements.close()
                    os.unlink(filename)
            else:
                with open(self.output_path, "wb") as fd:
                    fd.write(self.initial_measurements_buf)
                measurements = cpmeas.Measurements(image_set_start=None,
                                                   filename=self.output_path,
                                                   mode="a")
            # The shared dicts are needed in jobserver()
            self.shared_dicts = [m.get_dictionary() for m in self.pipeline.modules()]
            workspace = cpw.Workspace(self.pipeline, None, None, None,
                                      measurements, cpimage.ImageSetList())
    
            if image_set_end is None:
                image_set_end = measurements.get_image_numbers()[-1]
            image_sets_to_process = filter(
                lambda x: x >= image_set_start and x <= image_set_end,
                measurements.get_image_numbers())

            self.post_event(AnalysisStarted())
            posted_analysis_started = True

            # reset the status of every image set that needs to be processed
            has_groups = measurements.has_groups()
            if self.pipeline.requires_aggregation():
                overwrite = True
            if has_groups and not overwrite:
                if not measurements.has_feature(cpmeas.IMAGE, self.STATUS):
                    overwrite = True
                else:
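                    # A group counts as DONE only if every one of its image
                    # sets is DONE; one unfinished member marks the whole
                    # group unprocessed.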
                    group_status = {}
                    for image_number in measurements.get_image_numbers():
                        group_number = measurements[
                            cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_number]
                        status = measurements[cpmeas.IMAGE, self.STATUS,
                                              image_number]
                        if status != self.STATUS_DONE:
                            group_status[group_number] = self.STATUS_UNPROCESSED
                        elif group_number not in group_status:
                            group_status[group_number] = self.STATUS_DONE
                            
            new_image_sets_to_process = []
            for image_set_number in image_sets_to_process:
                needs_reset = False
                if (overwrite or
                    (not measurements.has_measurements(
                        cpmeas.IMAGE, self.STATUS, image_set_number)) or
                    (measurements[cpmeas.IMAGE, self.STATUS, image_set_number] 
                     != self.STATUS_DONE)):
                    needs_reset = True
                elif has_groups:
                    group_number = measurements[
                        cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_set_number]
                    if group_status[group_number] != self.STATUS_DONE:
                        needs_reset = True
                if needs_reset:
                    measurements[cpmeas.IMAGE, self.STATUS, image_set_number] =\
                        self.STATUS_UNPROCESSED
                    new_image_sets_to_process.append(image_set_number)
            image_sets_to_process = new_image_sets_to_process

            # Find image groups.  These are written into measurements prior to
            # analysis.  Groups are processed as a single job.
            if has_groups or self.pipeline.requires_aggregation():
                worker_runs_post_group = True
                job_groups = {}
                for image_set_number in image_sets_to_process:
                    group_number = measurements[cpmeas.IMAGE, 
                                                cpmeas.GROUP_NUMBER, 
                                                image_set_number]
                    group_index = measurements[cpmeas.IMAGE, 
                                               cpmeas.GROUP_INDEX, 
                                               image_set_number]
                    job_groups[group_number] = job_groups.get(group_number, []) + [(group_index, image_set_number)]
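                # One job per group: order each group's image sets by group
                # index, and order the jobs by group number.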
                job_groups = [[isn for _, isn in sorted(job_groups[group_number])] 
                              for group_number in sorted(job_groups)]
            else:
                worker_runs_post_group = False  # prepare_group will be run in worker, but post_group is below.
                job_groups = [[image_set_number] for image_set_number in image_sets_to_process]

            # XXX - check that any constructed groups are complete, i.e.,
            # image_set_start and image_set_end shouldn't carve them up.

            if not worker_runs_post_group:
                # put the first job in the queue, then wait for the first image
                # set to finish (see the check of self.finished_queue below)
                # before posting the rest. This ensures that any shared data
                # from the first image set is available to later image sets.
                self.work_queue.put((job_groups[0], 
                                     worker_runs_post_group,
                                     True))
                waiting_for_first_imageset = True
                del job_groups[0]
            else:
                waiting_for_first_imageset = False
                for job in job_groups:
                    self.work_queue.put((job, worker_runs_post_group, False))
                job_groups = []
            start_signal.release()
            acknowledged_thread_start = True


            # We loop until every image is completed, or an outside event breaks the loop.
            while not self.cancelled:

                # gather measurements
                while not self.received_measurements_queue.empty():
                    image_numbers, buf = self.received_measurements_queue.get()
                    image_numbers = [int(i) for i in image_numbers]
                    recd_measurements = cpmeas.load_measurements_from_buffer(buf)
                    measurements.copy_relationships(recd_measurements)
                    for o in recd_measurements.get_object_names():
                        if o == cpmeas.EXPERIMENT:
                            continue  # Written during prepare_run / post_run
                        for feature in recd_measurements.get_feature_names(o):
                            measurements[o, feature, image_numbers] \
                                = recd_measurements[o, feature, image_numbers]
                    for image_set_number in image_numbers:
                        measurements[cpmeas.IMAGE, self.STATUS, image_set_number] = self.STATUS_DONE
                    recd_measurements.close()
                    del recd_measurements

                # check for jobs in progress
                while not self.in_process_queue.empty():
                    image_set_numbers = self.in_process_queue.get()
                    for image_set_number in image_set_numbers:
                        measurements[cpmeas.IMAGE, self.STATUS, int(image_set_number)] = self.STATUS_IN_PROCESS

                # check for finished jobs that haven't returned measurements, yet
                while not self.finished_queue.empty():
                    finished_req = self.finished_queue.get()
                    measurements[cpmeas.IMAGE, self.STATUS, int(finished_req.image_set_number)] = self.STATUS_FINISHED_WAITING
                    if waiting_for_first_imageset:
                        assert isinstance(finished_req, 
                                          ImageSetSuccessWithDictionary)
                        self.shared_dicts = finished_req.shared_dicts
                        waiting_for_first_imageset = False
                        assert len(self.shared_dicts) == len(self.pipeline.modules())
                        # if we had jobs waiting for the first image set to finish,
                        # queue them now that the shared state is available.
                        for job in job_groups:
                            self.work_queue.put((job, worker_runs_post_group, False))
                    finished_req.reply(Ack())

                # check progress and report
                counts = collections.Counter(measurements[cpmeas.IMAGE, self.STATUS, image_set_number]
                                             for image_set_number in image_sets_to_process)
                self.post_event(AnalysisProgress(counts))

                # Are we finished?
                if counts[self.STATUS_DONE] == len(image_sets_to_process):
                    last_image_number = measurements.get_image_numbers()[-1]
                    measurements.image_set_number = last_image_number
                    if not worker_runs_post_group:
                        self.pipeline.post_group(workspace, {})
                    
                    workspace = cpw.Workspace(self.pipeline,
                                              None, None, None,
                                              measurements, None, None)
                    workspace.post_run_display_handler = \
                        self.post_run_display_handler
                    self.pipeline.post_run(workspace)
                    break

                measurements.flush()
                # not done, wait for more work
                with self.interface_work_cv:
                    while (self.paused or
                           ((not self.cancelled) and
                            self.in_process_queue.empty() and
                            self.finished_queue.empty() and
                            self.received_measurements_queue.empty())):
                        self.interface_work_cv.wait()  # wait for a change of status or work to arrive
        finally:
            # Note - the measurements file is owned by the queue consumer
            #        after this post_event.
            #
            if not acknowledged_thread_start:
                start_signal.release()
            if posted_analysis_started:
                was_cancelled = self.cancelled
                self.post_event(AnalysisFinished(measurements, was_cancelled))
            self.stop_workers()
        self.analysis_id = False  # this will cause the jobserver thread to exit
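
A note on the start_signal contract above: interface() must release the semaphore exactly once, whether startup succeeds or fails (the finally block covers the failure path), so the caller can block without deadlocking. Below is a minimal sketch of the caller side; the names runner and start_interface_thread are hypothetical, and only the semaphore handshake is taken from the code above.

    import threading

    def start_interface_thread(runner):
        # "runner" is a hypothetical object exposing interface() with the
        # signature above; interface() releases start_signal once jobs are
        # queued (or on failure), so the acquire() below cannot deadlock.
        start_signal = threading.Semaphore(0)
        thread = threading.Thread(target=runner.interface,
                                  kwargs={"start_signal": start_signal},
                                  name="AnalysisInterface")
        thread.daemon = True
        thread.start()
        start_signal.acquire()  # returns once jobs are on the work queue
        return thread
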
Example #2
    def interface(self, 
                  start_signal,
                  image_set_start=1, 
                  image_set_end=None,
                  overwrite=True):
        '''Top-half thread for running an analysis.  Sets up grouping for jobs,
        deals with returned measurements, reports status periodically.

        start_signal - signal this semaphore when jobs are ready.
        image_set_start - beginning image set number to process
        image_set_end - last image set number to process
        overwrite - whether to recompute image sets that already have data in initial_measurements.
        '''
        posted_analysis_started = False
        acknowledged_thread_start = False
        measurements = None
        workspace = None
        try:
            # listen for pipeline events, and pass them upstream
            self.pipeline.add_listener(lambda pipe, evt: self.post_event(evt))
            
            initial_measurements = None
            if self.output_path is None:
                # Caller wants a temporary measurements file.
                fd, filename = tempfile.mkstemp(".h5")
                try:
                    fd = os.fdopen(fd, "wb")
                    fd.write(self.initial_measurements_buf)
                    fd.close()
                    initial_measurements = cpmeas.Measurements(
                        filename=filename, mode="r")
                    measurements = cpmeas.Measurements(
                        image_set_start = None,
                        copy = initial_measurements,
                        mode = "a")
                finally:
                    if initial_measurements is not None:
                        initial_measurements.close()
                    os.unlink(filename)
            else:
                with open(self.output_path, "wb") as fd:
                    fd.write(self.initial_measurements_buf)
                measurements = cpmeas.Measurements(image_set_start=None,
                                                   filename=self.output_path,
                                                   mode="a")
            # The shared dicts are needed in jobserver()
            self.shared_dicts = [m.get_dictionary() for m in self.pipeline.modules()]
            workspace = cpw.Workspace(self.pipeline, None, None, None,
                                      measurements, cpimage.ImageSetList())
    
            if image_set_end is None:
                image_set_end = measurements.get_image_numbers()[-1]
            image_sets_to_process = filter(
                lambda x: x >= image_set_start and x <= image_set_end,
                measurements.get_image_numbers())

            self.post_event(AnalysisStarted())
            posted_analysis_started = True

            # reset the status of every image set that needs to be processed
            has_groups = measurements.has_groups()
            if self.pipeline.requires_aggregation():
                overwrite = True
            if has_groups and not overwrite:
                if not measurements.has_feature(cpmeas.IMAGE, self.STATUS):
                    overwrite = True
                else:
                    group_status = {}
                    for image_number in measurements.get_image_numbers():
                        group_number = measurements[
                            cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_number]
                        status = measurements[cpmeas.IMAGE, self.STATUS,
                                              image_number]
                        if status != self.STATUS_DONE:
                            group_status[group_number] = self.STATUS_UNPROCESSED
                        elif group_number not in group_status:
                            group_status[group_number] = self.STATUS_DONE
                            
            new_image_sets_to_process = []
            for image_set_number in image_sets_to_process:
                needs_reset = False
                if (overwrite or
                    (not measurements.has_measurements(
                        cpmeas.IMAGE, self.STATUS, image_set_number)) or
                    (measurements[cpmeas.IMAGE, self.STATUS, image_set_number] 
                     != self.STATUS_DONE)):
                    needs_reset = True
                elif has_groups:
                    group_number = measurements[
                        cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_set_number]
                    if group_status[group_number] != self.STATUS_DONE:
                        needs_reset = True
                if needs_reset:
                    measurements[cpmeas.IMAGE, self.STATUS, image_set_number] =\
                        self.STATUS_UNPROCESSED
                    new_image_sets_to_process.append(image_set_number)
            image_sets_to_process = new_image_sets_to_process

            # Find image groups.  These are written into measurements prior to
            # analysis.  Groups are processed as a single job.
            if has_groups or self.pipeline.requires_aggregation():
                worker_runs_post_group = True
                job_groups = {}
                for image_set_number in image_sets_to_process:
                    group_number = measurements[cpmeas.IMAGE, 
                                                cpmeas.GROUP_NUMBER, 
                                                image_set_number]
                    group_index = measurements[cpmeas.IMAGE, 
                                               cpmeas.GROUP_INDEX, 
                                               image_set_number]
                    job_groups[group_number] = job_groups.get(group_number, []) + [(group_index, image_set_number)]
                job_groups = [[isn for _, isn in sorted(job_groups[group_number])] 
                              for group_number in sorted(job_groups)]
            else:
                worker_runs_post_group = False  # prepare_group will be run in worker, but post_group is below.
                job_groups = [[image_set_number] for image_set_number in image_sets_to_process]

            # XXX - check that any constructed groups are complete, i.e.,
            # image_set_start and image_set_end shouldn't carve them up.

            if not worker_runs_post_group:
                # put the first job in the queue, then wait for the first image
                # set to finish (see the check of self.finished_queue below)
                # before posting the rest. This ensures that any shared data
                # from the first image set is available to later image sets.
                self.work_queue.put((job_groups[0], 
                                     worker_runs_post_group,
                                     True))
                waiting_for_first_imageset = True
                del job_groups[0]
            else:
                waiting_for_first_imageset = False
                for job in job_groups:
                    self.work_queue.put((job, worker_runs_post_group, False))
                job_groups = []
            start_signal.release()
            acknowledged_thread_start = True


            # We loop until every image is completed, or an outside event breaks the loop.
            while not self.cancelled:

                # gather measurements
                while not self.received_measurements_queue.empty():
                    image_numbers, buf = self.received_measurements_queue.get()
                    image_numbers = [int(i) for i in image_numbers]
                    recd_measurements = cpmeas.load_measurements_from_buffer(buf)
                    self.copy_recieved_measurements(recd_measurements, measurements, image_numbers)
                    recd_measurements.close()
                    del recd_measurements

                # check for jobs in progress
                while not self.in_process_queue.empty():
                    image_set_numbers = self.in_process_queue.get()
                    for image_set_number in image_set_numbers:
                        measurements[cpmeas.IMAGE, self.STATUS, int(image_set_number)] = self.STATUS_IN_PROCESS

                # check for finished jobs that haven't returned measurements, yet
                while not self.finished_queue.empty():
                    finished_req = self.finished_queue.get()
                    measurements[cpmeas.IMAGE, self.STATUS, int(finished_req.image_set_number)] = self.STATUS_FINISHED_WAITING
                    if waiting_for_first_imageset:
                        assert isinstance(finished_req, 
                                          ImageSetSuccessWithDictionary)
                        self.shared_dicts = finished_req.shared_dicts
                        waiting_for_first_imageset = False
                        assert len(self.shared_dicts) == len(self.pipeline.modules())
                        # if we had jobs waiting for the first image set to finish,
                        # queue them now that the shared state is available.
                        for job in job_groups:
                            self.work_queue.put((job, worker_runs_post_group, False))
                    finished_req.reply(Ack())

                # check progress and report
                counts = collections.Counter(measurements[cpmeas.IMAGE, self.STATUS, image_set_number]
                                             for image_set_number in image_sets_to_process)
                self.post_event(AnalysisProgress(counts))

                # Are we finished?
                if counts[self.STATUS_DONE] == len(image_sets_to_process):
                    last_image_number = measurements.get_image_numbers()[-1]
                    measurements.image_set_number = last_image_number
                    if not worker_runs_post_group:
                        self.pipeline.post_group(workspace, {})
                    
                    workspace = cpw.Workspace(self.pipeline,
                                              None, None, None,
                                              measurements, None, None)
                    workspace.post_run_display_handler = \
                        self.post_run_display_handler
                    self.pipeline.post_run(workspace)
                    break

                measurements.flush()
                # not done, wait for more work
                with self.interface_work_cv:
                    while (self.paused or
                           ((not self.cancelled) and
                            self.in_process_queue.empty() and
                            self.finished_queue.empty() and
                            self.received_measurements_queue.empty())):
                        self.interface_work_cv.wait()  # wait for a change of status or work to arrive
        finally:
            # Note - the measurements file is owned by the queue consumer
            #        after this post_event.
            #
            if not acknowledged_thread_start:
                start_signal.release()
            if posted_analysis_started:
                was_cancelled = self.cancelled
                self.post_event(AnalysisFinished(measurements, was_cancelled))
            self.stop_workers()
        self.analysis_id = False  # this will cause the jobserver thread to exit
Example #3
    def do_job(self, job):
        '''Handle a work request to its completion
        
        job - WorkRequest
        '''
        import cellprofiler.pipeline as cpp
        job_measurements = []
        try:
            send_dictionary = job.wants_dictionary
    
            logger.info("Starting job")
            # Fetch the pipeline and preferences for this analysis if we don't have it
            current_pipeline, current_preferences = \
                self.pipelines_and_preferences.get(
                    self.current_analysis_id, (None, None))
            if not current_pipeline:
                logger.debug("Fetching pipeline and preferences")
                rep = self.send(PipelinePreferencesRequest(
                    self.current_analysis_id))
                logger.debug("Received pipeline and preferences response")
                preferences_dict = rep.preferences
                # update preferences to match remote values
                cpprefs.set_preferences_from_dict(preferences_dict)
 
                logger.debug("Loading pipeline")
                pipeline_blob = rep.pipeline_blob.tostring()
                current_pipeline = cpp.Pipeline()
                current_pipeline.loadtxt(StringIO.StringIO(pipeline_blob), 
                                         raise_on_error=True)
                logger.debug("Pipeline loaded")
                current_pipeline.add_listener(
                    self.pipeline_listener.handle_event)
                current_preferences = rep.preferences
                self.pipelines_and_preferences[self.current_analysis_id] = (
                    current_pipeline, current_preferences)
            else:
                # update preferences to match remote values
                cpprefs.set_preferences_from_dict(current_preferences)
            
            # Reset the listener's state
            self.pipeline_listener.reset()
            logger.debug("Getting initial measurements")
            # Fetch the initial measurements if we don't already have them.
            current_measurements = self.initial_measurements.get(
                self.current_analysis_id)
            if current_measurements is None:
                logger.debug("Sending initial measurements request")
                rep = self.send(InitialMeasurementsRequest(
                    self.current_analysis_id))
                logger.debug("Got initial measurements")
                current_measurements = \
                    self.initial_measurements[self.current_analysis_id] = \
                    cpmeas.load_measurements_from_buffer(rep.buf)
            else:
                logger.debug("Has initial measurements")
            # Make a copy of the measurements for writing during this job
            current_measurements = cpmeas.Measurements(copy=current_measurements)
            all_measurements.add(current_measurements)
            job_measurements.append(current_measurements)
        
            successful_image_set_numbers = []
            image_set_numbers = job.image_set_numbers
            worker_runs_post_group = job.worker_runs_post_group
            logger.info("Doing job: " + ",".join(map(str, image_set_numbers)))
        
            self.pipeline_listener.image_set_number = image_set_numbers[0]
        
            if not worker_runs_post_group:
                # Get the shared state from the first imageset in this run.
                shared_dicts = self.send(
                    SharedDictionaryRequest(self.current_analysis_id)).dictionaries
                assert len(shared_dicts) == len(current_pipeline.modules())
                for module, new_dict in zip(current_pipeline.modules(), 
                                            shared_dicts):
                    module.set_dictionary_for_worker(new_dict)
        
            # Run prepare group if this is the first image in the group.  We do
            # this here (even if there's no grouping in the pipeline) to ensure
            # that any changes to the modules' shared state dictionaries get
            # propagated correctly.
            should_process = True
            if current_measurements[cpmeas.IMAGE, 
                                    cpmeas.GROUP_INDEX, 
                                    image_set_numbers[0]] == 1:
                workspace = cpw.Workspace(current_pipeline, None, None, None,
                                          current_measurements, None, None)
                if not current_pipeline.prepare_group(
                    workspace, 
                    current_measurements.get_grouping_keys(), 
                    image_set_numbers):
                    # exception handled elsewhere, possibly cancelling this run.
                    should_process = False
                del workspace
        
            # process the images
            if should_process:
                abort = False
                for image_set_number in image_set_numbers:
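                    # Reclaim memory from the previous image set before
                    # loading the next one; pipelines can hold large images.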
                    gc.collect()
                    try:
                        self.pipeline_listener.image_set_number = image_set_number
                        current_pipeline.run_image_set(
                            current_measurements,
                            image_set_number,
                            self.interaction_handler,
                            self.display_handler,
                            self.cancel_handler)
                        if self.pipeline_listener.should_abort:
                            abort = True
                            break
                        elif self.pipeline_listener.should_skip:
                            # Report skipped image sets as successful so that
                            # analysis can complete.
                            # Report their measurements because some modules
                            # may have provided measurements before skipping.
                            pass
                        successful_image_set_numbers.append(image_set_number)
                        # Send an indication that the image set finished successfully.
                        if send_dictionary:
                            # The jobserver would like a copy of our modules' 
                            # run_state dictionaries.
                            ws = cpw.Workspace(current_pipeline, None, None, None,
                                               current_measurements, None, None)
                            dicts = [m.get_dictionary_for_worker() 
                                     for m in current_pipeline.modules()]
                            req = ImageSetSuccessWithDictionary(
                                self.current_analysis_id,
                                image_set_number=image_set_number,
                                shared_dicts = dicts)
                        else:
                            req = ImageSetSuccess(
                                self.current_analysis_id,
                                image_set_number = image_set_number)
                        rep = self.send(req)
                    except cpp.CancelledException:
                        logging.info("Aborting job after cancellation")
                        abort = True
                    except Exception:
                        try:
                            logging.error("Error in pipeline", exc_info=True)
                            if self.handle_exception(
                                image_set_number=image_set_number) == ED_STOP:
                                abort = True
                                break
                        except:
                            logging.error("Error in handling of pipeline exception", exc_info=True)
                            # this is bad.  We can't handle nested exceptions
                            # remotely so we just fail on this run.
                            abort = True
        
                if abort:
                    current_measurements.close()
                    job_measurements.remove(current_measurements)
                    return
        
                if worker_runs_post_group:
                    workspace = cpw.Workspace(current_pipeline, None, 
                                              current_measurements, None,
                                              current_measurements, None, None)
                    workspace.interaction_handler = self.interaction_handler
                    workspace.cancel_handler = self.cancel_handler
                    workspace.post_group_display_handler = \
                        self.post_group_display_handler
                    # There might be an exception in this call, but it will be
                    # handled elsewhere, and there's nothing we can do for it
                    # here.
                    current_pipeline.post_group(
                        workspace, 
                        current_measurements.get_grouping_keys())
        
            # send measurements back to server
            req = MeasurementsReport(self.current_analysis_id,
                                     buf=current_measurements.file_contents(),
                                     image_set_numbers=image_set_numbers)
            rep = self.send(req)
        
        except cpp.CancelledException:
            # Main thread received shutdown signal
            raise
        
        except Exception:
            logging.error("Error in worker", exc_info=True)
            if self.handle_exception() == ED_STOP:
                raise cpp.CancelledException("Cancelling after user-requested stop")
        finally:
            # Clean up any measurements owned by us
            for m in job_measurements:
                m.close()
Example #4
    def do_job(self, job):
        '''Handle a work request to its completion

        job - WorkRequest
        '''
        import cellprofiler.pipeline as cpp
        job_measurements = []
        try:
            send_dictionary = job.wants_dictionary

            logger.info("Starting job")
            # Fetch the pipeline and preferences for this analysis if we don't have it
            current_pipeline, current_preferences = \
                self.pipelines_and_preferences.get(
                    self.current_analysis_id, (None, None))
            if not current_pipeline:
                logger.debug("Fetching pipeline and preferences")
                rep = self.send(
                    PipelinePreferencesRequest(self.current_analysis_id))
                logger.debug("Received pipeline and preferences response")
                preferences_dict = rep.preferences
                # update preferences to match remote values
                cpprefs.set_preferences_from_dict(preferences_dict)

                logger.debug("Loading pipeline")
                pipeline_blob = rep.pipeline_blob.tostring()
                current_pipeline = cpp.Pipeline()
                current_pipeline.loadtxt(StringIO.StringIO(pipeline_blob),
                                         raise_on_error=True)
                logger.debug("Pipeline loaded")
                current_pipeline.add_listener(
                    self.pipeline_listener.handle_event)
                current_preferences = rep.preferences
                self.pipelines_and_preferences[self.current_analysis_id] = (
                    current_pipeline, current_preferences)
            else:
                # update preferences to match remote values
                cpprefs.set_preferences_from_dict(current_preferences)

            # Reset the listener's state
            self.pipeline_listener.reset()
            logger.debug("Getting initial measurements")
            # Fetch the initial measurements if we don't already have them.
            current_measurements = self.initial_measurements.get(
                self.current_analysis_id)
            if current_measurements is None:
                logger.debug("Sending initial measurements request")
                rep = self.send(
                    InitialMeasurementsRequest(self.current_analysis_id))
                logger.debug("Got initial measurements")
                current_measurements = \
                    self.initial_measurements[self.current_analysis_id] = \
                    cpmeas.load_measurements_from_buffer(rep.buf)
            else:
                logger.debug("Has initial measurements")
            # Make a copy of the measurements for writing during this job
            current_measurements = cpmeas.Measurements(
                copy=current_measurements)
            all_measurements.add(current_measurements)
            job_measurements.append(current_measurements)

            successful_image_set_numbers = []
            image_set_numbers = job.image_set_numbers
            worker_runs_post_group = job.worker_runs_post_group
            logger.info("Doing job: " + ",".join(map(str, image_set_numbers)))

            self.pipeline_listener.image_set_number = image_set_numbers[0]

            if not worker_runs_post_group:
                # Get the shared state from the first imageset in this run.
                shared_dicts = self.send(
                    SharedDictionaryRequest(
                        self.current_analysis_id)).dictionaries
                assert len(shared_dicts) == len(current_pipeline.modules())
                for module, new_dict in zip(current_pipeline.modules(),
                                            shared_dicts):
                    module.set_dictionary_for_worker(new_dict)

            # Run prepare group if this is the first image in the group.  We do
            # this here (even if there's no grouping in the pipeline) to ensure
            # that any changes to the modules' shared state dictionaries get
            # propagated correctly.
            should_process = True
            if current_measurements[cpmeas.IMAGE, cpmeas.GROUP_INDEX,
                                    image_set_numbers[0]] == 1:
                workspace = cpw.Workspace(current_pipeline, None, None, None,
                                          current_measurements, None, None)
                if not current_pipeline.prepare_group(
                        workspace, current_measurements.get_grouping_keys(),
                        image_set_numbers):
                    # exception handled elsewhere, possibly cancelling this run.
                    should_process = False
                del workspace

            # process the images
            if should_process:
                abort = False
                for image_set_number in image_set_numbers:
                    gc.collect()
                    try:
                        self.pipeline_listener.image_set_number = image_set_number
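                        # run_image_set returns its workspace; the last one
                        # is reused for post_group below when this worker
                        # runs post_group.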
                        last_workspace = current_pipeline.run_image_set(
                            current_measurements, image_set_number,
                            self.interaction_handler, self.display_handler,
                            self.cancel_handler)
                        if self.pipeline_listener.should_abort:
                            abort = True
                            break
                        elif self.pipeline_listener.should_skip:
                            # Report skipped image sets as successful so that
                            # analysis can complete.
                            # Report their measurements because some modules
                            # may have provided measurements before skipping.
                            pass
                        successful_image_set_numbers.append(image_set_number)
                        # Send an indication that the image set finished successfully.
                        if send_dictionary:
                            # The jobserver would like a copy of our modules'
                            # run_state dictionaries.
                            dicts = [
                                m.get_dictionary_for_worker()
                                for m in current_pipeline.modules()
                            ]
                            req = ImageSetSuccessWithDictionary(
                                self.current_analysis_id,
                                image_set_number=image_set_number,
                                shared_dicts=dicts)
                        else:
                            req = ImageSetSuccess(
                                self.current_analysis_id,
                                image_set_number=image_set_number)
                        rep = self.send(req)
                    except cpp.CancelledException:
                        logging.info("Aborting job after cancellation")
                        abort = True
                    except Exception:
                        try:
                            logging.error("Error in pipeline", exc_info=True)
                            if self.handle_exception(
                                    image_set_number=image_set_number
                            ) == ED_STOP:
                                abort = True
                                break
                        except:
                            logging.error(
                                "Error in handling of pipeline exception",
                                exc_info=True)
                            # this is bad.  We can't handle nested exceptions
                            # remotely so we just fail on this run.
                            abort = True

                if abort:
                    current_measurements.close()
                    job_measurements.remove(current_measurements)
                    return

                if worker_runs_post_group:
                    last_workspace.interaction_handler =\
                        self.interaction_handler
                    last_workspace.cancel_handler = self.cancel_handler
                    last_workspace.post_group_display_handler = \
                        self.post_group_display_handler
                    # There might be an exception in this call, but it will be
                    # handled elsewhere, and there's nothing we can do for it
                    # here.
                    current_pipeline.post_group(
                        last_workspace,
                        current_measurements.get_grouping_keys())
                    del last_workspace

            # send measurements back to server
            req = MeasurementsReport(self.current_analysis_id,
                                     buf=current_measurements.file_contents(),
                                     image_set_numbers=image_set_numbers)
            rep = self.send(req)

        except cpp.CancelledException:
            # Main thread received shutdown signal
            raise

        except Exception:
            logging.error("Error in worker", exc_info=True)
            if self.handle_exception() == ED_STOP:
                raise cpp.CancelledException(
                    "Cancelling after user-requested stop")
        finally:
            # Clean up any measurements owned by us
            for m in job_measurements:
                m.close()
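
The tests below drive the worker half of a request/reply protocol: the worker blocks in self.send(req) until the other side receives the request and calls req.reply(...). Below is a minimal sketch of that convention over an in-process queue, written for Python 2 to match the examples above; RequestSketch, send_sketch, and transport_queue are illustrative names, not the real transport classes.

    import Queue  # Python 2 stdlib

    class RequestSketch(object):
        '''Illustrative stand-in for the analysis request classes.'''
        def __init__(self, **fields):
            self.__dict__.update(fields)
            self._reply_queue = Queue.Queue(maxsize=1)

        def reply(self, rep):
            # Called once by the receiving side (the tests below).
            self._reply_queue.put(rep)

        def wait_reply(self):
            # Called by the sending side; blocks until reply() runs.
            return self._reply_queue.get()

    def send_sketch(transport_queue, req):
        # What a worker-side send() might reduce to: post the request,
        # then block for the paired reply.
        transport_queue.put(req)
        return req.wait_reply()
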
Example #5
 def test_03_08_a_sad_moment(self):
     #
     # Run using the good pipeline, but change one of the URLs so
     # an exception is thrown.
     #
     self.awthread = self.AWThread(self.announce_addr)
     self.awthread.start()
     self.set_work_socket()
     self.awthread.ex(self.awthread.aw.do_job, 
                      cpanalysis.WorkReply(
                          image_set_numbers = [2, 3],
                          worker_runs_post_group = False,
                          wants_dictionary = False))
     #
     # The worker should ask for the pipeline and preferences next.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.PipelinePreferencesRequest)
     self.assertEqual(req.analysis_id, self.analysis_id)
     
     input_dir = os.path.join(example_images_directory(), "ExampleSBSImages")
     cpprefs.set_default_image_directory(input_dir)
     preferences = {cpprefs.DEFAULT_IMAGE_DIRECTORY: 
                    cpprefs.config_read(cpprefs.DEFAULT_IMAGE_DIRECTORY) }
     
     rep = cpanalysis.Reply(
         pipeline_blob = np.array(GOOD_PIPELINE),
         preferences = preferences)
     req.reply(rep)
     #
     # The worker asks for the initial measurements.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.InitialMeasurementsRequest)
     self.assertEqual(req.analysis_id, self.analysis_id)
     m = get_measurements_for_good_pipeline(nimages=3)
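     # Overwrite image set 2's serialized record with zeros so the worker
     # raises while preparing that image set.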
     m[cpmeas.IMAGE, M_IMAGE_SET, 2] = np.zeros(100, np.uint8)
     try:
         req.reply(cpanalysis.Reply(buf = m.file_contents()))
     finally:
         m.close()
     #
     # Next, the worker asks for the shared dictionary
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.SharedDictionaryRequest)
     shared_dictionaries = [{ ("foo%d" % i):"bar%d" % i} for i in range(1,8)]
     rep = cpanalysis.SharedDictionaryReply(
         dictionaries = shared_dictionaries)
     req.reply(rep)
     #
     # The worker should choke somewhere in NamesAndTypes, but we
     # tell the worker to skip the rest of the imageset.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.ExceptionReport)
     req.reply(cpanalysis.ExceptionPleaseDebugReply(disposition = ED_SKIP))
     #
     # The worker should send ImageSetSuccess for image set 2 anyway.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.ImageSetSuccess)
     self.assertEqual(req.image_set_number, 2)
     req.reply(cpanalysis.Ack())
     #
     # And then it tells us about image set 3
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.ImageSetSuccess)
     self.assertEqual(req.image_set_number, 3)
     req.reply(cpanalysis.Ack())
     #
     # The worker should then report the measurements for both 2 and 3
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.MeasurementsReport)
     self.assertSequenceEqual(req.image_set_numbers, [2, 3])
     m = cpmeas.load_measurements_from_buffer(req.buf)
     #
     # Spot check for some expected stuff
     #
     self.assertTrue(m.has_feature(cpmeas.IMAGE, C_COUNT+"_Nuclei"))
     self.assertTrue(m.has_feature("Nuclei", M_LOCATION_CENTER_X))
     self.assertTrue(m.has_feature("Nuclei", "AreaShape_Area"))
     #
     # The count for the skipped image should be None
     #
     count = m[cpmeas.IMAGE, C_COUNT + "_Nuclei", 2]
     self.assertIsNone(count)
     count = m[cpmeas.IMAGE, C_COUNT + "_Nuclei", 3]
     center_x = m["Nuclei", M_LOCATION_CENTER_X, 3]
     self.assertEqual(count, len(center_x))
     req.reply(cpanalysis.Ack())
     self.awthread.ecute()
Example #6
 def test_03_06_the_happy_path_chapter_2(self):
     #
     # Give the worker image sets # 2 and 3 and tell it to run post_group
     #
     self.awthread = self.AWThread(self.announce_addr)
     self.awthread.start()
     self.set_work_socket()
     self.awthread.ex(self.awthread.aw.do_job, 
                      cpanalysis.WorkReply(
                          image_set_numbers = [2, 3],
                          worker_runs_post_group = True,
                          wants_dictionary = False))
     #
     # The worker should ask for the pipeline and preferences next.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.PipelinePreferencesRequest)
     self.assertEqual(req.analysis_id, self.analysis_id)
     
     input_dir = os.path.join(example_images_directory(), "ExampleSBSImages")
     cpprefs.set_default_image_directory(input_dir)
     preferences = {cpprefs.DEFAULT_IMAGE_DIRECTORY: 
                    cpprefs.config_read(cpprefs.DEFAULT_IMAGE_DIRECTORY) }
     
     rep = cpanalysis.Reply(
         pipeline_blob = np.array(DISPLAY_PIPELINE),
         preferences = preferences)
     req.reply(rep)
     #
     # The worker asks for the initial measurements.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.InitialMeasurementsRequest)
     self.assertEqual(req.analysis_id, self.analysis_id)
     m = get_measurements_for_good_pipeline(nimages=3)
     try:
         req.reply(cpanalysis.Reply(buf = m.file_contents()))
     finally:
         m.close()
     #
     # In group mode, the worker issues a display request and constructs
     # its own dictionaries
     #
     for image_number in (2, 3):
         #
         # The worker sends a display request for FlipAndRotate
         #
         req = self.awthread.recv(self.work_socket)
         self.assertIsInstance(req, cpanalysis.DisplayRequest)
         req.reply(cpanalysis.Ack())
         #
         # The worker sends ImageSetSuccess.
         #
         req = self.awthread.recv(self.work_socket)
         self.assertIsInstance(req, cpanalysis.ImageSetSuccess)
         self.assertEqual(req.image_set_number, image_number)
         req.reply(cpanalysis.Ack())
     #
     # The worker sends a DisplayPostGroup request for FlipAndRotate
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.DisplayPostGroupRequest)
     self.assertEqual(req.image_set_number, 3)
     req.reply(cpanalysis.Ack())
     #
     # The worker sends a measurement report for image sets 2 and 3
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.MeasurementsReport)
     self.assertSequenceEqual(req.image_set_numbers, [2, 3])
     m = cpmeas.load_measurements_from_buffer(req.buf)
     #
     # Spot check for some expected stuff
     #
     self.assertTrue(m.has_feature(cpmeas.IMAGE, C_COUNT+"_Nuclei"))
     self.assertTrue(m.has_feature("Nuclei", M_LOCATION_CENTER_X))
     self.assertTrue(m.has_feature("Nuclei", "AreaShape_Area"))
     req.reply(cpanalysis.Ack())
     self.awthread.ecute()
Example #7
 def test_03_05_the_happy_path_chapter_1(self):
     #
     # Run the worker clear through to the end
     # for the first imageset
     #
     self.awthread = self.AWThread(self.announce_addr)
     self.awthread.start()
     self.set_work_socket()
     self.awthread.ex(self.awthread.aw.do_job, 
                      cpanalysis.WorkReply(
                          image_set_numbers = [1],
                          worker_runs_post_group = False,
                          wants_dictionary = True))
     #
     # The worker should ask for the pipeline and preferences next.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.PipelinePreferencesRequest)
     self.assertEqual(req.analysis_id, self.analysis_id)
     
     input_dir = os.path.join(example_images_directory(), "ExampleSBSImages")
     cpprefs.set_default_image_directory(input_dir)
     preferences = {cpprefs.DEFAULT_IMAGE_DIRECTORY: 
                    cpprefs.config_read(cpprefs.DEFAULT_IMAGE_DIRECTORY) }
     
     rep = cpanalysis.Reply(
         pipeline_blob = np.array(DISPLAY_PIPELINE),
         preferences = preferences)
     req.reply(rep)
     #
     # The worker asks for the initial measurements.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.InitialMeasurementsRequest)
     self.assertEqual(req.analysis_id, self.analysis_id)
     m = get_measurements_for_good_pipeline()
     try:
         req.reply(cpanalysis.Reply(buf = m.file_contents()))
     finally:
         m.close()
     #
     # Next, the worker asks for the shared dictionary
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.SharedDictionaryRequest)
     shared_dictionaries = [{ ("foo%d" % i):"bar%d" % i} for i in range(1,8)]
     rep = cpanalysis.SharedDictionaryReply(
         dictionaries = shared_dictionaries)
     req.reply(rep)
     #
     # The worker sends a display request for FlipAndRotate
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.DisplayRequest)
     self.assertEqual(req.image_set_number, 1)
     d = req.display_data_dict
     # Possibly, this will break if someone edits FlipAndRotate. Sorry.
     self.assertItemsEqual(d.keys(), 
                           ['vmax', 'output_image_pixel_data', 
                            'image_pixel_data', 'vmin'])
     self.assertIsInstance(d['output_image_pixel_data'], np.ndarray)
     req.reply(cpanalysis.Ack())
     #
     # The worker sends ImageSetSuccessWithDictionary.
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.ImageSetSuccessWithDictionary)
     self.assertEqual(req.image_set_number, 1)
     for expected, actual in zip(shared_dictionaries, req.shared_dicts):
         self.assertDictEqual(expected, actual)
     req.reply(cpanalysis.Ack())
     #
     # The worker sends the measurement report
     #
     req = self.awthread.recv(self.work_socket)
     self.assertIsInstance(req, cpanalysis.MeasurementsReport)
     self.assertSequenceEqual(req.image_set_numbers, [1])
     m = cpmeas.load_measurements_from_buffer(req.buf)
     #
     # Spot check for some expected stuff
     #
     self.assertTrue(m.has_feature(cpmeas.IMAGE, C_COUNT+"_Nuclei"))
     self.assertTrue(m.has_feature("Nuclei", M_LOCATION_CENTER_X))
     self.assertTrue(m.has_feature("Nuclei", "AreaShape_Area"))
     req.reply(cpanalysis.Ack())
     self.awthread.ecute()