def interface(self, start_signal,
              image_set_start=1, image_set_end=None,
              overwrite=True):
    '''Top-half thread for running an analysis.

    Sets up grouping for jobs, deals with returned measurements, and
    reports status periodically.

    start_signal - signal this semaphore when jobs are ready.
    image_set_start - beginning image set number to process
    image_set_end - last image set number to process
    overwrite - whether to recompute image sets that already have data
                in initial_measurements.
    '''
    posted_analysis_started = False
    acknowledged_thread_start = False
    measurements = None
    workspace = None
    try:
        # listen for pipeline events, and pass them upstream
        self.pipeline.add_listener(lambda pipe, evt: self.post_event(evt))

        initial_measurements = None
        if self.output_path is None:
            # Caller wants a temporary measurements file.
            fd, filename = tempfile.mkstemp(".h5")
            try:
                fd = os.fdopen(fd, "wb")
                fd.write(self.initial_measurements_buf)
                fd.close()
                initial_measurements = cpmeas.Measurements(
                    filename=filename, mode="r")
                measurements = cpmeas.Measurements(
                    image_set_start=None,
                    copy=initial_measurements,
                    mode="a")
            finally:
                if initial_measurements is not None:
                    initial_measurements.close()
                os.unlink(filename)
        else:
            with open(self.output_path, "wb") as fd:
                fd.write(self.initial_measurements_buf)
            measurements = cpmeas.Measurements(
                image_set_start=None,
                filename=self.output_path,
                mode="a")

        # The shared dicts are needed in jobserver()
        self.shared_dicts = [m.get_dictionary()
                             for m in self.pipeline.modules()]
        workspace = cpw.Workspace(self.pipeline, None, None, None,
                                  measurements, cpimage.ImageSetList())

        if image_set_end is None:
            image_set_end = measurements.get_image_numbers()[-1]
        image_sets_to_process = filter(
            lambda x: x >= image_set_start and x <= image_set_end,
            measurements.get_image_numbers())
        self.post_event(AnalysisStarted())
        posted_analysis_started = True

        # reset the status of every image set that needs to be processed
        has_groups = measurements.has_groups()
        if self.pipeline.requires_aggregation():
            overwrite = True
        if has_groups and not overwrite:
            if not measurements.has_feature(cpmeas.IMAGE, self.STATUS):
                overwrite = True
            else:
                group_status = {}
                for image_number in measurements.get_image_numbers():
                    group_number = measurements[
                        cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_number]
                    status = measurements[
                        cpmeas.IMAGE, self.STATUS, image_number]
                    if status != self.STATUS_DONE:
                        group_status[group_number] = self.STATUS_UNPROCESSED
                    elif group_number not in group_status:
                        group_status[group_number] = self.STATUS_DONE

        new_image_sets_to_process = []
        for image_set_number in image_sets_to_process:
            needs_reset = False
            if (overwrite or
                (not measurements.has_measurements(
                    cpmeas.IMAGE, self.STATUS, image_set_number)) or
                (measurements[cpmeas.IMAGE, self.STATUS,
                              image_set_number] != self.STATUS_DONE)):
                needs_reset = True
            elif has_groups:
                group_number = measurements[
                    cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_set_number]
                if group_status[group_number] != self.STATUS_DONE:
                    needs_reset = True
            if needs_reset:
                measurements[cpmeas.IMAGE, self.STATUS,
                             image_set_number] = self.STATUS_UNPROCESSED
                new_image_sets_to_process.append(image_set_number)
        image_sets_to_process = new_image_sets_to_process

        # Find image groups.  These are written into measurements prior to
        # analysis.  Groups are processed as a single job.
        if has_groups or self.pipeline.requires_aggregation():
            worker_runs_post_group = True
            job_groups = {}
            for image_set_number in image_sets_to_process:
                group_number = measurements[
                    cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_set_number]
                group_index = measurements[
                    cpmeas.IMAGE, cpmeas.GROUP_INDEX, image_set_number]
                job_groups[group_number] = \
                    job_groups.get(group_number, []) + \
                    [(group_index, image_set_number)]
            job_groups = [
                [isn for _, isn in sorted(job_groups[group_number])]
                for group_number in sorted(job_groups)]
        else:
            worker_runs_post_group = False
            # prepare_group will be run in worker, but post_group is below.
            job_groups = [[image_set_number]
                          for image_set_number in image_sets_to_process]

        # XXX - check that any constructed groups are complete, i.e.,
        # image_set_start and image_set_end shouldn't carve them up.

        if not worker_runs_post_group:
            # Put the first job in the queue, then wait for the first image
            # set to finish (see the check of self.finished_queue below)
            # before posting the rest.  This ensures that any shared data
            # from the first image set is available to later image sets.
            self.work_queue.put((job_groups[0],
                                 worker_runs_post_group,
                                 True))
            waiting_for_first_imageset = True
            del job_groups[0]
        else:
            waiting_for_first_imageset = False
            for job in job_groups:
                self.work_queue.put((job, worker_runs_post_group, False))
            job_groups = []
        start_signal.release()
        acknowledged_thread_start = True

        # We loop until every image set is completed, or an outside event
        # breaks the loop.
        while not self.cancelled:

            # gather measurements
            while not self.received_measurements_queue.empty():
                image_numbers, buf = \
                    self.received_measurements_queue.get()
                image_numbers = [int(i) for i in image_numbers]
                recd_measurements = \
                    cpmeas.load_measurements_from_buffer(buf)
                measurements.copy_relationships(recd_measurements)
                for o in recd_measurements.get_object_names():
                    if o == cpmeas.EXPERIMENT:
                        continue  # Written during prepare_run / post_run
                    for feature in recd_measurements.get_feature_names(o):
                        measurements[o, feature, image_numbers] = \
                            recd_measurements[o, feature, image_numbers]
                for image_set_number in image_numbers:
                    measurements[cpmeas.IMAGE, self.STATUS,
                                 image_set_number] = self.STATUS_DONE
                recd_measurements.close()
                del recd_measurements

            # check for jobs in progress
            while not self.in_process_queue.empty():
                image_set_numbers = self.in_process_queue.get()
                for image_set_number in image_set_numbers:
                    measurements[
                        cpmeas.IMAGE, self.STATUS,
                        int(image_set_number)] = self.STATUS_IN_PROCESS

            # check for finished jobs that haven't returned measurements yet
            while not self.finished_queue.empty():
                finished_req = self.finished_queue.get()
                measurements[
                    cpmeas.IMAGE, self.STATUS,
                    int(finished_req.image_set_number)] = \
                    self.STATUS_FINISHED_WAITING
                if waiting_for_first_imageset:
                    assert isinstance(finished_req,
                                      ImageSetSuccessWithDictionary)
                    self.shared_dicts = finished_req.shared_dicts
                    waiting_for_first_imageset = False
                    assert len(self.shared_dicts) == \
                        len(self.pipeline.modules())
                    # if we had jobs waiting for the first image set to
                    # finish, queue them now that the shared state is
                    # available.
                    for job in job_groups:
                        self.work_queue.put(
                            (job, worker_runs_post_group, False))
                finished_req.reply(Ack())

            # check progress and report
            counts = collections.Counter(
                measurements[cpmeas.IMAGE, self.STATUS, image_set_number]
                for image_set_number in image_sets_to_process)
            self.post_event(AnalysisProgress(counts))

            # Are we finished?
            if counts[self.STATUS_DONE] == len(image_sets_to_process):
                last_image_number = measurements.get_image_numbers()[-1]
                measurements.image_set_number = last_image_number
                if not worker_runs_post_group:
                    self.pipeline.post_group(workspace, {})

                workspace = cpw.Workspace(self.pipeline,
                                          None, None, None,
                                          measurements, None, None)
                workspace.post_run_display_handler = \
                    self.post_run_display_handler
                self.pipeline.post_run(workspace)
                break

            measurements.flush()
            # not done, wait for more work
            with self.interface_work_cv:
                while (self.paused or
                       ((not self.cancelled) and
                        self.in_process_queue.empty() and
                        self.finished_queue.empty() and
                        self.received_measurements_queue.empty())):
                    # wait for a change of status or work to arrive
                    self.interface_work_cv.wait()
    finally:
        # Note - the measurements file is owned by the queue consumer
        # after this post_event.
        #
        if not acknowledged_thread_start:
            start_signal.release()
        if posted_analysis_started:
            was_cancelled = self.cancelled
            self.post_event(AnalysisFinished(measurements, was_cancelled))
        self.stop_workers()
        self.analysis_id = False  # this will cause the jobserver thread to exit
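The status loop above never busy-polls: each worker-facing callback enqueues its payload and then notifies `interface_work_cv`, so the interface thread sleeps until there is verifiably something to do. A minimal sketch of that pattern, with hypothetical stand-ins (`work_cv`, `finished_queue`, `report_finished`) rather than the real attributes:

import Queue
import threading

work_cv = threading.Condition()
finished_queue = Queue.Queue()

def report_finished(image_set_number):
    # Worker-side callback: enqueue the event, then wake the interface
    # thread so it re-evaluates its wait predicate.
    finished_queue.put(image_set_number)
    with work_cv:
        work_cv.notify_all()

def interface_loop(n_image_sets):
    done = 0
    while done < n_image_sets:
        # Drain everything that arrived since the last wake-up.
        while not finished_queue.empty():
            finished_queue.get()
            done += 1
        with work_cv:
            # Re-checking the predicate under the condition's lock means a
            # put() that races with the check cannot cause a lost wake-up:
            # notify_all() cannot run until wait() has released the lock.
            while done < n_image_sets and finished_queue.empty():
                work_cv.wait()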
def interface(self, start_signal,
              image_set_start=1, image_set_end=None,
              overwrite=True):
    '''Top-half thread for running an analysis.

    Sets up grouping for jobs, deals with returned measurements, and
    reports status periodically.

    start_signal - signal this semaphore when jobs are ready.
    image_set_start - beginning image set number to process
    image_set_end - last image set number to process
    overwrite - whether to recompute image sets that already have data
                in initial_measurements.
    '''
    posted_analysis_started = False
    acknowledged_thread_start = False
    measurements = None
    workspace = None
    try:
        # listen for pipeline events, and pass them upstream
        self.pipeline.add_listener(lambda pipe, evt: self.post_event(evt))

        initial_measurements = None
        if self.output_path is None:
            # Caller wants a temporary measurements file.
            fd, filename = tempfile.mkstemp(".h5")
            try:
                fd = os.fdopen(fd, "wb")
                fd.write(self.initial_measurements_buf)
                fd.close()
                initial_measurements = cpmeas.Measurements(
                    filename=filename, mode="r")
                measurements = cpmeas.Measurements(
                    image_set_start=None,
                    copy=initial_measurements,
                    mode="a")
            finally:
                if initial_measurements is not None:
                    initial_measurements.close()
                os.unlink(filename)
        else:
            with open(self.output_path, "wb") as fd:
                fd.write(self.initial_measurements_buf)
            measurements = cpmeas.Measurements(
                image_set_start=None,
                filename=self.output_path,
                mode="a")

        # The shared dicts are needed in jobserver()
        self.shared_dicts = [m.get_dictionary()
                             for m in self.pipeline.modules()]
        workspace = cpw.Workspace(self.pipeline, None, None, None,
                                  measurements, cpimage.ImageSetList())

        if image_set_end is None:
            image_set_end = measurements.get_image_numbers()[-1]
        image_sets_to_process = filter(
            lambda x: x >= image_set_start and x <= image_set_end,
            measurements.get_image_numbers())
        self.post_event(AnalysisStarted())
        posted_analysis_started = True

        # reset the status of every image set that needs to be processed
        has_groups = measurements.has_groups()
        if self.pipeline.requires_aggregation():
            overwrite = True
        if has_groups and not overwrite:
            if not measurements.has_feature(cpmeas.IMAGE, self.STATUS):
                overwrite = True
            else:
                group_status = {}
                for image_number in measurements.get_image_numbers():
                    group_number = measurements[
                        cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_number]
                    status = measurements[
                        cpmeas.IMAGE, self.STATUS, image_number]
                    if status != self.STATUS_DONE:
                        group_status[group_number] = self.STATUS_UNPROCESSED
                    elif group_number not in group_status:
                        group_status[group_number] = self.STATUS_DONE

        new_image_sets_to_process = []
        for image_set_number in image_sets_to_process:
            needs_reset = False
            if (overwrite or
                (not measurements.has_measurements(
                    cpmeas.IMAGE, self.STATUS, image_set_number)) or
                (measurements[cpmeas.IMAGE, self.STATUS,
                              image_set_number] != self.STATUS_DONE)):
                needs_reset = True
            elif has_groups:
                group_number = measurements[
                    cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_set_number]
                if group_status[group_number] != self.STATUS_DONE:
                    needs_reset = True
            if needs_reset:
                measurements[cpmeas.IMAGE, self.STATUS,
                             image_set_number] = self.STATUS_UNPROCESSED
                new_image_sets_to_process.append(image_set_number)
        image_sets_to_process = new_image_sets_to_process

        # Find image groups.  These are written into measurements prior to
        # analysis.  Groups are processed as a single job.
        if has_groups or self.pipeline.requires_aggregation():
            worker_runs_post_group = True
            job_groups = {}
            for image_set_number in image_sets_to_process:
                group_number = measurements[
                    cpmeas.IMAGE, cpmeas.GROUP_NUMBER, image_set_number]
                group_index = measurements[
                    cpmeas.IMAGE, cpmeas.GROUP_INDEX, image_set_number]
                job_groups[group_number] = \
                    job_groups.get(group_number, []) + \
                    [(group_index, image_set_number)]
            job_groups = [
                [isn for _, isn in sorted(job_groups[group_number])]
                for group_number in sorted(job_groups)]
        else:
            worker_runs_post_group = False
            # prepare_group will be run in worker, but post_group is below.
            job_groups = [[image_set_number]
                          for image_set_number in image_sets_to_process]

        # XXX - check that any constructed groups are complete, i.e.,
        # image_set_start and image_set_end shouldn't carve them up.

        if not worker_runs_post_group:
            # Put the first job in the queue, then wait for the first image
            # set to finish (see the check of self.finished_queue below)
            # before posting the rest.  This ensures that any shared data
            # from the first image set is available to later image sets.
            self.work_queue.put((job_groups[0],
                                 worker_runs_post_group,
                                 True))
            waiting_for_first_imageset = True
            del job_groups[0]
        else:
            waiting_for_first_imageset = False
            for job in job_groups:
                self.work_queue.put((job, worker_runs_post_group, False))
            job_groups = []
        start_signal.release()
        acknowledged_thread_start = True

        # We loop until every image set is completed, or an outside event
        # breaks the loop.
        while not self.cancelled:

            # gather measurements
            while not self.received_measurements_queue.empty():
                image_numbers, buf = \
                    self.received_measurements_queue.get()
                image_numbers = [int(i) for i in image_numbers]
                recd_measurements = \
                    cpmeas.load_measurements_from_buffer(buf)
                self.copy_recieved_measurements(
                    recd_measurements, measurements, image_numbers)
                recd_measurements.close()
                del recd_measurements

            # check for jobs in progress
            while not self.in_process_queue.empty():
                image_set_numbers = self.in_process_queue.get()
                for image_set_number in image_set_numbers:
                    measurements[
                        cpmeas.IMAGE, self.STATUS,
                        int(image_set_number)] = self.STATUS_IN_PROCESS

            # check for finished jobs that haven't returned measurements yet
            while not self.finished_queue.empty():
                finished_req = self.finished_queue.get()
                measurements[
                    cpmeas.IMAGE, self.STATUS,
                    int(finished_req.image_set_number)] = \
                    self.STATUS_FINISHED_WAITING
                if waiting_for_first_imageset:
                    assert isinstance(finished_req,
                                      ImageSetSuccessWithDictionary)
                    self.shared_dicts = finished_req.shared_dicts
                    waiting_for_first_imageset = False
                    assert len(self.shared_dicts) == \
                        len(self.pipeline.modules())
                    # if we had jobs waiting for the first image set to
                    # finish, queue them now that the shared state is
                    # available.
                    for job in job_groups:
                        self.work_queue.put(
                            (job, worker_runs_post_group, False))
                finished_req.reply(Ack())

            # check progress and report
            counts = collections.Counter(
                measurements[cpmeas.IMAGE, self.STATUS, image_set_number]
                for image_set_number in image_sets_to_process)
            self.post_event(AnalysisProgress(counts))

            # Are we finished?
            if counts[self.STATUS_DONE] == len(image_sets_to_process):
                last_image_number = measurements.get_image_numbers()[-1]
                measurements.image_set_number = last_image_number
                if not worker_runs_post_group:
                    self.pipeline.post_group(workspace, {})

                workspace = cpw.Workspace(self.pipeline,
                                          None, None, None,
                                          measurements, None, None)
                workspace.post_run_display_handler = \
                    self.post_run_display_handler
                self.pipeline.post_run(workspace)
                break

            measurements.flush()
            # not done, wait for more work
            with self.interface_work_cv:
                while (self.paused or
                       ((not self.cancelled) and
                        self.in_process_queue.empty() and
                        self.finished_queue.empty() and
                        self.received_measurements_queue.empty())):
                    # wait for a change of status or work to arrive
                    self.interface_work_cv.wait()
    finally:
        # Note - the measurements file is owned by the queue consumer
        # after this post_event.
        #
        if not acknowledged_thread_start:
            start_signal.release()
        if posted_analysis_started:
            was_cancelled = self.cancelled
            self.post_event(AnalysisFinished(measurements, was_cancelled))
        self.stop_workers()
        self.analysis_id = False  # this will cause the jobserver thread to exit
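The group-to-job transformation in both versions of interface() above is dense. Run on hypothetical data it behaves like this; the `rows` tuples stand in for the per-image GROUP_NUMBER and GROUP_INDEX measurements:

# Hypothetical rows of (image_set_number, group_number, group_index).
rows = [(1, 1, 1), (2, 1, 2), (3, 2, 1), (4, 2, 2), (5, 2, 3)]

job_groups = {}
for image_set_number, group_number, group_index in rows:
    job_groups[group_number] = job_groups.get(group_number, []) + \
        [(group_index, image_set_number)]
# One job per group, image sets ordered by their index within the group.
job_groups = [[isn for _, isn in sorted(job_groups[group_number])]
              for group_number in sorted(job_groups)]
assert job_groups == [[1, 2], [3, 4, 5]]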
def do_job(self, job):
    '''Handle a work request to its completion

    job - WorkRequest
    '''
    import cellprofiler.pipeline as cpp
    job_measurements = []
    try:
        send_dictionary = job.wants_dictionary
        logger.info("Starting job")
        # Fetch the pipeline and preferences for this analysis if we
        # don't have them already
        current_pipeline, current_preferences = \
            self.pipelines_and_preferences.get(
                self.current_analysis_id, (None, None))
        if not current_pipeline:
            logger.debug("Fetching pipeline and preferences")
            rep = self.send(PipelinePreferencesRequest(
                self.current_analysis_id))
            logger.debug("Received pipeline and preferences response")
            preferences_dict = rep.preferences
            # update preferences to match remote values
            cpprefs.set_preferences_from_dict(preferences_dict)
            logger.debug("Loading pipeline")
            pipeline_blob = rep.pipeline_blob.tostring()
            current_pipeline = cpp.Pipeline()
            current_pipeline.loadtxt(StringIO.StringIO(pipeline_blob),
                                     raise_on_error=True)
            logger.debug("Pipeline loaded")
            current_pipeline.add_listener(
                self.pipeline_listener.handle_event)
            current_preferences = rep.preferences
            self.pipelines_and_preferences[self.current_analysis_id] = (
                current_pipeline, current_preferences)
        else:
            # update preferences to match remote values
            cpprefs.set_preferences_from_dict(current_preferences)

        # Reset the listener's state
        self.pipeline_listener.reset()
        logger.debug("Getting initial measurements")
        # Fetch the initial measurements if needed.
        current_measurements = self.initial_measurements.get(
            self.current_analysis_id)
        if current_measurements is None:
            logger.debug("Sending initial measurements request")
            rep = self.send(InitialMeasurementsRequest(
                self.current_analysis_id))
            logger.debug("Got initial measurements")
            current_measurements = \
                self.initial_measurements[self.current_analysis_id] = \
                cpmeas.load_measurements_from_buffer(rep.buf)
        else:
            logger.debug("Has initial measurements")
        # Make a copy of the measurements for writing during this job
        current_measurements = cpmeas.Measurements(
            copy=current_measurements)
        all_measurements.add(current_measurements)
        job_measurements.append(current_measurements)

        successful_image_set_numbers = []
        image_set_numbers = job.image_set_numbers
        worker_runs_post_group = job.worker_runs_post_group
        logger.info("Doing job: " + ",".join(map(str, image_set_numbers)))

        self.pipeline_listener.image_set_number = image_set_numbers[0]

        if not worker_runs_post_group:
            # Get the shared state from the first imageset in this run.
            shared_dicts = self.send(
                SharedDictionaryRequest(
                    self.current_analysis_id)).dictionaries
            assert len(shared_dicts) == len(current_pipeline.modules())
            for module, new_dict in zip(current_pipeline.modules(),
                                        shared_dicts):
                module.set_dictionary_for_worker(new_dict)

        # Run prepare_group if this is the first image in the group.  We do
        # this here (even if there's no grouping in the pipeline) to ensure
        # that any changes to the modules' shared state dictionaries get
        # propagated correctly.
        should_process = True
        if current_measurements[cpmeas.IMAGE, cpmeas.GROUP_INDEX,
                                image_set_numbers[0]] == 1:
            workspace = cpw.Workspace(current_pipeline, None, None, None,
                                      current_measurements, None, None)
            if not current_pipeline.prepare_group(
                    workspace,
                    current_measurements.get_grouping_keys(),
                    image_set_numbers):
                # exception handled elsewhere, possibly cancelling this run.
                should_process = False
            del workspace

        # process the images
        if should_process:
            abort = False
            for image_set_number in image_set_numbers:
                gc.collect()
                try:
                    self.pipeline_listener.image_set_number = \
                        image_set_number
                    current_pipeline.run_image_set(
                        current_measurements,
                        image_set_number,
                        self.interaction_handler,
                        self.display_handler,
                        self.cancel_handler)
                    if self.pipeline_listener.should_abort:
                        abort = True
                        break
                    elif self.pipeline_listener.should_skip:
                        # Report skipped image sets as successful so that
                        # analysis can complete.  Report their measurements
                        # because some modules may have provided
                        # measurements before skipping.
                        pass
                    successful_image_set_numbers.append(image_set_number)
                    # Send an indication that the image set finished
                    # successfully.
                    if send_dictionary:
                        # The jobserver would like a copy of our modules'
                        # run_state dictionaries.
                        ws = cpw.Workspace(current_pipeline,
                                           None, None, None,
                                           current_measurements,
                                           None, None)
                        dicts = [m.get_dictionary_for_worker()
                                 for m in current_pipeline.modules()]
                        req = ImageSetSuccessWithDictionary(
                            self.current_analysis_id,
                            image_set_number=image_set_number,
                            shared_dicts=dicts)
                    else:
                        req = ImageSetSuccess(
                            self.current_analysis_id,
                            image_set_number=image_set_number)
                    rep = self.send(req)
                except cpp.CancelledException:
                    logging.info("Aborting job after cancellation")
                    abort = True
                except Exception:
                    try:
                        logging.error("Error in pipeline", exc_info=True)
                        if self.handle_exception(
                                image_set_number=image_set_number) == \
                                ED_STOP:
                            abort = True
                            break
                    except:
                        logging.error(
                            "Error in handling of pipeline exception",
                            exc_info=True)
                        # this is bad.  We can't handle nested exceptions
                        # remotely so we just fail on this run.
                        abort = True

            if abort:
                current_measurements.close()
                job_measurements.remove(current_measurements)
                return

            if worker_runs_post_group:
                workspace = cpw.Workspace(current_pipeline, None,
                                          current_measurements, None,
                                          current_measurements, None, None)
                workspace.interaction_handler = self.interaction_handler
                workspace.cancel_handler = self.cancel_handler
                workspace.post_group_display_handler = \
                    self.post_group_display_handler
                # There might be an exception in this call, but it will be
                # handled elsewhere, and there's nothing we can do for it
                # here.
                current_pipeline.post_group(
                    workspace, current_measurements.get_grouping_keys())

        # send measurements back to server
        req = MeasurementsReport(
            self.current_analysis_id,
            buf=current_measurements.file_contents(),
            image_set_numbers=image_set_numbers)
        rep = self.send(req)
    except cpp.CancelledException:
        # Main thread received shutdown signal
        raise
    except Exception:
        logging.error("Error in worker", exc_info=True)
        if self.handle_exception() == ED_STOP:
            raise cpp.CancelledException(
                "Cancelling after user-requested stop")
    finally:
        # Clean up any measurements owned by us
        for m in job_measurements:
            m.close()
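The dictionary handoff in do_job() is the worker-side half of the protocol that interface() coordinates: the first worker returns its modules' dictionaries via ImageSetSuccessWithDictionary, and every later worker installs them before running. A toy sketch of the two halves, with a hypothetical FakeModule standing in for a real pipeline module (only the two accessor names are taken from the code above):

class FakeModule(object):
    # Hypothetical stand-in for a pipeline module's shared-state API.
    def __init__(self):
        self.shared = {}

    def get_dictionary_for_worker(self):
        # What the first worker snapshots and sends back with
        # ImageSetSuccessWithDictionary.
        return dict(self.shared)

    def set_dictionary_for_worker(self, d):
        # What a later worker installs from the SharedDictionaryRequest
        # reply before running its image sets.
        self.shared = d

modules = [FakeModule(), FakeModule()]
modules[0].shared["threshold"] = 0.5          # state built on image set 1
dicts = [m.get_dictionary_for_worker() for m in modules]
for module, new_dict in zip(modules, dicts):  # replay onto another worker
    module.set_dictionary_for_worker(new_dict)
assert modules[0].shared == {"threshold": 0.5}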
def do_job(self, job):
    '''Handle a work request to its completion

    job - WorkRequest
    '''
    import cellprofiler.pipeline as cpp
    job_measurements = []
    try:
        send_dictionary = job.wants_dictionary
        logger.info("Starting job")
        # Fetch the pipeline and preferences for this analysis if we
        # don't have them already
        current_pipeline, current_preferences = \
            self.pipelines_and_preferences.get(
                self.current_analysis_id, (None, None))
        if not current_pipeline:
            logger.debug("Fetching pipeline and preferences")
            rep = self.send(PipelinePreferencesRequest(
                self.current_analysis_id))
            logger.debug("Received pipeline and preferences response")
            preferences_dict = rep.preferences
            # update preferences to match remote values
            cpprefs.set_preferences_from_dict(preferences_dict)
            logger.debug("Loading pipeline")
            pipeline_blob = rep.pipeline_blob.tostring()
            current_pipeline = cpp.Pipeline()
            current_pipeline.loadtxt(StringIO.StringIO(pipeline_blob),
                                     raise_on_error=True)
            logger.debug("Pipeline loaded")
            current_pipeline.add_listener(
                self.pipeline_listener.handle_event)
            current_preferences = rep.preferences
            self.pipelines_and_preferences[self.current_analysis_id] = (
                current_pipeline, current_preferences)
        else:
            # update preferences to match remote values
            cpprefs.set_preferences_from_dict(current_preferences)

        # Reset the listener's state
        self.pipeline_listener.reset()
        logger.debug("Getting initial measurements")
        # Fetch the initial measurements if needed.
        current_measurements = self.initial_measurements.get(
            self.current_analysis_id)
        if current_measurements is None:
            logger.debug("Sending initial measurements request")
            rep = self.send(InitialMeasurementsRequest(
                self.current_analysis_id))
            logger.debug("Got initial measurements")
            current_measurements = \
                self.initial_measurements[self.current_analysis_id] = \
                cpmeas.load_measurements_from_buffer(rep.buf)
        else:
            logger.debug("Has initial measurements")
        # Make a copy of the measurements for writing during this job
        current_measurements = cpmeas.Measurements(
            copy=current_measurements)
        all_measurements.add(current_measurements)
        job_measurements.append(current_measurements)

        successful_image_set_numbers = []
        image_set_numbers = job.image_set_numbers
        worker_runs_post_group = job.worker_runs_post_group
        logger.info("Doing job: " + ",".join(map(str, image_set_numbers)))

        self.pipeline_listener.image_set_number = image_set_numbers[0]

        if not worker_runs_post_group:
            # Get the shared state from the first imageset in this run.
            shared_dicts = self.send(
                SharedDictionaryRequest(
                    self.current_analysis_id)).dictionaries
            assert len(shared_dicts) == len(current_pipeline.modules())
            for module, new_dict in zip(current_pipeline.modules(),
                                        shared_dicts):
                module.set_dictionary_for_worker(new_dict)

        # Run prepare_group if this is the first image in the group.  We do
        # this here (even if there's no grouping in the pipeline) to ensure
        # that any changes to the modules' shared state dictionaries get
        # propagated correctly.
        should_process = True
        if current_measurements[cpmeas.IMAGE, cpmeas.GROUP_INDEX,
                                image_set_numbers[0]] == 1:
            workspace = cpw.Workspace(current_pipeline, None, None, None,
                                      current_measurements, None, None)
            if not current_pipeline.prepare_group(
                    workspace,
                    current_measurements.get_grouping_keys(),
                    image_set_numbers):
                # exception handled elsewhere, possibly cancelling this run.
                should_process = False
            del workspace

        # process the images
        if should_process:
            abort = False
            for image_set_number in image_set_numbers:
                gc.collect()
                try:
                    self.pipeline_listener.image_set_number = \
                        image_set_number
                    last_workspace = current_pipeline.run_image_set(
                        current_measurements,
                        image_set_number,
                        self.interaction_handler,
                        self.display_handler,
                        self.cancel_handler)
                    if self.pipeline_listener.should_abort:
                        abort = True
                        break
                    elif self.pipeline_listener.should_skip:
                        # Report skipped image sets as successful so that
                        # analysis can complete.  Report their measurements
                        # because some modules may have provided
                        # measurements before skipping.
                        pass
                    successful_image_set_numbers.append(image_set_number)
                    # Send an indication that the image set finished
                    # successfully.
                    if send_dictionary:
                        # The jobserver would like a copy of our modules'
                        # run_state dictionaries.
                        dicts = [m.get_dictionary_for_worker()
                                 for m in current_pipeline.modules()]
                        req = ImageSetSuccessWithDictionary(
                            self.current_analysis_id,
                            image_set_number=image_set_number,
                            shared_dicts=dicts)
                    else:
                        req = ImageSetSuccess(
                            self.current_analysis_id,
                            image_set_number=image_set_number)
                    rep = self.send(req)
                except cpp.CancelledException:
                    logging.info("Aborting job after cancellation")
                    abort = True
                except Exception:
                    try:
                        logging.error("Error in pipeline", exc_info=True)
                        if self.handle_exception(
                                image_set_number=image_set_number) == \
                                ED_STOP:
                            abort = True
                            break
                    except:
                        logging.error(
                            "Error in handling of pipeline exception",
                            exc_info=True)
                        # this is bad.  We can't handle nested exceptions
                        # remotely so we just fail on this run.
                        abort = True

            if abort:
                current_measurements.close()
                job_measurements.remove(current_measurements)
                return

            if worker_runs_post_group:
                last_workspace.interaction_handler = \
                    self.interaction_handler
                last_workspace.cancel_handler = self.cancel_handler
                last_workspace.post_group_display_handler = \
                    self.post_group_display_handler
                # There might be an exception in this call, but it will be
                # handled elsewhere, and there's nothing we can do for it
                # here.
                current_pipeline.post_group(
                    last_workspace,
                    current_measurements.get_grouping_keys())
                del last_workspace

        # send measurements back to server
        req = MeasurementsReport(
            self.current_analysis_id,
            buf=current_measurements.file_contents(),
            image_set_numbers=image_set_numbers)
        rep = self.send(req)
    except cpp.CancelledException:
        # Main thread received shutdown signal
        raise
    except Exception:
        logging.error("Error in worker", exc_info=True)
        if self.handle_exception() == ED_STOP:
            raise cpp.CancelledException(
                "Cancelling after user-requested stop")
    finally:
        # Clean up any measurements owned by us
        for m in job_measurements:
            m.close()
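Both versions of do_job() end by shipping the job's measurements back to the server as an opaque buffer. A minimal sketch of that round trip, using the cpmeas alias already imported in this module; the feature name and value are invented, and the no-argument Measurements() constructor is assumed to create a temporary backing file:

m = cpmeas.Measurements()                  # assumed: temp-file-backed store
m[cpmeas.IMAGE, "Count_Nuclei", 1] = 42    # invented feature and value
buf = m.file_contents()                    # what MeasurementsReport carries
m.close()

# ... server side, as in the received-measurements handling of interface():
m2 = cpmeas.load_measurements_from_buffer(buf)
assert m2[cpmeas.IMAGE, "Count_Nuclei", 1] == 42
m2.close()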
def test_03_08_a_sad_moment(self):
    #
    # Run using the good pipeline, but change one of the URLs so
    # an exception is thrown.
    #
    self.awthread = self.AWThread(self.announce_addr)
    self.awthread.start()
    self.set_work_socket()
    self.awthread.ex(self.awthread.aw.do_job,
                     cpanalysis.WorkReply(
                         image_set_numbers=[2, 3],
                         worker_runs_post_group=False,
                         wants_dictionary=False))
    #
    # The worker should ask for the pipeline and preferences next.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.PipelinePreferencesRequest)
    self.assertEqual(req.analysis_id, self.analysis_id)

    input_dir = os.path.join(example_images_directory(),
                             "ExampleSBSImages")
    cpprefs.set_default_image_directory(input_dir)
    preferences = {
        cpprefs.DEFAULT_IMAGE_DIRECTORY:
        cpprefs.config_read(cpprefs.DEFAULT_IMAGE_DIRECTORY)}

    rep = cpanalysis.Reply(
        pipeline_blob=np.array(GOOD_PIPELINE),
        preferences=preferences)
    req.reply(rep)
    #
    # The worker asks for the initial measurements.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.InitialMeasurementsRequest)
    self.assertEqual(req.analysis_id, self.analysis_id)
    m = get_measurements_for_good_pipeline(nimages=3)
    m[cpmeas.IMAGE, M_IMAGE_SET, 2] = np.zeros(100, np.uint8)
    try:
        req.reply(cpanalysis.Reply(buf=m.file_contents()))
    finally:
        m.close()
    #
    # Next, the worker asks for the shared dictionary
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.SharedDictionaryRequest)
    shared_dictionaries = [{("foo%d" % i): "bar%d" % i}
                           for i in range(1, 8)]
    rep = cpanalysis.SharedDictionaryReply(
        dictionaries=shared_dictionaries)
    req.reply(rep)
    #
    # The worker should choke somewhere in NamesAndTypes, but we
    # tell the worker to skip the rest of the imageset.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.ExceptionReport)
    req.reply(cpanalysis.ExceptionPleaseDebugReply(disposition=ED_SKIP))
    #
    # The worker should send ImageSetSuccess for image set 2 anyway.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.ImageSetSuccess)
    self.assertEqual(req.image_set_number, 2)
    req.reply(cpanalysis.Ack())
    #
    # And then it tells us about image set 3
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.ImageSetSuccess)
    self.assertEqual(req.image_set_number, 3)
    req.reply(cpanalysis.Ack())
    #
    # The worker should then report the measurements for both 2 and 3
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.MeasurementsReport)
    self.assertSequenceEqual(req.image_set_numbers, [2, 3])
    m = cpmeas.load_measurements_from_buffer(req.buf)
    #
    # Spot check for some expected stuff
    #
    self.assertTrue(m.has_feature(cpmeas.IMAGE, C_COUNT + "_Nuclei"))
    self.assertTrue(m.has_feature("Nuclei", M_LOCATION_CENTER_X))
    self.assertTrue(m.has_feature("Nuclei", "AreaShape_Area"))
    #
    # The count for the skipped image should be None
    #
    count = m[cpmeas.IMAGE, C_COUNT + "_Nuclei", 2]
    self.assertIsNone(count)
    count = m[cpmeas.IMAGE, C_COUNT + "_Nuclei", 3]
    center_x = m["Nuclei", M_LOCATION_CENTER_X, 3]
    self.assertEqual(count, len(center_x))
    req.reply(cpanalysis.Ack())
    self.awthread.ecute()
def test_03_06_the_happy_path_chapter_2(self):
    #
    # Give the worker image sets 2 and 3 and tell it to run post_group
    #
    self.awthread = self.AWThread(self.announce_addr)
    self.awthread.start()
    self.set_work_socket()
    self.awthread.ex(self.awthread.aw.do_job,
                     cpanalysis.WorkReply(
                         image_set_numbers=[2, 3],
                         worker_runs_post_group=True,
                         wants_dictionary=False))
    #
    # The worker should ask for the pipeline and preferences next.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.PipelinePreferencesRequest)
    self.assertEqual(req.analysis_id, self.analysis_id)

    input_dir = os.path.join(example_images_directory(),
                             "ExampleSBSImages")
    cpprefs.set_default_image_directory(input_dir)
    preferences = {
        cpprefs.DEFAULT_IMAGE_DIRECTORY:
        cpprefs.config_read(cpprefs.DEFAULT_IMAGE_DIRECTORY)}

    rep = cpanalysis.Reply(
        pipeline_blob=np.array(DISPLAY_PIPELINE),
        preferences=preferences)
    req.reply(rep)
    #
    # The worker asks for the initial measurements.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.InitialMeasurementsRequest)
    self.assertEqual(req.analysis_id, self.analysis_id)
    m = get_measurements_for_good_pipeline(nimages=3)
    try:
        req.reply(cpanalysis.Reply(buf=m.file_contents()))
    finally:
        m.close()
    #
    # In group mode, the worker issues a display request and constructs
    # its own dictionaries
    #
    for image_number in (2, 3):
        #
        # The worker sends a display request for FlipAndRotate
        #
        req = self.awthread.recv(self.work_socket)
        self.assertIsInstance(req, cpanalysis.DisplayRequest)
        req.reply(cpanalysis.Ack())
        #
        # The worker sends ImageSetSuccess.
        #
        req = self.awthread.recv(self.work_socket)
        self.assertIsInstance(req, cpanalysis.ImageSetSuccess)
        self.assertEqual(req.image_set_number, image_number)
        req.reply(cpanalysis.Ack())
    #
    # The worker sends a DisplayPostGroup request for FlipAndRotate
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.DisplayPostGroupRequest)
    self.assertEqual(req.image_set_number, 3)
    req.reply(cpanalysis.Ack())
    #
    # The worker sends a measurement report for image sets 2 and 3
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.MeasurementsReport)
    self.assertSequenceEqual(req.image_set_numbers, [2, 3])
    m = cpmeas.load_measurements_from_buffer(req.buf)
    #
    # Spot check for some expected stuff
    #
    self.assertTrue(m.has_feature(cpmeas.IMAGE, C_COUNT + "_Nuclei"))
    self.assertTrue(m.has_feature("Nuclei", M_LOCATION_CENTER_X))
    self.assertTrue(m.has_feature("Nuclei", "AreaShape_Area"))
    req.reply(cpanalysis.Ack())
    self.awthread.ecute()
def test_03_05_the_happy_path_chapter_1(self):
    #
    # Run the worker clear through to the end for the first imageset
    #
    self.awthread = self.AWThread(self.announce_addr)
    self.awthread.start()
    self.set_work_socket()
    self.awthread.ex(self.awthread.aw.do_job,
                     cpanalysis.WorkReply(
                         image_set_numbers=[1],
                         worker_runs_post_group=False,
                         wants_dictionary=True))
    #
    # The worker should ask for the pipeline and preferences next.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.PipelinePreferencesRequest)
    self.assertEqual(req.analysis_id, self.analysis_id)

    input_dir = os.path.join(example_images_directory(),
                             "ExampleSBSImages")
    cpprefs.set_default_image_directory(input_dir)
    preferences = {
        cpprefs.DEFAULT_IMAGE_DIRECTORY:
        cpprefs.config_read(cpprefs.DEFAULT_IMAGE_DIRECTORY)}

    rep = cpanalysis.Reply(
        pipeline_blob=np.array(DISPLAY_PIPELINE),
        preferences=preferences)
    req.reply(rep)
    #
    # The worker asks for the initial measurements.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.InitialMeasurementsRequest)
    self.assertEqual(req.analysis_id, self.analysis_id)
    m = get_measurements_for_good_pipeline()
    try:
        req.reply(cpanalysis.Reply(buf=m.file_contents()))
    finally:
        m.close()
    #
    # Next, the worker asks for the shared dictionary
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.SharedDictionaryRequest)
    shared_dictionaries = [{("foo%d" % i): "bar%d" % i}
                           for i in range(1, 8)]
    rep = cpanalysis.SharedDictionaryReply(
        dictionaries=shared_dictionaries)
    req.reply(rep)
    #
    # The worker sends a display request for FlipAndRotate
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.DisplayRequest)
    self.assertEqual(req.image_set_number, 1)
    d = req.display_data_dict
    # Possibly, this will break if someone edits FlipAndRotate. Sorry.
    self.assertItemsEqual(d.keys(),
                          ['vmax', 'output_image_pixel_data',
                           'image_pixel_data', 'vmin'])
    self.assertIsInstance(d['output_image_pixel_data'], np.ndarray)
    req.reply(cpanalysis.Ack())
    #
    # The worker sends ImageSetSuccessWithDictionary.
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.ImageSetSuccessWithDictionary)
    self.assertEqual(req.image_set_number, 1)
    for expected, actual in zip(shared_dictionaries, req.shared_dicts):
        self.assertDictEqual(expected, actual)
    req.reply(cpanalysis.Ack())
    #
    # The worker sends the measurement report
    #
    req = self.awthread.recv(self.work_socket)
    self.assertIsInstance(req, cpanalysis.MeasurementsReport)
    self.assertSequenceEqual(req.image_set_numbers, [1])
    m = cpmeas.load_measurements_from_buffer(req.buf)
    #
    # Spot check for some expected stuff
    #
    self.assertTrue(m.has_feature(cpmeas.IMAGE, C_COUNT + "_Nuclei"))
    self.assertTrue(m.has_feature("Nuclei", M_LOCATION_CENTER_X))
    self.assertTrue(m.has_feature("Nuclei", "AreaShape_Area"))
    req.reply(cpanalysis.Ack())
    self.awthread.ecute()
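Every exchange in these tests repeats the same recv / type-check / field-check / reply shape. A hypothetical helper, not part of the real harness, that would compress the idiom:

def expect(testcase, socket, request_class, **attrs):
    # Receive the worker's next request, check its type and any expected
    # attribute values, and hand it back so the caller can req.reply(...).
    req = testcase.awthread.recv(socket)
    testcase.assertIsInstance(req, request_class)
    for name, value in attrs.items():
        testcase.assertEqual(getattr(req, name), value)
    return req

# Usage, e.g. in test_03_08_a_sad_moment:
#     req = expect(self, self.work_socket,
#                  cpanalysis.ImageSetSuccess, image_set_number=2)
#     req.reply(cpanalysis.Ack())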