def test_can_raise_exception_in_thread(self):
    """Check that a captured exception is re-raised via the thread helper.

    A ``TestError`` is raised and caught so that a genuine ``exc_info``
    triple (with traceback) is available. That triple is handed to
    ``raise_exception_in_thread``; the test expects ``TestError`` to come
    back out of the call and expects ``self.mock_thread.used`` to be truthy
    afterwards.
    """
    captured = None
    try:
        raise TestError('test')
    except Exception:
        # Grab the exception info while the exception is still being handled.
        captured = sys.exc_info()

    # Callable form of assertRaises: equivalent to the context-manager form.
    self.assertRaises(TestError, raise_exception_in_thread, captured)
    self.assertTrue(self.mock_thread.used)
def mainloop(self):
    """Process signals from ``self.inqueue`` until a ``'close'`` signal arrives.

    Everything received before the first ``'save data restored'`` signal is
    discarded. After that, each queue item is unpacked as ``(signal, data)``:

    * ``'check/retry'`` -- run a connectivity check and, if the server is
      online, submit any waiting files.
    * ``'file'`` -- if sending is enabled, queue ``data`` for submission,
      possibly re-check connectivity, and submit if the server is online.
    * ``'close'`` -- leave the loop.
    * ``'save data restored'`` -- ignored.
    * anything else -- ``ValueError``.

    If no signal arrives within the current timeout, a ``'check/retry'`` is
    synthesised unless a connectivity check happened within the last second.
    Any exception raised while handling a signal is re-raised in a separate
    thread (for visibility) and logged; the loop then carries on.
    """
    log = logging.getLogger('BLACS.AnalysisSubmission.mainloop')
    self._mainloop_logger = log

    # Ignore signals until save data is restored:
    while True:
        early_signal = self.inqueue.get()[0]
        if early_signal == 'save data restored':
            break

    wait_seconds = 10
    while True:
        try:
            try:
                signal, data = self.inqueue.get(timeout=wait_seconds)
            except queue.Empty:
                wait_seconds = 10
                # Periodic checking of connectivity and resending of files.
                # Don't trigger a re-check if we already failed a
                # connectivity check within the last second:
                elapsed = time.time() - self.time_of_last_connectivity_check
                if not (elapsed > 1):
                    continue
                signal = 'check/retry'

            # Signals that need no processing (and no "processed" log line).
            if signal == 'close':
                break
            if signal == 'save data restored':
                continue

            if signal == 'check/retry':
                self.check_connectivity()
                if self.server_online == 'online':
                    self.submit_waiting_files()
            elif signal == 'file':
                if self.send_to_server:
                    self._waiting_for_submission.append(data)
                    if self.server_online != 'online':
                        # Don't stack connectivity checks if many files are
                        # arriving. If we failed a connectivity check less
                        # than a second ago then don't check again.
                        elapsed = time.time() - self.time_of_last_connectivity_check
                        if elapsed > 1:
                            self.check_connectivity()
                        else:
                            # But do queue up a check for when we have
                            # been idle for one second:
                            wait_seconds = 1
                    if self.server_online == 'online':
                        self.submit_waiting_files()
            else:
                raise ValueError('Invalid signal: %s' % str(signal))

            log.info('Processed signal: %s' % str(signal))
        except Exception:
            # Raise in a thread for visibility, but keep going
            raise_exception_in_thread(sys.exc_info())
            log.exception("Exception in mainloop, continuing")
def __init__(self, application):
    """Build the BLACS main window and everything attached to it.

    In order, this: loads ``main.ui``; restores the saved front panel
    settings and window layout; creates the four tab containers;
    instantiates one device tab per attached device in the connection
    table; starts the analysis submission and queue manager objects and
    restores their saved data; instantiates the plugins and wires up their
    menus, notifications, settings pages and callbacks; creates the
    preferences system; connects the menu actions; and shows the window.

    Devices whose tab cannot be instantiated are removed from
    ``self.attached_devices`` and from the connection table, their restored
    settings are kept in ``self.failed_device_settings``, and the exception
    is re-raised in a separate thread so start-up continues.

    Args:
        application: the Qt application object; stored as
            ``self.qt_application``.

    Note:
        ``exp_config``, ``settings_path`` and ``connection_table`` are read
        from module-level globals, not passed in.
    """
    self.qt_application = application
    #self.qt_application.aboutToQuit.connect(self.destroy)
    self._relaunch = False
    self.exiting = False
    self.exit_complete = False

    logger.info('Loading BLACS ui')
    #self.ui = BLACSWindow(self).ui
    loader = UiLoader()
    loader.registerCustomWidget(QueueTreeview)
    #loader.registerCustomPromotion('BLACS',BLACSWindow)
    self.ui = loader.load(os.path.join(BLACS_DIR, 'main.ui'), BLACSWindow())
    logger.info('BLACS ui loaded')
    self.ui.blacs = self
    self.tab_widgets = {}
    self.exp_config = exp_config  # Global variable
    self.settings_path = settings_path  # Global variable
    self.connection_table = connection_table  # Global variable
    self.connection_table_h5file = self.exp_config.get(
        'paths', 'connection_table_h5')
    self.connection_table_labscript = self.exp_config.get(
        'paths', 'connection_table_py')

    # Setup the UI
    self.ui.main_splitter.setStretchFactor(0, 0)
    self.ui.main_splitter.setStretchFactor(1, 1)

    self.tablist = {}
    self.panes = {}
    self.settings_dict = {}

    # Find which devices are connected to BLACS, and what their labscript
    # class names are:
    logger.info('finding connected devices in connection table')
    self.attached_devices = self.connection_table.get_attached_devices()

    # Store the panes in a dictionary for easy access
    self.panes['tab_top_vertical_splitter'] = self.ui.tab_top_vertical_splitter
    self.panes['tab_bottom_vertical_splitter'] = self.ui.tab_bottom_vertical_splitter
    self.panes['tab_horizontal_splitter'] = self.ui.tab_horizontal_splitter
    self.panes['main_splitter'] = self.ui.main_splitter

    # Get settings to restore
    logger.info('Loading front panel settings')
    self.front_panel_settings = FrontPanelSettings(self.settings_path,
                                                   self.connection_table)
    self.front_panel_settings.setup(self)
    settings, question, error, tab_data = self.front_panel_settings.restore()

    # TODO: handle question/error cases

    logger.info('restoring window data')
    self.restore_window(tab_data)

    #splash.update_text('Creating the device tabs...')
    # Create the notebooks
    logger.info('Creating tab widgets')
    for i in range(4):
        self.tab_widgets[i] = DragDropTabWidget(self.tab_widget_ids)
        self.tab_widgets[i].setElideMode(Qt.ElideRight)
        getattr(self.ui, 'tab_container_%d' % i).addWidget(self.tab_widgets[i])

    logger.info('Instantiating devices')
    self.failed_device_settings = {}
    # Iterate over a snapshot: the except branch below deletes entries from
    # self.attached_devices, and mutating a dict while iterating over its
    # live items() view raises RuntimeError on Python 3.
    for device_name, labscript_device_class_name in list(
            self.attached_devices.items()):
        try:
            self.settings_dict.setdefault(device_name,
                                          {"device_name": device_name})
            # add common keys to settings:
            self.settings_dict[device_name][
                "connection_table"] = self.connection_table
            self.settings_dict[device_name][
                "front_panel_settings"] = settings[
                    device_name] if device_name in settings else {}
            self.settings_dict[device_name]["saved_data"] = tab_data[
                device_name]['data'] if device_name in tab_data else {}
            # Instantiate the device
            logger.info('instantiating %s' % device_name)
            TabClass = labscript_devices.get_BLACS_tab(
                labscript_device_class_name)
            self.tablist[device_name] = TabClass(
                self.tab_widgets[0], self.settings_dict[device_name])
        except Exception:
            # Remember the restored settings of the failed device, then drop
            # it everywhere so the rest of start-up does not see it.
            self.failed_device_settings[device_name] = {
                "front_panel":
                self.settings_dict[device_name]["front_panel_settings"],
                "save_data":
                self.settings_dict[device_name]["saved_data"]
            }
            del self.settings_dict[device_name]
            del self.attached_devices[device_name]
            self.connection_table.remove_device(device_name)
            # Surface the error without aborting start-up.
            raise_exception_in_thread(sys.exc_info())

    logger.info('reordering tabs')
    self.order_tabs(tab_data)

    logger.info('starting analysis submission thread')
    # setup analysis submission
    self.analysis_submission = AnalysisSubmission(self, self.ui)
    if 'analysis_data' not in tab_data['BLACS settings']:
        tab_data['BLACS settings']['analysis_data'] = {}
    else:
        # NOTE(review): eval() on text restored from the settings file. This
        # is only safe if that file is trusted; consider
        # ast.literal_eval if the stored values are plain literals.
        tab_data['BLACS settings']['analysis_data'] = eval(
            tab_data['BLACS settings']['analysis_data'])
    self.analysis_submission.restore_save_data(
        tab_data['BLACS settings']["analysis_data"])

    logger.info('starting queue manager thread')
    # Setup the QueueManager
    self.queue = QueueManager(self, self.ui)
    if 'queue_data' not in tab_data['BLACS settings']:
        tab_data['BLACS settings']['queue_data'] = {}
    else:
        # quick fix for qt objects not loading that were saved before
        # qtutil 2 changes
        # NOTE(review): eval() on restored settings text, see above.
        try:
            tab_data['BLACS settings']['queue_data'] = eval(
                tab_data['BLACS settings']['queue_data'])
        except NameError:
            tab_data['BLACS settings']['queue_data'] = {}
    self.queue.restore_save_data(tab_data['BLACS settings']['queue_data'])

    logger.info('instantiating plugins')
    # setup the plugin system
    settings_pages = []
    self.plugins = {}
    # NOTE(review): eval() on restored settings text, see above.
    plugin_settings = eval(
        tab_data['BLACS settings']['plugin_data']
    ) if 'plugin_data' in tab_data['BLACS settings'] else {}
    for module_name, module in plugins.modules.items():
        try:
            # instantiate the plugin
            self.plugins[module_name] = module.Plugin(
                plugin_settings[module_name]
                if module_name in plugin_settings else {})
        except Exception:
            # The plugin name was previously never interpolated into the
            # message, so the log showed a literal '%s'.
            logger.exception(
                'Could not instantiate plugin \'%s\'. Skipping' % module_name)

    # Shared context handed to plugin menus, notifications and the
    # plugin_setup_complete hook.
    blacs_data = {
        'exp_config': self.exp_config,
        'ui': self.ui,
        'set_relaunch': self.set_relaunch,
        'plugins': self.plugins,
        'connection_table_h5file': self.connection_table_h5file,
        'connection_table_labscript': self.connection_table_labscript,
        'experiment_queue': self.queue
    }

    def create_menu(parent, menu_parameters):
        """Recursively add a menu, action or separator to ``parent``.

        ``menu_parameters`` is a dict. With a 'name' key it describes
        either a submenu (when 'menu_items' is present) or an action
        (optionally with an 'icon'); an 'action' key is connected to the
        created object's ``triggered`` signal. With a 'separator' key
        (and no 'name') a separator is added.
        """
        if 'name' in menu_parameters:
            if 'menu_items' in menu_parameters:
                child = parent.addMenu(menu_parameters['name'])
                for child_menu_params in menu_parameters['menu_items']:
                    create_menu(child, child_menu_params)
            else:
                if 'icon' in menu_parameters:
                    child = parent.addAction(
                        QIcon(menu_parameters['icon']),
                        menu_parameters['name'])
                else:
                    child = parent.addAction(menu_parameters['name'])

            if 'action' in menu_parameters:
                child.triggered.connect(menu_parameters['action'])

        elif 'separator' in menu_parameters:
            parent.addSeparator()

    # setup the Notification system
    logger.info('setting up notification system')
    self.notifications = Notifications(blacs_data)

    settings_callbacks = []
    for module_name, plugin in self.plugins.items():
        try:
            # Setup settings page
            settings_pages.extend(plugin.get_setting_classes())
            # Setup menu
            if plugin.get_menu_class():
                # must store a reference or else the methods called when
                # the menu actions are triggered (contained in this object)
                # will be garbage collected
                menu = plugin.get_menu_class()(blacs_data)
                create_menu(self.ui.menubar, menu.get_menu_items())
                plugin.set_menu_instance(menu)

            # Setup notifications
            plugin_notifications = {}
            for notification_class in plugin.get_notification_classes():
                self.notifications.add_notification(notification_class)
                plugin_notifications[
                    notification_class] = self.notifications.get_instance(
                        notification_class)
            plugin.set_notification_instances(plugin_notifications)

            # Register callbacks
            callbacks = plugin.get_callbacks()
            # save the settings_changed callback in a separate list for
            # setting up later (the Settings object does not exist yet)
            if isinstance(callbacks,
                          dict) and 'settings_changed' in callbacks:
                settings_callbacks.append(callbacks['settings_changed'])

        except Exception:
            logger.exception(
                'Plugin \'%s\' error. Plugin may not be functional.' %
                module_name)

    # setup the BLACS preferences system
    logger.info('setting up preferences system')
    self.settings = Settings(file=self.settings_path,
                             parent=self.ui,
                             page_classes=settings_pages)
    for callback in settings_callbacks:
        self.settings.register_callback(callback)

    # update the blacs_data dictionary with the settings system
    blacs_data['settings'] = self.settings

    for module_name, plugin in self.plugins.items():
        try:
            plugin.plugin_setup_complete(blacs_data)
        except Exception:
            # backwards compatibility for old plugins
            try:
                plugin.plugin_setup_complete()
                logger.warning(
                    'Plugin \'%s\' using old API. Please update Plugin.plugin_setup_complete method to accept a dictionary of blacs_data as the only argument.'
                    % module_name)
            except Exception:
                logger.exception(
                    'Plugin \'%s\' error. Plugin may not be functional.' %
                    module_name)

    # Connect menu actions
    self.ui.actionOpenPreferences.triggered.connect(
        self.on_open_preferences)
    self.ui.actionSave.triggered.connect(self.on_save_front_panel)
    self.ui.actionOpen.triggered.connect(self.on_load_front_panel)
    self.ui.actionExit.triggered.connect(self.ui.close)

    # Connect the windows AppId stuff:
    if os.name == 'nt':
        self.ui.newWindow.connect(set_win_appusermodel)

    logger.info('showing UI')
    self.ui.show()
def __init__(self, h5file, logging_prefix=None, exceptions_in_thread=False):
    """Object to represent a connection table read from an HDF5 file.

    Args:
        h5file: path of the HDF5 file holding the 'connection table'
            dataset; stored as ``self.filepath``.
        logging_prefix: if not None, logging is enabled and the logger used
            is ``<prefix>.ConnectionTable``.
        exceptions_in_thread: if true, errors encountered while opening or
            parsing are passed to ``raise_exception_in_thread`` instead of
            being re-raised to the caller.

    On success ``self.raw_table``, ``self.table`` (name -> Connection),
    ``self.toplevel_children`` (connections whose ``parent_port`` is None)
    and, when the dataset has that attribute, ``self.master_pseudoclock``
    are populated. They keep their empty defaults otherwise.
    """
    self.filepath = h5file

    if logging_prefix is None:
        self.logger = None
    else:
        self.logger = logging.getLogger(
            '{}.ConnectionTable'.format(logging_prefix))
        self.logger.debug('Parsing connection table from %s' % h5file)

    # Defaults, left in place if anything below fails.
    self.toplevel_children = {}
    self.table = {}
    self.master_pseudoclock = None
    self.raw_table = np.empty(0)

    def report(message, with_traceback=False):
        # Log the failure (if logging is on) and, in threaded mode, hand the
        # exception currently being handled to a separate thread. Must be
        # called from inside an ``except`` block.
        if self.logger:
            if with_traceback:
                self.logger.exception(message)
            else:
                self.logger.error(message)
        if exceptions_in_thread:
            raise_exception_in_thread(sys.exc_info())

    try:
        with h5py.File(h5file, 'r') as hdf5_file:
            try:
                dataset = hdf5_file['connection table']
            except Exception:
                report('could not open connection table dataset in %s' % h5file)
                if not exceptions_in_thread:
                    raise
                return

            self.raw_table = dataset[:]

            # The master pseudoclock attribute is optional.
            try:
                self.master_pseudoclock = _ensure_str(
                    dataset.attrs['master_pseudoclock'])
            except KeyError:
                pass

            try:
                parsed_rows = [Connection(row) for row in self.raw_table]
                self.table = {conn.name: conn for conn in parsed_rows}
                for conn_name, conn in self.table.items():
                    conn._populate_relatives(self.table)
                    if conn.parent_port is None:
                        self.toplevel_children[conn_name] = conn
            except Exception:
                report('Could not parse connection table in %s' % h5file)
                if not exceptions_in_thread:
                    raise
    except Exception:
        # Also reached when one of the inner handlers re-raises.
        report('Could not open connection table file %s' % h5file,
               with_traceback=True)
        if not exceptions_in_thread:
            raise
def manage(self):
    """Run the shot queue until ``self.manager_running`` becomes false.

    Each pass of the main loop handles one shot file:

    1. If the queue is paused, or no file is available, sleep one second
       and try again.
    2. Transition to buffered: write the shot globals into the file,
       compile it, ask every device listed under ``devices/`` to transition
       to buffered mode and wait for their replies on
       ``self.current_queue``. A timeout, device error, abort click or
       device restart aborts the shot; except on an abort click, the queue
       is paused and the file is put back at the front.
    3. Run: wait for the inter-shot timer, start the master pseudoclock and
       wait for a ``'done'`` message, polling for abort/restart.
    4. Transition to manual: store the front panel state and run time in
       the file, transition the devices back, then run the saved
       post-processing functions and refresh the globals table.
    5. Submit the file for analysis, call plugin ``shot_complete``
       callbacks and optionally re-queue the shot when repeat is enabled.

    An exception in steps 2-3 or in step 4 pauses the queue, replaces the
    shot file with a cleaned copy, re-queues it and tries to put the
    devices back into manual mode.
    """
    logger = logging.getLogger('BLACS.queue_manager.thread')
    # While the program is running!
    logger.info('starting')

    # HDF5 prints lots of errors by default, for things that aren't
    # actually errors. These are silenced on a per thread basis,
    # and automatically silenced in the main thread when h5py is
    # imported. So we'll silence them in this thread too:
    h5py._errors.silence_errors()

    # This name stores the queue currently being used to
    # communicate with tabs, so that abort signals can be put
    # to it when those tabs never respond and are restarted by
    # the user.
    self.current_queue = Queue.Queue()

    #TODO: put in general configuration
    timeout_limit = 300  # seconds
    self.set_status("Idle")

    while self.manager_running:
        # If the pause button is pushed in, sleep
        if self.manager_paused:
            if self.get_status() == "Idle":
                logger.info('Paused')
                self.set_status("Queue Paused")
            time.sleep(1)
            continue

        # Get the top file
        try:
            path = self.get_next_file()
            now_running_text = 'Now running: <b>%s</b>' % os.path.basename(path)
            self.set_status(now_running_text)
            logger.info('Got a file: %s' % path)
        except Exception:
            # If no files, sleep for 1s,
            self.set_status("Idle")
            time.sleep(1)
            continue

        devices_in_use = {}
        transition_list = {}
        start_time = time.time()
        self.current_queue = Queue.Queue()

        # Function to be run when abort button is clicked
        def abort_function():
            try:
                # Set device name to "Queue Manager" which will never be a
                # labscript device name as it is not a valid python
                # variable name (has a space in it!)
                self.current_queue.put(['Queue Manager', 'abort'])
            except Exception:
                logger.exception('Could not send abort message to the queue manager')

        # Function to be run when a device tab is restarted by the user
        def restart_function(device_name):
            try:
                self.current_queue.put([device_name, 'restart'])
            except Exception:
                logger.exception('Could not send restart message to the queue manager for device %s' % device_name)

        ######################################################################
        #                       transition to buffered                       #
        ######################################################################
        try:
            # A Queue for event-based notification when the tabs have
            # completed transitioning to buffered:
            timed_out = False
            error_condition = False
            abort = False
            restarted = False
            self.set_status(now_running_text + "<br>Transitioning to Buffered")

            # Enable abort button, and link in current_queue:
            inmain(self._ui.queue_abort_button.clicked.connect, abort_function)
            inmain(self._ui.queue_abort_button.setEnabled, True)

            # Ready to run file: assume that the file has _not_ been
            # compiled and compile it.
            # Extract script globals, and update them from the BLACS
            # maintained dictionary of globals.
            shot_globals = get_shot_globals(path)
            shot_globals.update(self.DynamicGlobals)
            with h5py.File(path, "a") as hdf5_file:
                set_shot_globals(hdf5_file, shot_globals)

            # Compile file
            compile_h5(path)

            # Run file
            with h5py.File(path, "r+") as hdf5_file:
                min_time = hdf5_file.attrs['min_time']
                # Materialise the names now: they are used after the file
                # has been closed, and on Python 3 keys() is a view.
                h5_file_devices = list(hdf5_file['devices/'].keys())

            for name in h5_file_devices:
                try:
                    # Connect restart signal from tabs to current_queue and
                    # transition the device to buffered mode
                    success = self.transition_device_to_buffered(name, transition_list, path, restart_function)
                    if not success:
                        logger.error('%s has an error condition, aborting run' % name)
                        error_condition = True
                        break
                except Exception as e:
                    logger.error('Exception while transitioning %s to buffered mode. Exception was: %s' % (name, str(e)))
                    error_condition = True
                    break

            devices_in_use = transition_list.copy()

            while transition_list and not error_condition:
                try:
                    # Wait for a device to transition_to_buffered:
                    logger.debug('Waiting for the following devices to finish transitioning to buffered mode: %s' % str(transition_list))
                    device_name, result = self.current_queue.get(timeout=2)

                    # Handle abort button signal
                    if device_name == 'Queue Manager' and result == 'abort':
                        # we should abort the run
                        logger.info('abort signal received from GUI')
                        abort = True
                        break

                    if result == 'fail':
                        logger.info('abort signal received during transition to buffered of %s' % device_name)
                        error_condition = True
                        break
                    elif result == 'restart':
                        logger.info('Device %s was restarted, aborting shot.' % device_name)
                        restarted = True
                        break

                    logger.debug('%s finished transitioning to buffered mode' % device_name)

                    # The tab says it's done, but does it have an error
                    # condition?
                    if self.get_device_error_state(device_name, transition_list):
                        logger.error('%s has an error condition, aborting run' % device_name)
                        error_condition = True
                        break

                    del transition_list[device_name]
                except Queue.Empty:
                    # It's been 2 seconds without a device finishing
                    # transitioning to buffered. Is there an error?
                    for name in transition_list:
                        if self.get_device_error_state(name, transition_list):
                            error_condition = True
                            break
                    if error_condition:
                        break
                    # Has programming timed out?
                    if time.time() - start_time > timeout_limit:
                        logger.error('Transitioning to buffered mode timed out')
                        timed_out = True
                        break

            # Handle if we broke out of loop due to timeout or error:
            if timed_out or error_condition or abort or restarted:
                # Pause the queue, re add the path to the top of the queue,
                # and set a status message!
                # only if we aren't responding to an abort click
                if not abort:
                    self.manager_paused = True
                    self.prepend(path)
                if timed_out:
                    self.set_status("Device programming timed out. Queue Paused...")
                elif abort:
                    self.set_status("Shot aborted")
                elif restarted:
                    self.set_status('A device was restarted during transition_to_buffered. Shot aborted')
                else:
                    self.set_status("One or more devices is in an error state. Queue Paused...")

                # Abort the run for all devices in use:
                # need to recreate the queue here because we don't want to
                # hear from devices that are still transitioning to
                # buffered mode
                self.current_queue = Queue.Queue()
                for tab in devices_in_use.values():
                    # We call abort buffered here, because if each tab is
                    # either in mode=BUFFERED or transition_to_buffered
                    # failed in which case it should have called
                    # abort_transition_to_buffered itself and returned to
                    # manual mode.
                    # Since abort buffered will only run in mode=BUFFERED,
                    # and the state is not queued indefinitely (aka it is
                    # deleted if we are not in mode=BUFFERED) this is the
                    # correct method call to make for either case
                    tab.abort_buffered(self.current_queue)
                    # We don't need to check the results of this function
                    # call because it will either be successful, or raise a
                    # visible error in the tab.

                    # disconnect restart signal from tabs
                    inmain(tab.disconnect_restart_receiver, restart_function)

                # disconnect abort button and disable
                inmain(self._ui.queue_abort_button.clicked.disconnect, abort_function)
                inmain(self._ui.queue_abort_button.setEnabled, False)

                # Start a new iteration
                continue

            ##################################################################
            #                             SCIENCE!                           #
            ##################################################################

            # Get front panel data, but don't save it to the h5 file until
            # the experiment ends:
            states, tab_positions, window_data, plugin_data = self.BLACS.front_panel_settings.get_save_data()
            self.set_status(now_running_text + "<br>Running...(program time: %.3fs)" % (time.time() - start_time))

            # A Queue for event-based notification of when the experiment
            # has finished.
            experiment_finished_queue = Queue.Queue()
            logger.debug('About to start the master pseudoclock')
            # Do not start until delay time specified by last sequence has
            # expired
            self._timer.wait()
            # Start the timer to block until the next run starts
            # NOTE(review): 'precent_done' looks like a misspelling of
            # 'percent_done'; left unchanged because the timer
            # implementation is not visible here -- confirm.
            self._timer.start(
                min_time,
                countdown_queue=self.BLACS._countdown_queue,
                countdown_mode='precent_done')
            run_time = time.localtime()
            #TODO: fix potential race condition if BLACS is closing when this line executes?
            self.BLACS.tablist[self.master_pseudoclock].start_run(experiment_finished_queue)

            # Wait for notification of the end of run:
            abort = False
            restarted = False
            done = False
            while not (abort or restarted or done):
                try:
                    done = experiment_finished_queue.get(timeout=0.5) == 'done'
                except Queue.Empty:
                    pass
                try:
                    # Poll self.current_queue for abort signal from button
                    # or device restart
                    device_name, result = self.current_queue.get_nowait()
                    if (device_name == 'Queue Manager' and result == 'abort'):
                        abort = True
                    if result == 'restart':
                        restarted = True
                    # Check for error states in tabs
                    # NOTE(review): this check only runs when a message was
                    # just taken off current_queue -- confirm that is
                    # intended.
                    for device_name, tab in devices_in_use.items():
                        if self.get_device_error_state(device_name, devices_in_use):
                            restarted = True
                except Queue.Empty:
                    pass

            if abort or restarted:
                for devicename, tab in devices_in_use.items():
                    if tab.mode == MODE_BUFFERED:
                        tab.abort_buffered(self.current_queue)
                    # disconnect restart signal from tabs
                    inmain(tab.disconnect_restart_receiver, restart_function)

            # Disable abort button
            inmain(self._ui.queue_abort_button.clicked.disconnect, abort_function)
            inmain(self._ui.queue_abort_button.setEnabled, False)

            if restarted:
                self.manager_paused = True
                self.prepend(path)
                self.set_status("Device restarted mid-shot. Shot aborted, Queue paused.")
            elif abort:
                self.set_status("Shot aborted")

            if abort or restarted:
                # after disabling the abort button, we now start a new
                # iteration
                continue

            logger.info('Run complete')
            self.set_status(now_running_text + "<br>Sequence done, saving data...")
        # End try/except block here
        except Exception:
            logger.exception("Error in queue manager execution. Queue paused.")
            # clean up the h5 file
            self.manager_paused = True
            # clean the h5 file:
            self.clean_h5_file(path, 'temp.h5')
            try:
                os.remove(path)
                os.rename('temp.h5', path)
            except OSError:
                # Previously ``except WindowsError if ... else None``, which
                # evaluates to ``except None`` off Windows and raises
                # TypeError on Python 3. WindowsError is an OSError, so this
                # covers the original Windows case on every platform.
                logger.warning('Couldn\'t delete failed run file %s, another process may be using it. Using alternate filename for second attempt.' % path)
                os.rename('temp.h5', path.replace('.h5', '_retry.h5'))
                path = path.replace('.h5', '_retry.h5')
            # Put it back at the start of the queue:
            self.prepend(path)

            # Need to put devices back in manual mode
            self.current_queue = Queue.Queue()
            for devicename, tab in devices_in_use.items():
                if tab.mode == MODE_BUFFERED or tab.mode == MODE_TRANSITION_TO_BUFFERED:
                    tab.abort_buffered(self.current_queue)
                # disconnect restart signal from tabs
                inmain(tab.disconnect_restart_receiver, restart_function)
            self.set_status("Error occured in Queue Manager. Queue Paused. \nPlease make sure all devices are back in manual mode before unpausing the queue")

            # disconnect and disable abort button
            inmain(self._ui.queue_abort_button.clicked.disconnect, abort_function)
            inmain(self._ui.queue_abort_button.setEnabled, False)

            # Start a new iteration
            continue

        ######################################################################
        #                           SCIENCE OVER!                            #
        ######################################################################

        ######################################################################
        #                        Transition to manual                        #
        ######################################################################
        # start new try/except block here
        try:
            with h5py.File(path, 'r+') as hdf5_file:
                self.BLACS.front_panel_settings.store_front_panel_in_h5(hdf5_file, states, tab_positions, window_data, plugin_data, save_conn_table=False)
            with h5py.File(path, 'r+') as hdf5_file:
                hdf5_file['/'].create_group('data')
                # stamp with the run time of the experiment
                hdf5_file.attrs['run time'] = time.strftime('%Y%m%dT%H%M%S', run_time)

            for devicename, tab in devices_in_use.items():
                if tab.mode == MODE_BUFFERED:
                    tab.transition_to_manual(self.current_queue)

            error_condition = False
            transition_list = devices_in_use.copy()

            while transition_list and not error_condition:
                try:
                    # Wait for a device to transition_to_manual:
                    logger.debug('Waiting for the following devices to finish transitioning to manual mode: %s' % str(transition_list))
                    device_name, result = self.current_queue.get(timeout=2)
                    if (device_name == 'Queue Manager' and result == 'abort'):
                        # Ignore any abort signals left in the queue, it
                        # is too late to abort in any case
                        continue

                    logger.debug('%s finished transitioning to manual mode' % device_name)
                    if result == 'fail':
                        error_condition = True
                    if result == 'restart':
                        error_condition = True
                    if self.get_device_error_state(device_name, transition_list):
                        error_condition = True

                    del transition_list[device_name]
                    # Disconnect the restart signal from the tab that just
                    # reported in. This used the stale loop variable ``tab``
                    # (the last tab of the loop above), so the wrong tab was
                    # disconnected.
                    finished_tab = devices_in_use[device_name]
                    inmain(finished_tab.disconnect_restart_receiver, restart_function)
                except Queue.Empty:
                    # It's been 2 seconds without a device finishing
                    # transitioning to manual. Is there an error?
                    for name in transition_list:
                        if self.get_device_error_state(name, transition_list):
                            error_condition = True
                    if error_condition:
                        break

            if error_condition:
                self.set_status("Error during transtion to manual. Queue Paused.")
                # TODO: Kind of dodgy raising an exception here...
                raise Exception('A device failed during transition to manual')

            # All data written, now run all PostProcessing functions
            SavedFunctions = labscript_utils.h5_scripting.get_all_saved_functions(path)
            with h5py.File(path, 'r+') as hdf5_file:
                for SavedFunction in SavedFunctions:
                    try:
                        result = SavedFunction.custom_call(hdf5_file, **shot_globals)
                    except Exception:
                        import zprocess
                        zprocess.raise_exception_in_thread(sys.exc_info())
                        result = {}
                        logger.error('Post Processing function did not execute correctly')
                    try:
                        self.DynamicGlobals.update(result)
                    except Exception:
                        logger.error('Post Processing function did not return a dict type')

            # Refresh the globals table in the UI from DynamicGlobals.
            inmain(self._ui.Globals_tableWidget.setRowCount, len(self.DynamicGlobals))
            for i, key in enumerate(self.DynamicGlobals):
                inmain(self._ui.Globals_tableWidget.setItem, i, 0, QTableWidgetItem(key))
                inmain(self._ui.Globals_tableWidget.setItem, i, 1, QTableWidgetItem(str(self.DynamicGlobals[key])))

        except Exception:
            logger.exception("Error in queue manager execution. Queue paused.")
            # clean up the h5 file
            self.manager_paused = True
            # clean the h5 file:
            self.clean_h5_file(path, 'temp.h5')
            try:
                os.remove(path)
                os.rename('temp.h5', path)
            except OSError:
                # See the identical handler in the first except block.
                logger.warning('Couldn\'t delete failed run file %s, another process may be using it. Using alternate filename for second attempt.' % path)
                os.rename('temp.h5', path.replace('.h5', '_retry.h5'))
                path = path.replace('.h5', '_retry.h5')
            # Put it back at the start of the queue:
            self.prepend(path)

            # Need to put devices back in manual mode. Since the experiment
            # is over before this try/except block begins, we can safely
            # call transition_to_manual() on each device tab
            self.current_queue = Queue.Queue()
            for devicename, tab in devices_in_use.items():
                if tab.mode == MODE_BUFFERED:
                    tab.transition_to_manual(self.current_queue)
                # disconnect restart signal from tabs
                inmain(tab.disconnect_restart_receiver, restart_function)
            self.set_status("Error occured in Queue Manager. Queue Paused. \nPlease make sure all devices are back in manual mode before unpausing the queue")
            continue

        ######################################################################
        #                         Analysis Submission                        #
        ######################################################################
        logger.info('All devices are back in static mode.')
        # Submit to the analysis server
        self.BLACS.analysis_submission.get_queue().put(['file', path])

        ######################################################################
        #                          Plugin callbacks                          #
        ######################################################################
        for plugin in self.BLACS.plugins.values():
            callbacks = plugin.get_callbacks()
            if isinstance(callbacks, dict) and 'shot_complete' in callbacks:
                try:
                    callbacks['shot_complete'](path)
                except Exception:
                    logger.exception("Plugin callback raised an exception")

        ######################################################################
        #                         Repeat Experiment?                         #
        ######################################################################
        if (self.manager_repeat == 1) or (self.manager_repeat == 2 and self.get_num_files() == 0):
            # Resubmit job to the bottom of the queue:
            try:
                message = self.process_request(path)
                logger.info(message)
            except Exception:
                # TODO: make this error popup for the user
                logger.error('Failed to copy h5_file (%s) for repeat run' % path)

        self.set_status("Idle")
    logger.info('Stopping')
def manage(self):
    """Run the queue manager loop until ``self.manager_running`` is false.

    Each iteration takes the next shot file from the queue and then:

    1. transitions every device listed under ``devices/`` in the shot
       file to buffered mode,
    2. starts the master pseudoclock and waits for the run to finish,
       be aborted, or be interrupted by a device restart,
    3. stores the front panel state in the shot file and transitions
       the devices back to manual mode one at a time,
    4. hands the shot to analysis submission, runs the plugin
       ``shot_complete`` callbacks, and optionally resubmits the shot
       for a repeat run.

    On timeout, device error, device restart or an unexpected
    exception, the queue is paused and the shot is put back at the
    front of the queue. A click on the abort button drops the shot
    without pausing the queue.
    """
    logger = logging.getLogger('BLACS.queue_manager.thread')
    # While the program is running!
    logger.info('starting')

    # HDF5 prints lots of errors by default, for things that aren't
    # actually errors. These are silenced on a per thread basis,
    # and automatically silenced in the main thread when h5py is
    # imported. So we'll silence them in this thread too:
    h5py._errors.silence_errors()

    # This name stores the queue currently being used to
    # communicate with tabs, so that abort signals can be put
    # to it when those tabs never respond and are restarted by
    # the user.
    self.current_queue = queue.Queue()

    # TODO: put in general configuration
    timeout_limit = 300  # seconds
    self.set_status("Idle")

    def requeue_failed_shot(shot_path):
        # Clean the h5 file of a failed run and put it back at the front
        # of the queue (under a '_retry' name if it cannot be replaced).

        # is this a repeat?
        try:
            with h5py.File(shot_path, 'r') as h5_file:
                repeat_number = h5_file.attrs.get('run repeat', 0)
        except Exception:
            repeat_number = 0
        # clean the h5 file:
        self.clean_h5_file(shot_path, 'temp.h5', repeat_number=repeat_number)
        try:
            shutil.move('temp.h5', shot_path)
        except Exception:
            msg = ('Couldn\'t delete failed run file %s, ' % shot_path +
                   'another process may be using it. Using alternate '
                   'filename for second attempt.')
            logger.warning(msg, exc_info=True)
            retry_path = shot_path.replace('.h5', '_retry.h5')
            shutil.move('temp.h5', retry_path)
            shot_path = retry_path
        # Put it back at the start of the queue:
        self.prepend(shot_path)

    while self.manager_running:
        # If the pause button is pushed in, sleep
        if self.manager_paused:
            if self.get_status() == "Idle":
                logger.info('Paused')
                self.set_status("Queue paused")
            time.sleep(1)
            continue

        # Get the top file
        try:
            path = self.get_next_file()
            self.set_status('Preparing shot...', path)
            logger.info('Got a file: %s' % path)
        except Exception:
            # If no files, sleep for 1s,
            self.set_status("Idle")
            time.sleep(1)
            continue

        devices_in_use = {}
        transition_list = {}
        start_time = time.time()
        self.current_queue = queue.Queue()

        # Function to be run when abort button is clicked
        def abort_function():
            try:
                # Set device name to "Queue Manager" which will never be a labscript device name
                # as it is not a valid python variable name (has a space in it!)
                self.current_queue.put(['Queue Manager', 'abort'])
            except Exception:
                logger.exception('Could not send abort message to the queue manager')

        # Function to be run when a device tab is restarted by the user
        def restart_function(device_name):
            try:
                self.current_queue.put([device_name, 'restart'])
            except Exception:
                logger.exception('Could not send restart message to the queue manager for device %s' % device_name)

        ######################################################################################
        #                             transition to buffered                                 #
        ######################################################################################
        try:
            # A Queue for event-based notification when the tabs have
            # completed transitioning to buffered:

            timed_out = False
            error_condition = False
            abort = False
            restarted = False
            self.set_status("Transitioning to buffered...", path)

            # Enable abort button, and link in current_queue:
            inmain(self._ui.queue_abort_button.clicked.connect, abort_function)
            inmain(self._ui.queue_abort_button.setEnabled, True)

            with h5py.File(path, 'r') as hdf5_file:
                h5_file_devices = list(hdf5_file['devices/'].keys())

            for name in h5_file_devices:
                try:
                    # Connect restart signal from tabs to current_queue and transition the device to buffered mode
                    success = self.transition_device_to_buffered(name, transition_list, path, restart_function)
                    if not success:
                        logger.error('%s has an error condition, aborting run' % name)
                        error_condition = True
                        break
                except Exception:
                    logger.exception('Exception while transitioning %s to buffered mode.' % (name))
                    error_condition = True
                    break

            devices_in_use = transition_list.copy()

            while transition_list and not error_condition:
                try:
                    # Wait for a device to transition_to_buffered:
                    logger.debug('Waiting for the following devices to finish transitioning to buffered mode: %s' % str(transition_list))
                    device_name, result = self.current_queue.get(timeout=2)

                    # Handle abort button signal
                    if device_name == 'Queue Manager' and result == 'abort':
                        # we should abort the run
                        logger.info('abort signal received from GUI')
                        abort = True
                        break

                    if result == 'fail':
                        logger.info('abort signal received during transition to buffered of %s' % device_name)
                        error_condition = True
                        break
                    elif result == 'restart':
                        logger.info('Device %s was restarted, aborting shot.' % device_name)
                        restarted = True
                        break

                    logger.debug('%s finished transitioning to buffered mode' % device_name)

                    # The tab says it's done, but does it have an error condition?
                    if self.get_device_error_state(device_name, transition_list):
                        logger.error('%s has an error condition, aborting run' % device_name)
                        error_condition = True
                        break

                    del transition_list[device_name]
                except queue.Empty:
                    # It's been 2 seconds without a device finishing
                    # transitioning to buffered. Is there an error?
                    for name in transition_list:
                        if self.get_device_error_state(name, transition_list):
                            error_condition = True
                            break
                    if error_condition:
                        break
                    # Has programming timed out?
                    if time.time() - start_time > timeout_limit:
                        logger.error('Transitioning to buffered mode timed out')
                        timed_out = True
                        break

            # Handle if we broke out of loop due to timeout or error:
            if timed_out or error_condition or abort or restarted:
                # Pause the queue, re add the path to the top of the queue, and set a status message!
                # only if we aren't responding to an abort click
                if not abort:
                    self.manager_paused = True
                    self.prepend(path)
                if timed_out:
                    self.set_status("Programming timed out\nQueue paused")
                elif abort:
                    self.set_status("Aborted")
                elif restarted:
                    self.set_status("Device restarted in transition to\nbuffered. Aborted. Queue paused.")
                else:
                    self.set_status("Device(s) in error state\nQueue Paused")

                # Abort the run for all devices in use:
                # need to recreate the queue here because we don't want to hear from devices that are still transitioning to buffered mode
                self.current_queue = queue.Queue()
                for tab in devices_in_use.values():
                    # We call abort buffered here, because if each tab is either in mode=BUFFERED or transition_to_buffered failed in which case
                    # it should have called abort_transition_to_buffered itself and returned to manual mode
                    # Since abort buffered will only run in mode=BUFFERED, and the state is not queued indefinitely (aka it is deleted if we are not in mode=BUFFERED)
                    # this is the correct method call to make for either case
                    tab.abort_buffered(self.current_queue)
                    # We don't need to check the results of this function call because it will either be successful, or raise a visible error in the tab.

                    # disconnect restart signal from tabs
                    inmain(tab.disconnect_restart_receiver, restart_function)

                # disconnect abort button and disable
                inmain(self._ui.queue_abort_button.clicked.disconnect, abort_function)
                inmain(self._ui.queue_abort_button.setEnabled, False)

                # Start a new iteration
                continue

            ######################################################################################
            #                                     SCIENCE!                                       #
            ######################################################################################

            # Get front panel data, but don't save it to the h5 file until the experiment ends:
            states, tab_positions, window_data, plugin_data = self.BLACS.front_panel_settings.get_save_data()
            self.set_status("Running (program time: %.3fs)..." % (time.time() - start_time), path)

            # A Queue for event-based notification of when the experiment has finished.
            experiment_finished_queue = queue.Queue()
            logger.debug('About to start the master pseudoclock')
            run_time = time.localtime()
            # TODO: fix potential race condition if BLACS is closing when this line executes?
            self.BLACS.tablist[self.master_pseudoclock].start_run(experiment_finished_queue)

            # Wait for notification of the end of run:
            abort = False
            restarted = False
            done = False
            while not (abort or restarted or done):
                try:
                    done = experiment_finished_queue.get(timeout=0.5) == 'done'
                except queue.Empty:
                    pass
                try:
                    # Poll self.current_queue for abort signal from button or device restart
                    device_name, result = self.current_queue.get_nowait()
                    if (device_name == 'Queue Manager' and result == 'abort'):
                        abort = True
                    if result == 'restart':
                        restarted = True
                    # Check for error states in tabs
                    # NOTE(review): this check only runs when a message was
                    # dequeued above; an empty queue skips it. Confirm that
                    # is intended before moving it out of the try block.
                    for name in devices_in_use:
                        if self.get_device_error_state(name, devices_in_use):
                            restarted = True
                except queue.Empty:
                    pass

            if abort or restarted:
                for tab in devices_in_use.values():
                    if tab.mode == MODE_BUFFERED:
                        tab.abort_buffered(self.current_queue)
                    # disconnect restart signal from tabs
                    inmain(tab.disconnect_restart_receiver, restart_function)

            # Disable abort button
            inmain(self._ui.queue_abort_button.clicked.disconnect, abort_function)
            inmain(self._ui.queue_abort_button.setEnabled, False)

            if restarted:
                self.manager_paused = True
                self.prepend(path)
                self.set_status("Device restarted during run.\nAborted. Queue paused")
            elif abort:
                self.set_status("Aborted")

            if abort or restarted:
                # after disabling the abort button, we now start a new iteration
                continue

            logger.info('Run complete')
            self.set_status("Saving data...", path)
        # End try/except block here
        except Exception:
            logger.exception("Error in queue manager execution. Queue paused.")

            # Raise the error in a thread for visibility
            zprocess.raise_exception_in_thread(sys.exc_info())
            # clean up the h5 file and put it back at the start of the queue
            self.manager_paused = True
            requeue_failed_shot(path)

            # Need to put devices back in manual mode
            self.current_queue = queue.Queue()
            for tab in devices_in_use.values():
                if tab.mode == MODE_BUFFERED or tab.mode == MODE_TRANSITION_TO_BUFFERED:
                    tab.abort_buffered(self.current_queue)
                # disconnect restart signal from tabs
                inmain(tab.disconnect_restart_receiver, restart_function)
            self.set_status("Error in queue manager\nQueue paused")

            # disconnect and disable abort button
            inmain(self._ui.queue_abort_button.clicked.disconnect, abort_function)
            inmain(self._ui.queue_abort_button.setEnabled, False)

            # Start a new iteration
            continue

        ######################################################################################
        #                                   SCIENCE OVER!                                    #
        ######################################################################################

        ######################################################################################
        #                                Transition to manual                                #
        ######################################################################################
        # start new try/except block here
        try:
            with h5py.File(path, 'r+') as hdf5_file:
                self.BLACS.front_panel_settings.store_front_panel_in_h5(hdf5_file, states, tab_positions, window_data, plugin_data, save_conn_table=False, save_queue_data=False)

                hdf5_file['/'].create_group('data')
                # stamp with the run time of the experiment
                hdf5_file.attrs['run time'] = time.strftime('%Y%m%dT%H%M%S', run_time)

            # A Queue for event-based notification of when the devices have transitioned to static mode:
            # Shouldn't need to recreate the queue: self.current_queue = queue.Queue()

            # TODO: unserialise this if everything is using zprocess.locking
            # only transition one device to static at a time,
            # since writing data to the h5 file can potentially
            # happen at this stage:
            error_condition = False

            # This is far more complicated than it needs to be once transition_to_manual is unserialised!
            response_list = {}
            for device_name, tab in devices_in_use.items():
                if device_name not in response_list:
                    tab.transition_to_manual(self.current_queue)
                    while True:
                        # TODO: make the call to current_queue.get() timeout
                        # and periodically check for error condition on the tab
                        got_device_name, result = self.current_queue.get()
                        # if the response is not for this device, then save it for later!
                        if device_name != got_device_name:
                            response_list[got_device_name] = result
                        else:
                            break
                else:
                    result = response_list[device_name]
                # Check for abort signal from device restart
                if result == 'fail':
                    error_condition = True
                if result == 'restart':
                    error_condition = True
                if self.get_device_error_state(device_name, devices_in_use):
                    error_condition = True
                # Once device has transitioned_to_manual, disconnect restart signal
                inmain(tab.disconnect_restart_receiver, restart_function)

            if error_condition:
                self.set_status("Error in transtion to manual\nQueue Paused")

        except Exception:
            error_condition = True
            logger.exception("Error in queue manager execution. Queue paused.")
            self.set_status("Error in queue manager\nQueue paused")

            # Raise the error in a thread for visibility
            zprocess.raise_exception_in_thread(sys.exc_info())

        if error_condition:
            # clean up the h5 file and put it back at the start of the queue
            self.manager_paused = True
            requeue_failed_shot(path)

            # Need to put devices back in manual mode. Since the experiment is over before this try/except block begins, we can
            # safely call transition_to_manual() on each device tab
            # TODO: Not serialised...could be bad with older BIAS versions :(
            self.current_queue = queue.Queue()
            for tab in devices_in_use.values():
                if tab.mode == MODE_BUFFERED:
                    tab.transition_to_manual(self.current_queue)
                # disconnect restart signal from tabs
                inmain(tab.disconnect_restart_receiver, restart_function)

            continue

        ######################################################################################
        #                                Analysis Submission                                 #
        ######################################################################################
        logger.info('All devices are back in static mode.')

        # check for analysis Filters in Plugins
        send_to_analysis = True
        for callback in self.get_callbacks('analysis_cancel_send'):
            try:
                if callback(path):
                    send_to_analysis = False
                    break
            except Exception:
                logger.exception("Plugin callback raised an exception")

        # Submit to the analysis server
        if send_to_analysis:
            self.BLACS.analysis_submission.get_queue().put(['file', path])

        ######################################################################################
        #                                  Plugin callbacks                                  #
        ######################################################################################
        for plugin in self.BLACS.plugins.values():
            callbacks = plugin.get_callbacks()
            if isinstance(callbacks, dict) and 'shot_complete' in callbacks:
                try:
                    callbacks['shot_complete'](path)
                except Exception:
                    logger.exception("Plugin callback raised an exception")

        ######################################################################################
        #                                 Repeat Experiment?                                 #
        ######################################################################################
        # check for repeat Filters in Plugins
        repeat_shot = self.manager_repeat
        for callback in self.get_callbacks('shot_ignore_repeat'):
            try:
                if callback(path):
                    repeat_shot = False
                    break
            except Exception:
                logger.exception("Plugin callback raised an exception")

        if repeat_shot:
            if ((self.manager_repeat_mode == self.REPEAT_ALL) or
                    (self.manager_repeat_mode == self.REPEAT_LAST and inmain(self._model.rowCount) == 0)):
                # Resubmit job to the bottom of the queue:
                try:
                    message = self.process_request(path)
                except Exception:
                    # TODO: make this error popup for the user
                    logger.exception('Failed to copy h5_file (%s) for repeat run' % path)
                else:
                    # Only log the reply when resubmission succeeded;
                    # otherwise there is no message to report.
                    logger.info(message)

        self.set_status("Idle")
    logger.info('Stopping')