class RunControlServer:
    """TCP command server that steers DAQ runs through a small state machine.

    The server keeps one of the states "idle", "initialized", "running" or
    "initfail" in ``self.current_state`` and serves one client connection at
    a time.  Every message on the wire, in both directions, is a 5-digit
    zero-padded decimal length followed by that many characters.

    Note: ``print`` is always called with a single parenthesised argument,
    which behaves identically under the Python 2 print statement used by the
    rest of this file.
    """

    # Help texts returned by the "help" command in each state.
    _HELP_IDLE = "\n".join([
        "Available commands:",
        "help\t\tShow this help",
        "get_state\tShow current state of RunControl",
        "get_setup\tShow current setup name",
        "get_setup_list\tShow list of available setups",
        "get_board_list\tShow list of boards in use with current setup",
        "get_board_config <b>\tShow current configuration of board <b>",
        "get_run_number\tReturn last run number in DB",
        "change_setup <setup>\tChange run setup to <setup>",
        "new_run\t\tInitialize system for a new run",
        "exit\t\tTell RunControl server to exit (use with extreme care!)",
    ])
    _HELP_INITIALIZED = "\n".join([
        "Available commands:",
        "help\t\tShow this help",
        "get_state\tShow current state of RunControl",
        "get_setup\tShow current setup name",
        "get_board_list\tShow list of boards in use with current setup",
        "get_board_config <b>\tShow current configuration of board <b>",
        "get_board_log_file <b>\tGet name of log file for board <b>",
        "get_run_number\tReturn current run number",
        "start_run\t\tStart run",
        "abort_run\t\tAbort run",
        "exit\t\tTell RunControl server to exit (use with extreme care!)",
    ])
    _HELP_RUNNING = "\n".join([
        "Available commands:",
        "help\t\tShow this help",
        "get_state\tShow current state of RunControl",
        "get_setup\tShow current setup name",
        "get_board_list\tShow list of boards in use with current setup",
        "get_board_config <b>\tShow current configuration of board <b>",
        "get_board_log_file <b>\tGet name of log file for board <b>",
        "get_run_number\tReturn current run number",
        "stop_run\t\tStop the run",
        "exit\t\tTell RunControl server to exit (use with extreme care!)",
    ])

    def __init__(self):
        """Acquire the lock file, load the initial setup and serve forever.

        Exits the process with status 1 if the lock file cannot be created,
        the initial setup cannot be loaded, or the server socket cannot be
        bound.  On the last two failures the lock file created here is
        removed again so that a later start is not blocked by it.
        """

        # Define names of lock and last_used_setup files
        self.lock_file = "run/lock"
        self.lus_file = "setup/last_used_setup"

        # Redefine print to send output to log file
        sys.stdout = Logger()

        # Create lock file. On failure the existing lock is NOT ours: leave it.
        if self.create_lock_file() == "error":
            sys.exit(1)

        # Define what setup to use at startup (can be changed interactively)
        initial_setup = self.get_initial_setup()
        print("=== Starting PADME Run Control server with %s setup" % initial_setup)

        # Create run
        self.run = Run()
        if self.run.change_setup(initial_setup) == "error":
            print("ERROR - Error while changing run setup to %s" % initial_setup)
            self._remove_lock_file()
            sys.exit(1)

        # Start in idle state
        self.current_state = "idle"

        # Create handler for PadmeDB
        self.db = PadmeDB()

        # Create useful regular expressions
        self.re_get_board_config = re.compile(r"^get_board_config (\d+)$")
        self.re_get_board_log_file = re.compile(r"^get_board_log_file (\d+)$")
        self.re_change_setup = re.compile(r"^change_setup (\w+)$")

        # Create a TCP/IP socket and bind it to the port
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server_address = ('localhost', 10000)
        print("Starting server socket on %s port %s" % server_address)
        try:
            self.sock.bind(server_address)
        except socket.error:
            print("ERROR - Could not bind to socket: %s" % str(sys.exc_info()[0]))
            self._remove_lock_file()
            sys.exit(1)

        # Listen for incoming connections
        self.sock.listen(1)

        # Define SIGINT handler
        signal.signal(signal.SIGINT, self.sigint_handler)

        # Setup main interface (main_loop only leaves via sys.exit)
        self.main_loop()

        # Clean up before exiting
        self.sock.close()

    def _remove_lock_file(self):
        """Delete the lock file if it exists."""
        if os.path.exists(self.lock_file):
            os.remove(self.lock_file)

    def _cleanup_run_files(self):
        """Remove the per-board init tag files and the run start/quit tag files."""
        for adc in self.run.adcboard_list:
            for path in (adc.initok_file, adc.initfail_file):
                if os.path.exists(path):
                    os.remove(path)
        for path in (self.run.start_file, self.run.quit_file):
            if os.path.exists(path):
                os.remove(path)

    def _stop_all_boards(self, notify_client):
        """Call stop_daq() on every board and report whether all succeeded.

        A failing board sets run status 6 in the DB (for non-zero run
        numbers).  When ``notify_client`` is true, a per-board answer is
        also sent to the connected client.
        """
        all_ok = True
        for adc in self.run.adcboard_list:
            if adc.stop_daq():
                if notify_client:
                    self.send_answer("adc %d terminate_ok" % adc.board_id)
                print("ADC board %02d - Terminated correctly" % adc.board_id)
            else:
                all_ok = False
                if notify_client:
                    self.send_answer("adc %d terminate_error" % adc.board_id)
                print("ADC board %02d - WARNING: problems while terminating DAQ" % adc.board_id)
                if self.run.run_number:
                    # Status 6: run ended with errors
                    self.db.set_run_status(self.run.run_number, 6)
        return all_ok

    def create_lock_file(self):
        """Create the lock file holding our pid.

        Returns "ok" on success, or "error" if something already exists at
        the lock file path (in which case nothing is written).
        """

        # Check if lock file exists
        if os.path.exists(self.lock_file):
            if os.path.isfile(self.lock_file):
                pid = 0
                with open(self.lock_file, "r") as lf:
                    # The last line of the file is reported as the owner pid
                    for line in lf:
                        pid = line
                print("ERROR - Lock file %s found for pid %s" % (self.lock_file, pid))
            else:
                print("ERROR - Lock file %s found but it is not a file" % self.lock_file)
            return "error"

        # Create our own lock file
        with open(self.lock_file, "w") as lf:
            lf.write("%d" % os.getpid())

        return "ok"

    def get_initial_setup(self):
        """Return the setup name stored in the last-used-setup file.

        Falls back to the default setup "test" (with a warning) when the
        file is missing, is not a regular file, or is empty.
        """

        setup = "test"

        if not os.path.exists(self.lus_file):
            print("WARNING - Could not find file with last used setup %s - Using default setup %s" % (self.lus_file, setup))
            return setup

        if not os.path.isfile(self.lus_file):
            print("WARNING - File with last used setup %s exists but it is not a file - Using default setup %s" % (self.lus_file, setup))
            return setup

        with open(self.lus_file, "r") as lusf:
            lus = lusf.read().strip("\n")
        if lus == "":
            print("WARNING - File with last used setup %s exists but it is empty - Using default setup %s" % (self.lus_file, setup))
            return setup

        return lus

    def sigint_handler(self, signal, frame):
        """SIGINT handler: abort an initialized/running run, then exit(0).

        The lock file is removed in every case.
        """

        print("RunControlSever received SIGINT: exiting")

        # If a run is initialized/running, abort it as cleanly as possible
        if self.current_state in ("initialized", "running"):

            self.run.run_comment_end = "Run aborted because of SIGINT"

            print("Aborting run on SIGINT")
            if self.run.run_number:
                # Status 4: run aborted
                self.db.set_run_status(self.run.run_number, 4)
                self.db.set_run_time_stop(self.run.run_number, self.now_str())
                # Read back the attribute assigned above (was misspelled
                # run_end_comment, which is never set anywhere in this class)
                self.db.set_run_comment_end(self.run.run_number, self.run.run_comment_end)

            # Create "stop the run" tag file
            open(self.run.quit_file, 'w').close()

            # No answers are sent here: a client may not be connected
            self._stop_all_boards(False)

            # Clean up run directory
            self._cleanup_run_files()

        self._remove_lock_file()

        # Now we can exit
        sys.exit(0)

    def main_loop(self):
        """Accept client connections forever and run the state machine.

        Each state handler returns either a new state, "client_close"
        (drop the connection and wait for the next client) or "exit"
        (remove the lock file and terminate the process).
        """

        while True:

            # Wait for a connection
            print("Waiting for a connection")
            (self.connection, client_address) = self.sock.accept()
            print("Connection from %s" % str(client_address))

            while True:

                # Handle connection according to current status of RunControl
                if self.current_state == "idle":
                    new_state = self.state_idle()
                elif self.current_state == "initialized":
                    new_state = self.state_initialized()
                elif self.current_state == "running":
                    new_state = self.state_running()
                elif self.current_state == "initfail":
                    new_state = self.state_initfail()
                else:
                    print("ERROR: unknown state %s - ABORTING" % self.current_state)
                    new_state = "exit"

                # See if status changed
                if new_state in ("idle", "initialized", "running", "initfail"):
                    self.current_state = new_state
                elif new_state == "client_close":
                    self.connection.close()
                    break
                elif new_state == "exit":
                    print("=== RunControlSever received exit command: exiting")
                    self.connection.close()
                    self._remove_lock_file()
                    sys.exit(0)
                else:
                    print("=== RunControlServer = ERROR: unknown new state %s - ABORTING" % new_state)
                    self.connection.close()
                    self._remove_lock_file()
                    sys.exit(1)

    def write_log(self, msg):
        """Print ``msg`` prefixed with the current local time."""
        print(self.now_str() + " " + msg)

    def now_str(self):
        """Return the current local time as "YYYY-MM-DD HH:MM:SS"."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def _handle_regex_command(self, cmd, handlers):
        """Answer a parametrised command, or report it as unknown.

        ``handlers`` is a sequence of (compiled regex, callable) pairs; the
        callable receives the first captured group of a matching regex and
        its return value is sent to the client.
        """
        matched = False
        for pattern, handler in handlers:
            m = pattern.match(cmd)
            if m:
                self.send_answer(handler(m.group(1)))
                matched = True

        # No regular expression matched: command is unknown
        if not matched:
            self.send_answer("unknown command")
            print("Command %s is unknown" % cmd)

    def state_idle(self):
        """Serve commands in the "idle" state until the state must change.

        Returns "client_close", "initialized", "initfail" or "exit".
        """

        while True:

            cmd = self.get_command()
            print("Received command %s" % cmd)
            if cmd == "client_close":
                return "client_close"
            elif cmd == "get_state":
                self.send_answer(self.current_state)
            elif cmd == "get_setup":
                self.send_answer(self.run.setup)
            elif cmd == "get_setup_list":
                self.send_answer(self.get_setup_list())
            elif cmd == "get_board_list":
                self.send_answer(str(self.run.boardid_list))
            elif cmd == "get_run_number":
                self.send_answer(str(self.db.get_last_run_in_db()))
            elif cmd == "new_run":
                res = self.new_run()
                if res in ("client_close", "initialized", "initfail"):
                    return res
                elif res == "error":
                    # Stay idle and keep serving this client
                    print("ERROR while initializing new run")
                else:
                    print("ERROR: new_run returned unknown answer %s (?)" % res)
            elif cmd == "exit":
                self.send_answer("exiting")
                return "exit"
            elif cmd == "help":
                self.send_answer(self._HELP_IDLE)
            else:
                # See if command can be handled by a regular expression
                self._handle_regex_command(cmd, (
                    (self.re_get_board_config, lambda arg: self.get_board_config(int(arg))),
                    (self.re_change_setup, self.change_setup),
                ))

    def state_initialized(self):
        """Serve commands in the "initialized" state until the state must change.

        Returns "client_close", "exit", or whatever abort_run()/start_run()
        return.
        """

        while True:

            cmd = self.get_command()
            print("Received command %s" % cmd)
            if cmd == "client_close":
                return "client_close"
            elif cmd == "get_state":
                self.send_answer(self.current_state)
            elif cmd == "get_setup":
                self.send_answer(self.run.setup)
            elif cmd == "get_board_list":
                self.send_answer(str(self.run.boardid_list))
            elif cmd == "get_run_number":
                self.send_answer(str(self.run.run_number))
            elif cmd == "abort_run":
                return self.abort_run()
            elif cmd == "start_run":
                return self.start_run()
            elif cmd == "exit":
                self.send_answer("exiting")
                return "exit"
            elif cmd == "help":
                self.send_answer(self._HELP_INITIALIZED)
            else:
                # See if command can be handled by a regular expression
                self._handle_regex_command(cmd, (
                    (self.re_get_board_config, lambda arg: self.get_board_config(int(arg))),
                    (self.re_get_board_log_file, lambda arg: self.get_board_log_file(int(arg))),
                ))

    def state_running(self):
        """Serve commands in the "running" state until the state must change.

        Returns "client_close", "exit", or whatever stop_run() returns.
        """

        while True:

            cmd = self.get_command()
            print("Received command %s" % cmd)
            if cmd == "client_close":
                return "client_close"
            elif cmd == "get_state":
                self.send_answer(self.current_state)
            elif cmd == "get_setup":
                self.send_answer(self.run.setup)
            elif cmd == "get_board_list":
                self.send_answer(str(self.run.boardid_list))
            elif cmd == "get_run_number":
                self.send_answer(str(self.run.run_number))
            elif cmd == "stop_run":
                return self.stop_run()
            elif cmd == "exit":
                self.send_answer("exiting")
                return "exit"
            elif cmd == "help":
                self.send_answer(self._HELP_RUNNING)
            else:
                # See if command can be handled by a regular expression
                self._handle_regex_command(cmd, (
                    (self.re_get_board_config, lambda arg: self.get_board_config(int(arg))),
                    (self.re_get_board_log_file, lambda arg: self.get_board_log_file(int(arg))),
                ))

        # Not reachable: the loop above only leaves through a return
        return "idle"

    def state_initfail(self):
        """Leave the "initfail" state immediately by returning "idle"."""
        return "idle"

    def _recv_exact(self, nbytes):
        """Read exactly ``nbytes`` characters from the client connection.

        Returns the data read, or None if the client closed the connection
        before ``nbytes`` characters arrived.
        """
        data = ""
        while len(data) < nbytes:
            chunk = self.connection.recv(nbytes - len(data))
            if not chunk:
                return None
            data += chunk
        return data

    def get_command(self):
        """Read one length-prefixed command from the client.

        Returns the command text, or "client_close" if the client closed
        the connection or sent a length header that is not a number.
        """

        # First get length of string (5 characters, i.e. max 99999)
        header = self._recv_exact(5)
        if header is None:
            print("Client closed connection")
            return "client_close"
        try:
            length = int(header)
        except ValueError:
            # The stream cannot be resynchronised after a bad header:
            # drop this client instead of crashing the whole server.
            print("ERROR - Invalid command length %r received: closing connection" % header)
            return "client_close"

        # Then read the right amount of characters from the socket
        cmd = self._recv_exact(length)
        if cmd is None:
            print("Client closed connection")
            return "client_close"

        return cmd

    def send_answer(self, answer):
        """Send ``answer`` to the client, prefixed with its 5-digit length.

        Answers of 100000 characters or more do not fit the length header:
        they are logged and NOT sent.
        """

        if len(answer) < 100000:
            print("Sending answer %s" % answer)
            self.connection.sendall("%5.5d" % len(answer) + answer)
        else:
            print("Answer is too long: cannot send")

    def get_board_config(self, brdid):
        """Return format_config() of board ``brdid``, or an error string."""
        if brdid in self.run.boardid_list:
            return self.run.adcboard_list[self.run.boardid_list.index(brdid)].format_config()
        return "ERROR: board id %d does not exist" % brdid

    def get_board_log_file(self, brdid):
        """Return the log_file attribute of board ``brdid``, or an error string."""
        if brdid in self.run.boardid_list:
            return self.run.adcboard_list[self.run.boardid_list.index(brdid)].log_file
        return "ERROR: board id %d does not exist" % brdid

    def read_setup_list(self):
        """Return the sorted names of the directories directly under "setup".

        Returns an empty list if the "setup" directory cannot be walked.
        """
        setup_main_dir = "setup"
        # The first tuple yielded by os.walk describes the top directory
        # itself; returning from it avoids descending into the whole tree.
        for _top, dirs, _files in os.walk(setup_main_dir):
            return sorted(dirs)
        return []

    def get_setup_list(self):
        """Return the list of available setups formatted with str()."""
        return str(self.read_setup_list())

    def change_setup(self, setup):
        """Switch the run to ``setup`` (or reload it if already active).

        Returns the setup name on success, or "error" if the setup is not
        among the available ones or the run object reports "error".
        """

        # Check if requested setup is known
        if setup not in self.read_setup_list():
            print("change_setup - ERROR: request to set unknown setup %s" % setup)
            return "error"

        # Change (or reload) setup
        if setup == self.run.setup:
            print("change_setup - reloading setup %s" % setup)
        else:
            print("change_setup - changing setup from %s to %s" % (self.run.setup, setup))

        # Run.change_setup signals failure with "error" (see __init__)
        if self.run.change_setup(setup) == "error":
            print("change_setup - ERROR: could not change setup to %s" % setup)
            return "error"

        return setup

    def new_run(self):
        """Negotiate the parameters of a new run with the client and initialize it.

        The client is asked in turn for run number ("next" or "dummy"), run
        type ("TEST", "DAQ" or "COSMIC"), shift crew and run comment.  The
        DAQ is then started on every board and their initialization awaited.

        Returns "initialized", "initfail", "error" or "client_close".
        """

        # Retrieve run number - next=next run from DB, dummy=dummy run (i.e. run nr=0)
        newrun_number = 0
        self.send_answer("run_number")
        ans = self.get_command()
        if ans == "next":
            newrun_number = self.db.get_last_run_in_db() + 1
        elif ans == "dummy":
            newrun_number = 0
        elif ans == "error":
            print("run_number - client returned error")
            return "error"
        elif ans == "client_close":
            return "client_close"
        else:
            print("run_number - invalid option %s received" % ans)
            self.send_answer("error")
            return "error"
        self.send_answer(str(newrun_number))

        # Retrieve run type (TEST,DAQ,COSMIC)
        newrun_type = ""
        self.send_answer("run_type")
        ans = self.get_command()
        if ans in ("TEST", "DAQ", "COSMIC"):
            newrun_type = ans
        elif ans == "error":
            print("run_type - client returned error")
            return "error"
        elif ans == "client_close":
            return "client_close"
        else:
            print("run_type - invalid option %s received" % ans)
            self.send_answer("error")
            return "error"
        self.send_answer(newrun_type)

        # Retrieve shift crew (free text)
        self.send_answer("shift_crew")
        ans = self.get_command()
        if ans == "client_close":
            return "client_close"
        newrun_user = ans

        # Retrieve start-of-run comment (free text)
        self.send_answer("run_comment")
        ans = self.get_command()
        if ans == "client_close":
            return "client_close"
        newrun_comment = ans

        print("Run number: %d" % newrun_number)
        print("Run type: %s" % newrun_type)
        print("Run crew: %s" % newrun_user)
        print("Run comment: %s" % newrun_comment)

        # Set run configuration according to user's request
        self.run.change_run(newrun_number)
        self.run.run_type = newrun_type
        self.run.run_user = newrun_user
        self.run.run_comment = newrun_comment

        # Check if requested run number was not used before
        # Saves the day if more than one RunControl program is running at the same time (DON'T DO THAT!!!)
        if self.run.run_number:
            if self.db.is_run_in_db(self.run.run_number):
                print("ERROR - Run %d is already in the DB: cannot use it again" % self.run.run_number)
                print("Please check if someone else is using this RunControl before retrying")
                self.send_answer("error_init")
                return "error"

        # Create run structure in the DB
        print("Initializing Run %d" % self.run.run_number)
        self.run.create_run()
        if self.run.run_number:
            self.db.set_run_time_init(self.run.run_number, self.now_str())

        # Create directory to host log files
        print("Creating log directory %s" % self.run.log_dir)
        self.run.create_log_dir()

        # Write run and boards configuration files
        print("Writing configuration file %s" % self.run.config_file)
        self.run.write_config()
        for adc in self.run.adcboard_list:
            print("Writing configuration file %s" % adc.config_file)
            adc.write_config()

        # Start DAQ for all boards
        self.send_answer("start_init")
        for adc in self.run.adcboard_list:
            p_id = adc.start_daq()
            if p_id:
                print("ADC board %02d - Started DAQ with process id %d" % (adc.board_id, p_id))
                self.send_answer("adc " + str(adc.board_id) + " init")
                adc.status = "init"
            else:
                print("ADC board %02d - ERROR: could not start DAQ" % adc.board_id)
                self.send_answer("adc " + str(adc.board_id) + " fail")
                adc.status = "fail"

        # Wait for all boards to finish initialization
        return self._wait_for_board_init()

    def _wait_for_board_init(self):
        """Poll the boards' init tag files until all of them settle or 10 tries pass.

        Returns "initialized" if every board became ready, "initfail" if at
        least one failed, or "error" on timeout.  The outcome is also sent
        to the client and, for non-zero run numbers, stored in the DB.
        """

        n_try = 0
        while True:

            still_initializing = False
            any_failed = False
            for adc in self.run.adcboard_list:

                # Check if any board changed status
                if adc.status == "init":
                    if os.path.exists(adc.initok_file):
                        # Initialization ended OK
                        print("ADC board %02d - Initialized and ready for DAQ" % adc.board_id)
                        self.send_answer("adc " + str(adc.board_id) + " ready")
                        adc.status = "ready"
                    elif os.path.exists(adc.initfail_file):
                        # Problem during initialization
                        print("ADC board %02d - *** Initialization failed ***" % adc.board_id)
                        self.send_answer("adc " + str(adc.board_id) + " fail")
                        adc.status = "fail"
                    else:
                        # This board is still initializing
                        still_initializing = True

                # Check if any board is in fail status
                if adc.status == "fail":
                    any_failed = True

            if still_initializing:
                # Some boards are still initializing: keep waiting
                n_try += 1
                if n_try >= 10:
                    print("*** ERROR *** One or more boards did not initialize within 10sec. Cannot start run")
                    if self.run.run_number:
                        # Status 5: run with problems at initialization
                        self.db.set_run_status(self.run.run_number, 5)
                    self.send_answer("init_timeout")
                    return "error"
                time.sleep(1)
            elif not any_failed:
                print("All boards completed initialization: DAQ run can be started")
                if self.run.run_number:
                    # Status 1: run correctly initialized
                    self.db.set_run_status(self.run.run_number, 1)
                self.send_answer("init_ready")
                return "initialized"
            else:
                print("*** ERROR *** One or more boards failed the initialization. Cannot start run")
                if self.run.run_number:
                    # Status 5: run with problems at initialization
                    self.db.set_run_status(self.run.run_number, 5)
                self.send_answer("init_fail")
                return "initfail"

    def start_run(self):
        """Create the run start tag file and return the new state "running"."""

        print("Starting run")

        if self.run.run_number:
            self.db.set_run_time_start(self.run.run_number, self.now_str())
            # Status 2: run started
            self.db.set_run_status(self.run.run_number, 2)

        # Create "start the run" tag file
        open(self.run.start_file, 'w').close()

        self.send_answer("run_started")

        # RunControl is now in "running" mode
        return "running"

    def stop_run(self):
        """Ask the client for the end-of-run comment, then terminate the run.

        Returns "client_close" if the client disconnects before answering,
        otherwise the result of terminate_run().
        """

        self.send_answer("run_comment_end")
        ans = self.get_command()
        if ans == "client_close":
            return "client_close"
        print("End of Run comment: %s" % ans)
        self.run.run_comment_end = ans

        print("Stopping run")
        if self.run.run_number:
            # Status 3: run stopped normally
            self.db.set_run_status(self.run.run_number, 3)

        return self.terminate_run()

    def abort_run(self):
        """Mark the run as aborted and terminate it; returns terminate_run()'s result."""

        self.run.run_comment_end = "Run aborted"

        print("Aborting run")
        if self.run.run_number:
            # Status 4: run aborted
            self.db.set_run_status(self.run.run_number, 4)

        return self.terminate_run()

    def terminate_run(self):
        """Stop the DAQ on every board, clean up tag files and return "idle".

        Sends "terminate_ok" to the client if every board stopped cleanly,
        "terminate_error" otherwise.
        """

        if self.run.run_number:
            self.db.set_run_time_stop(self.run.run_number, self.now_str())
            # stop_run()/abort_run() store the comment as run_comment_end
            # (this line used to read the never-assigned run_end_comment)
            self.db.set_run_comment_end(self.run.run_number, self.run.run_comment_end)

        # Create "stop the run" tag file
        open(self.run.quit_file, 'w').close()

        # Run stop_daq procedure for each ADC board
        terminate_ok = self._stop_all_boards(True)

        # Clean up run directory
        self._cleanup_run_files()

        if terminate_ok:
            self.send_answer("terminate_ok")
        else:
            self.send_answer("terminate_error")

        # At the end of this procedure RunControl is back to "idle" mode
        return "idle"
class RunControlServer: def __init__(self): # Define names of lock and last_used_setup files self.lock_file = "run/lock" self.lus_file = "setup/last_used_setup" # Redefine print to send output to log file sys.stdout = Logger() # Create lock file if (self.create_lock_file() == "error"): exit(1) # Define what setup to use at startup (can be changed interactively) initial_setup = self.get_initial_setup() print "=== Starting PADME Run Control server with %s setup" % initial_setup #self.write_log("=== Starting PADME Run Control server with %s setup"%initial_setup) # Create run self.run = Run() if (self.run.change_setup(initial_setup) == "error"): print "ERROR - Error while changing run setup to %s" % initial_setup #self.write_log("ERROR - Error while changing run setup to %s"%initial_setup) if os.path.exists(self.lock_file): os.remove(self.lock_file) exit(1) # Start in idle state self.current_state = "idle" # Create handler for PadmeDB self.db = PadmeDB() # Create useful regular expressions self.re_get_board_config = re.compile("^get_board_config (\d+)$") self.re_get_board_log_file = re.compile("^get_board_log_file (\d+)$") self.re_change_setup = re.compile("^change_setup (\w+)$") # Create a TCP/IP socket self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Bind the socket to the port server_address = ('localhost', 10000) print "Starting server socket on %s port %s" % server_address #self.write_log("Starting server socket on %s port %s"%server_address) try: self.sock.bind(server_address) # Listen for incoming connections except: print "ERROR - Could not bind to socket: %s" % str( sys.exc_info()[0]) #self.write_log("ERROR - Could not bind to socket: %s"%str(sys.exc_info()[0])) if os.path.exists(self.lock_file): os.remove(self.lock_file) exit(1) self.sock.listen(1) # Define SIGINT handler signal.signal(signal.SIGINT, self.sigint_handler) # Setup main interface self.main_loop() # Clean up before exiting self.sock.close() def create_lock_file(self): # Check if 
lock file exists if (os.path.exists(self.lock_file)): if (os.path.isfile(self.lock_file)): pid = 0 with open(self.lock_file, "r") as lf: for ll in lf: pid = ll print "Lock file %s found for pid %s - checking status" % ( self.lock_file, pid) #self.write_log("ERROR - Lock file %s found for pid %s"%(self.lock_file,pid)) ppinfo = os.popen("ps -p %s" % pid) pinfo = ppinfo.readlines() ppinfo.close() if len(pinfo) == 2: if pinfo[1].find("<defunct>") > -1: print "There is zombie process with this pid. The RunControlServer is probably dead. Proceeding cautiously..." else: print "ERROR - there is already a RunControlServer running with pid %s" % pid return "error" else: print "No RunControlServer process found. As you were..." else: print "ERROR - Lock file %s found but it is not a file" % self.lock_file #self.write_log("ERROR - Lock file %s found but it is not a file"%self.lock_file) return "error" # Create our own lock file pid = os.getpid() with open(self.lock_file, "w") as lf: lf.write("%d" % pid) return "ok" def get_initial_setup(self): setup = "test" lus = "" if (os.path.exists(self.lus_file)): if (os.path.isfile(self.lus_file)): lusf = open(self.lus_file, "r") lus = lusf.read().strip("\n") lusf.close() if (lus == ""): print "WARNING - File with last used setup %s exists but it is empty - Using default setup %s" % ( self.lus_file, setup) #self.write_log("WARNING - File with last used setup %s exists but it is empty - Using default setup %s"%(self.lus_file,setup)) else: setup = lus else: print "WARNING - File with last used setup %s exists but it is not a file - Using default setup %s" % ( self.lus_file, setup) #self.write_log("WARNING - File with last used setup %s exists but it is not a file - Using default setup %s"%(self.lus_file,setup)) else: print "WARNING - Could not find file with last used setup %s - Using default setup %s" % ( self.lus_file, setup) #self.write_log("WARNING - Could not find file with last used setup %s - Using default setup 
%s"%(self.lus_file,setup)) return setup def sigint_handler(self, signal, frame): print "RunControlSever received SIGINT: exiting" # If a run is initialized/running, abort it as cleanly as possible if (self.current_state == "initialized" or self.current_state == "running"): self.run.run_comment_end = "Run aborted because of SIGINT" print "Aborting run on SIGINT" #self.write_log("Aborting run on SIGINT") if (self.run.run_number): self.db.set_run_status(self.run.run_number, 4) # Status 4: run aborted self.db.set_run_time_stop(self.run.run_number, self.now_str()) self.db.set_run_comment_end(self.run.run_number, self.run.run_comment_end) open(self.run.quit_file, 'w').close() for adc in (self.run.adcboard_list): if adc.stop_daq(): print "ADC board %02d - Terminated correctly" % adc.board_id #self.write_log("ADC board %02d - Terminated correctly"%adc.board_id) else: print "ADC board %02d - WARNING: problems while terminating DAQ" % adc.board_id #self.write_log("ADC board %02d - WARNING: problems while terminating DAQ"%adc.board_id) if (self.run.run_number): self.db.set_run_status( self.run.run_number, 6) # Status 6: run ended with errors # Clean up run directory for adc in (self.run.adcboard_list): if (os.path.exists(adc.initok_file)): os.remove(adc.initok_file) if (os.path.exists(adc.initfail_file)): os.remove(adc.initfail_file) if (os.path.exists(self.run.start_file)): os.remove(self.run.start_file) if (os.path.exists(self.run.quit_file)): os.remove(self.run.quit_file) if os.path.exists(self.lock_file): os.remove(self.lock_file) # Now we can exit exit(0) def main_loop(self): while True: # Wait for a connection print "Waiting for a connection" #self.write_log('waiting for a connection') (self.connection, client_address) = self.sock.accept() print "Connection from %s" % str(client_address) #self.write_log('connection from '+str(client_address)) while True: # Handle connection according to curren status of RunControl if self.current_state == "idle": new_state = 
self.state_idle() elif self.current_state == "initialized": new_state = self.state_initialized() elif self.current_state == "running": new_state = self.state_running() elif self.current_state == "initfail": new_state = self.state_initfail() else: print "ERROR: unknown state %s - ABORTING" % self.current_state #self.write_log("ERROR: unknown state %s - ABORTING"%self.current_state) new_state = "exit" # See if status changed if new_state == "idle" or new_state == "initialized" or new_state == "running" or new_state == "initfail": self.current_state = new_state elif new_state == "client_close": self.connection.close() break elif new_state == "exit": print "=== RunControlSever received exit command: exiting" self.connection.close() if os.path.exists(self.lock_file): os.remove(self.lock_file) exit(0) else: print "=== RunControlServer = ERROR: unknown new state %s - ABORTING" % new_state #self.write_log("ERROR: unknown new state %s - ABORTING"%new_state) self.connection.close() if os.path.exists(self.lock_file): os.remove(self.lock_file) exit(1) def write_log(self, msg): print self.now_str() + " " + msg def now_str(self): return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) def state_idle(self): # Receive and process commands for "idle" state while True: cmd = self.get_command() print "Received command %s" % cmd #self.write_log('Received command '+cmd) if (cmd == "client_close"): return "client_close" elif (cmd == "get_state"): self.send_answer(self.current_state) elif (cmd == "get_setup"): self.send_answer(self.run.setup) elif (cmd == "get_setup_list"): self.send_answer(self.get_setup_list()) elif (cmd == "get_board_list"): self.send_answer(str(self.run.boardid_list)) elif (cmd == "get_run_number"): self.send_answer(str(self.db.get_last_run_in_db())) elif (cmd == "new_run"): res = self.new_run() if (res == "client_close"): return "client_close" elif (res == "error"): #self.write_log('ERROR while initializing new run') print "ERROR while initializing new run" elif 
(res == "initialized"): return "initialized" elif (res == "initfail"): return "initfail" else: #self.write_log("ERROR: new_run returned unknown answer "+res+" (?)") print "ERROR: new_run returned unknown answer %s (?)" % res elif (cmd == "exit"): self.send_answer("exiting") return "exit" elif (cmd == "help"): msg = """Available commands: help\t\tShow this help get_state\tShow current state of RunControl get_setup\tShow current setup name get_setup_list\tShow list of available setups get_board_list\tShow list of boards in use with current setup get_board_config <b>\tShow current configuration of board <b> get_run_number\tReturn last run number in DB change_setup <setup>\tChange run setup to <setup> new_run\t\tInitialize system for a new run exit\t\tTell RunControl server to exit (use with extreme care!)""" self.send_answer(msg) else: # See if command can be handled by a regular expression found_re = False m = self.re_get_board_config.match(cmd) if (m): self.send_answer(self.get_board_config(int(m.group(1)))) found_re = True m = self.re_change_setup.match(cmd) if (m): self.send_answer(self.change_setup(m.group(1))) found_re = True # No regular expression matched: command is unknown if not found_re: self.send_answer("unknown command") #self.write_log('command '+cmd+' is unknown') print "Command %s is unknown" % cmd def state_initialized(self): # Receive and process commands for "initialized" state while True: cmd = self.get_command() #self.write_log('received command '+cmd) print "Received command %s" % cmd if (cmd == "client_close"): return "client_close" elif (cmd == "get_state"): self.send_answer(self.current_state) elif (cmd == "get_setup"): self.send_answer(self.run.setup) elif (cmd == "get_board_list"): self.send_answer(str(self.run.boardid_list)) elif (cmd == "get_run_number"): self.send_answer(str(self.run.run_number)) elif (cmd == "abort_run"): return self.abort_run() elif (cmd == "start_run"): return self.start_run() elif (cmd == "exit"): 
self.send_answer("exiting") return "exit" elif (cmd == "help"): msg = """Available commands: help\t\tShow this help get_state\tShow current state of RunControl get_setup\tShow current setup name get_board_list\tShow list of boards in use with current setup get_board_config <b>\tShow current configuration of board <b> get_board_log_file <b>\tGet name of log file for board <b> get_run_number\tReturn current run number start_run\t\tStart run abort_run\t\tAbort run exit\t\tTell RunControl server to exit (use with extreme care!)""" self.send_answer(msg) else: # See if command can be handled by a regular expression found_re = False m = self.re_get_board_config.match(cmd) if (m): self.send_answer(self.get_board_config(int(m.group(1)))) found_re = True m = self.re_get_board_log_file.match(cmd) if (m): self.send_answer(self.get_board_log_file(int(m.group(1)))) found_re = True # No regular expression matched: command is unknown if not found_re: self.send_answer("unknown command") #self.write_log('command '+cmd+' is unknown') print "Command %s is unknown" % cmd def state_running(self): # Receive and process commands for "running" state while True: cmd = self.get_command() #self.write_log('received command '+cmd) print "Received command %s" % cmd if (cmd == "client_close"): return "client_close" elif (cmd == "get_state"): self.send_answer(self.current_state) elif (cmd == "get_setup"): self.send_answer(self.run.setup) elif (cmd == "get_board_list"): self.send_answer(str(self.run.boardid_list)) elif (cmd == "get_run_number"): self.send_answer(str(self.run.run_number)) elif (cmd == "stop_run"): return self.stop_run() elif (cmd == "exit"): self.send_answer("exiting") return "exit" elif (cmd == "help"): msg = """Available commands: help\t\tShow this help get_state\tShow current state of RunControl get_setup\tShow current setup name get_board_list\tShow list of boards in use with current setup get_board_config <b>\tShow current configuration of board <b> get_board_log_file <b>\tGet 
name of log file for board <b> get_run_number\tReturn current run number stop_run\t\tStop the run exit\t\tTell RunControl server to exit (use with extreme care!)""" self.send_answer(msg) else: # See if command can be handled by a regular expression found_re = False m = self.re_get_board_config.match(cmd) if (m): self.send_answer(self.get_board_config(int(m.group(1)))) found_re = True m = self.re_get_board_log_file.match(cmd) if (m): self.send_answer(self.get_board_log_file(int(m.group(1)))) found_re = True # No regular expression matched: command is unknown if not found_re: self.send_answer("unknown command") #self.write_log('command '+cmd+' is unknown') print "Command %s is unknown" % cmd return "idle" def state_initfail(self): return "idle" def get_command(self): # First get length of string l = "" for i in range(5): # Max 99999 characters ch = self.connection.recv(1) if ch: l += ch else: #self.write_log('no more data from client') print "Client closed connection" return "client_close" ll = int(l) # Then read the right amount of characters from the socket cmd = "" for i in range(ll): ch = self.connection.recv(1) if ch: cmd += ch else: #self.write_log('no more data from client') print "Client closed connection" return "client_close" return cmd def send_answer(self, answer): if len(answer) < 100000: #self.write_log("Sending answer "+answer) print "Sending answer %s" % answer self.connection.sendall("%5.5d" % len(answer) + answer) else: #self.write_log('answer too long: cannot send') print "Answer is too long: cannot send" def get_board_config(self, brdid): if brdid in self.run.boardid_list: return self.run.adcboard_list[self.run.boardid_list.index( brdid)].format_config() else: return "ERROR: board id %d does not exist" % brdid def get_board_log_file(self, brdid): if brdid in self.run.boardid_list: return self.run.adcboard_list[self.run.boardid_list.index( brdid)].log_file else: return "ERROR: board id %d does not exist" % brdid def read_setup_list(self): # Get 
list of available setups setup_main_dir = "setup" setups = [] for top, dirs, files in os.walk(setup_main_dir): if (top == setup_main_dir): for setup_dir in dirs: setups.append(setup_dir) setups.sort() return setups def get_setup_list(self): return str(self.read_setup_list()) def change_setup(self, setup): # Check if requested setup is known if not (setup in self.read_setup_list()): #self.write_log("change_setup - ERROR: request to set unknown setup "+setup) print "change_setup - ERROR: request to set unknown setup %s" % setup return "error" # Change (or reload) setup if (setup == self.run.setup): #self.write_log("change_setup - reloading setup "+setup) print "change_setup - reloading setup %s" % setup else: #self.write_log("change_setup - changing setup from "+self.run.setup+" to "+setup) print "change_setup - changing setup from %s to %s" % ( self.run.setup, setup) self.run.change_setup(setup) return setup def new_run(self): # Retrieve run number - next=next run from DB, dummy=dummy run (i.e. 
run nr=0) # Return run number used (0 for dummy run) or "error" for invalid answer newrun_number = 0 self.send_answer("run_number") ans = self.get_command() if (ans == "next"): newrun_number = self.db.get_last_run_in_db() + 1 elif (ans == "dummy"): newrun_number = 0 elif (ans == "error"): #self.write_log("run_number - client returned error") print "run_number - client returned error" return "error" elif (ans == "client_close"): return "client_close" else: #self.write_log("run_number - invalid option %s received"%ans) print "run_number - invalid option %s received" % ans self.send_answer("error") return "error" self.send_answer(str(newrun_number)) # Retrieve run type (TEST,DAQ,COSMIC) # Return run type used or "error" for invalid answer newrun_type = "" self.send_answer("run_type") ans = self.get_command() if (ans == "TEST" or ans == "DAQ" or ans == "COSMIC"): newrun_type = ans elif (ans == "error"): self.write_log("run_type - client returned error") return "error" elif (ans == "client_close"): return "client_close" else: # self.write_log("run_type - invalid option %s received"%ans) print "run_type - invalid option %s received" % ans self.send_answer("error") return "error" self.send_answer(newrun_type) newrun_user = "" self.send_answer("shift_crew") ans = self.get_command() if (ans == "client_close"): return "client_close" newrun_user = ans newrun_comment = "" self.send_answer("run_comment") ans = self.get_command() if (ans == "client_close"): return "client_close" newrun_comment = ans #self.write_log("Run number: "+str(newrun_number)) #self.write_log("Run type: "+newrun_type) #self.write_log("Run crew: "+newrun_user) #self.write_log("Run comment: "+newrun_comment) print "Run number: %d" % newrun_number print "Run type: %s" % newrun_type print "Run crew: %s" % newrun_user print "Run comment: %s" % newrun_comment # Set run configuration according to user's request self.run.change_run(newrun_number) self.run.run_type = newrun_type self.run.run_user = newrun_user 
self.run.run_comment = newrun_comment # Check if requested run number was not used before # Saves the day if more than one RunControl program is running at the same time (DON'T DO THAT!!!) if (self.run.run_number): run_is_in_db = self.db.is_run_in_db(self.run.run_number) if (run_is_in_db): #self.write_log("ERROR - Run "+str(self.run.run_number)+" is already in the DB: cannot use it again") #self.write_log("Please check if someone else is using this RunControl before retrying") print "ERROR - Run %d is already in the DB: cannot use it again" % self.run.run_number print "Please check if someone else is using this RunControl before retrying" self.send_answer("error_init") return "error" # Create run structure in the DB #self.write_log("Initializing Run "+str(self.run.run_number)) print "Initializing Run %d" % self.run.run_number self.run.create_run() if (self.run.run_number): self.db.set_run_time_init(self.run.run_number, self.now_str()) # Create directory to host log files #self.write_log("Creating log directory "+self.run.log_dir) print "Creating log directory %s" % self.run.log_dir self.run.create_log_dir() # Write run and boards configuration files #self.write_log("Writing configuration file "+self.run.config_file) print "Writing configuration file %s" % self.run.config_file self.run.write_config() for adc in (self.run.adcboard_list): #self.write_log("Writing configuration file "+adc.config_file) print "Writing configuration file %s" % adc.config_file adc.write_config() # Start DAQ for all boards self.send_answer("start_init") for adc in (self.run.adcboard_list): p_id = adc.start_daq() if p_id: #self.write_log("ADC board %02d - Started DAQ with process id %d"%(adc.board_id,p_id)) print "ADC board %02d - Started DAQ with process id %d" % ( adc.board_id, p_id) self.send_answer("adc " + str(adc.board_id) + " init") adc.status = "init" else: #self.write_log("ADC board %02d - ERROR: could not start DAQ"%adc.board_id) print "ADC board %02d - ERROR: could not start DAQ" 
% adc.board_id self.send_answer("adc " + str(adc.board_id) + " fail") adc.status = "fail" # Wait for all boards to finish initialization n_try = 0 while (1): all_boards_init = 1 all_boards_ready = 1 for adc in (self.run.adcboard_list): # Check if any board changed status if (adc.status == "init"): if (os.path.exists(adc.initok_file)): # Initialization ended OK #self.write_log("ADC board %02d - Initialized and ready for DAQ"%adc.board_id) print "ADC board %02d - Initialized and ready for DAQ" % adc.board_id self.send_answer("adc " + str(adc.board_id) + " ready") adc.status = "ready" elif (os.path.exists(adc.initfail_file)): # Problem during initialization #self.write_log("ADC board %02d - *** Initialization failed ***"%adc.board_id) print "ADC board %02d - *** Initialization failed ***" % adc.board_id self.send_answer("adc " + str(adc.board_id) + " fail") adc.status = "fail" else: # This board is still initializing all_boards_init = 0 # Check if any board is in fail status if (adc.status == "fail"): all_boards_ready = 0 if (all_boards_init == 0): # Some boards are still initializing: keep waiting n_try += 1 if (n_try >= 10): #self.write_log("*** ERROR *** One or more boards did not initialize within 10sec. Cannot start run") print "*** ERROR *** One or more boards did not initialize within 10sec. Cannot start run" if (self.run.run_number): self.db.set_run_status( self.run.run_number, 5) # Status 5: run with problems at initialization self.send_answer("init_timeout") return "error" time.sleep(1) elif (all_boards_ready): #self.write_log("All boards completed initialization: DAQ run can be started") print "All boards completed initialization: DAQ run can be started" if (self.run.run_number): self.db.set_run_status( self.run.run_number, 1) # Status 1: run correctly initialized self.send_answer("init_ready") return "initialized" else: #self.write_log("*** ERROR *** One or more boards failed the initialization. 
Cannot start run") print "*** ERROR *** One or more boards failed the initialization. Cannot start run" if (self.run.run_number): self.db.set_run_status( self.run.run_number, 5) # Status 5: run with problems at initialization self.send_answer("init_fail") return "initfail" def start_run(self): #self.write_log("Starting run") print "Starting run" if (self.run.run_number): self.db.set_run_time_start(self.run.run_number, self.now_str()) self.db.set_run_status(self.run.run_number, 2) # Status 2: run started # Create "start the run" tag file open(self.run.start_file, 'w').close() self.send_answer("run_started") # RunControl is now in "running" mode return "running" def stop_run(self): self.send_answer("run_comment_end") ans = self.get_command() if (ans == "client_close"): return "client_close" #self.write_log("End of Run comment: "+ans) print "End of Run comment: %s" % ans self.run.run_comment_end = ans #self.write_log("Stopping run") print "Stopping run" if (self.run.run_number): self.db.set_run_status(self.run.run_number, 3) # Status 3: run stopped normally return self.terminate_run() def abort_run(self): self.run.run_comment_end = "Run aborted" #self.write_log("Aborting run") print "Aborting run" if (self.run.run_number): self.db.set_run_status(self.run.run_number, 4) # Status 4: run aborted return self.terminate_run() def terminate_run(self): if (self.run.run_number): self.db.set_run_time_stop(self.run.run_number, self.now_str()) self.db.set_run_comment_end(self.run.run_number, self.run.run_comment_end) # Create "stop the run" tag file open(self.run.quit_file, 'w').close() # Run stop_daq procedure for each ADC board terminate_ok = True for adc in (self.run.adcboard_list): if adc.stop_daq(): self.send_answer("adc %d terminate_ok" % adc.board_id) #self.write_log("ADC board %02d - Terminated correctly"%adc.board_id) print "ADC board %02d - Terminated correctly" % adc.board_id else: terminate_ok = False self.send_answer("adc %d terminate_error" % adc.board_id) 
#self.write_log("ADC board %02d - WARNING: problems while terminating DAQ"%adc.board_id) print "ADC board %02d - WARNING: problems while terminating DAQ" % adc.board_id if (self.run.run_number): self.db.set_run_status( self.run.run_number, 6) # Status 6: run ended with errors # Clean up run directory for adc in (self.run.adcboard_list): if (os.path.exists(adc.initok_file)): os.remove(adc.initok_file) if (os.path.exists(adc.initfail_file)): os.remove(adc.initfail_file) if (os.path.exists(self.run.start_file)): os.remove(self.run.start_file) if (os.path.exists(self.run.quit_file)): os.remove(self.run.quit_file) if terminate_ok: self.send_answer("terminate_ok") else: self.send_answer("terminate_error") # At the end of this procedure RunControl is back to "idle" mode return "idle" def now_str(self): return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
class Run:
    """Configuration and bookkeeping for a single DAQ run.

    Holds paths, node assignments and process handlers (ADC boards,
    trigger, merger, level1) and knows how to write them to the
    configuration files and to the database.
    """

    def __init__(self):
        """Read the environment, define file locations and load defaults."""
        # Get account under which the RunControl runs
        self.user_account = os.getenv('USER', "daq")

        # Get location of DAQ main directory from PADME_DAQ_DIR
        # Default to current dir if not set
        self.daq_dir = os.getenv('PADME_DAQ_DIR', ".")

        # Get base port number for network tunnels from PADME_RC_TUNNEL_BASE_PORT
        self.base_port_number = int(os.getenv('PADME_RC_TUNNEL_BASE_PORT', "31400"))

        # Define id file for passwordless ssh command execution
        self.ssh_id_file = "%s/.ssh/id_rsa_daq" % os.getenv('HOME', "~")

        # Get location of padme-fw software from PADME
        # Default to PADME_DAQ_DIR/padme-fw if not set
        self.padme_fw = os.getenv('PADME', "%s/padme-fw" % self.daq_dir)

        # Define executables to use in production
        self.daq_executable = "%s/PadmeDAQ/PadmeDAQ.exe" % self.padme_fw
        self.merger_executable = "%s/Level1/PadmeMerger.exe" % self.padme_fw
        self.level1_executable = "%s/Level1/PadmeLevel1.exe" % self.padme_fw
        self.trigger_executable = "%s/PadmeTrig/PadmeTrig.exe" % self.padme_fw

        # Define directory containing setup subdirectories
        self.setup_root_dir = "%s/setup" % self.daq_dir

        # Define files where the current and last run name will be written
        self.current_run_file = "%s/run/current_run" % self.daq_dir
        self.last_run_file = "%s/run/last_run" % self.daq_dir

        # Define directory containing rawdata directories for each run
        self.rawdata_root_dir = self.daq_dir + "/local/rawdata"

        # Define name and position of control files
        self.control_dir = self.daq_dir + "/local/run"
        self.start_file = self.control_dir + "/start"
        self.quit_file = self.control_dir + "/quit"
        self.trig_start_file = self.control_dir + "/start_trig"
        self.trig_stop_file = self.control_dir + "/stop_trig"
        self.initok_file_head = self.control_dir + "/initok"
        self.initfail_file_head = self.control_dir + "/initfail"
        self.lock_file_head = self.control_dir + "/lock"

        # Connect to database services
        self.db = PadmeDB()

        # Do not define a default setup
        self.setup = ""

        # Run final status defaults to 3 (stopped normally)
        self.final_status = 3

        self.set_default_config()

    def _set_run_paths(self):
        """Derive all per-run directories and file name heads from run_name."""
        self.run_dir = "%s/runs/%s" % (self.daq_dir, self.run_name)

        self.config_dir = "%s/cfg" % self.run_dir
        self.config_file = "%s.cfg" % self.run_name
        self.config_file_head = self.run_name

        self.log_dir = "%s/log" % self.run_dir
        self.log_file_head = self.run_name

        self.stream_dir = "%s/local/streams/%s" % (self.daq_dir, self.run_name)
        self.stream_head = self.run_name

        self.rawdata_dir = "%s/%s" % (self.rawdata_root_dir, self.run_name)
        self.rawdata_head = self.run_name

    def change_run(self):
        """Configure all processes for the current run number.

        Returns True on success, False if the run number is already in
        the DB or if the run could not be created in the DB.
        """
        # Check if requested run number was not used before
        # Saves the day if more than one RunControl program is running at the same time (DON'T DO THAT!!!)
        if self.run_number:
            if self.db.is_run_in_db(self.run_number):
                print("Run::change_run - ERROR - Run %d is already in the DB: cannot use it again" % self.run_number)
                print("Please check if someone else is using this RunControl before retrying")
                return False

        # Define run name using run number and start time
        self.run_name = "run_%7.7d_%s" % (self.run_number, time.strftime("%Y%m%d_%H%M%S", time.gmtime()))

        # Write run name to current_run file for monitoring
        with open(self.current_run_file, "w") as lf:
            lf.write("%s\n" % self.run_name)

        self._set_run_paths()

        # Make sure Merger runs on a different node after each run
        self.merger_node = self.next_merger_node()

        # Configure Merger for this run
        self.runconfig_merger(self.merger)

        # Configure Trigger for this run
        self.runconfig_trigger(self.trigger)

        # Configure ADC boards for this run
        for adcboard in self.adcboard_list:
            self.runconfig_adcboard(adcboard)

        # Configure Level1 processes for this run
        for level1 in self.level1_list:
            self.runconfig_level1(level1)

        # If this is a real run, create it in the DB
        if self.run_number:
            print("Creating Run %d structure in DB" % self.run_number)
            if self.create_run_in_db() == "error":
                print("Run::change_run - ERROR - Cannot create Run in the DB")
                return False

        return True

    def next_merger_node(self):
        """Return the node the Merger should use for the next run.

        Nodes in merger_node_list are used in rotation; without a list the
        current node (or "localhost" if none is set) is kept.
        """
        # If merger node is currently not defined, set it to first in list or localhost
        if not self.merger_node:
            if self.merger_node_list:
                return self.merger_node_list[0]
            return "localhost"

        # If the list of nodes was not defined, keep using always the same node
        if not self.merger_node_list:
            return self.merger_node

        # Find current node in node list and use next one
        try:
            current = self.merger_node_list.index(self.merger_node)
        except ValueError:
            # Handle misconfigurations (should never happen)
            print("Run::next_merger_node - WARNING: current merger host %s is not in the merger node list %s" % (
                self.merger_node, self.merger_node_list))
            print("                        Will use first node in list: %s" % self.merger_node_list[0])
            return self.merger_node_list[0]

        # If current node was the last in the list, restart from the first one
        return self.merger_node_list[(current + 1) % len(self.merger_node_list)]

    def set_default_config(self):
        """Clean up Run configuration and set all run parameters to default."""
        self.adcboard_list = []
        self.trigger = None
        self.merger = None
        self.level1_list = []

        self.run_number = 0
        self.run_name = "run_%7.7d_%s" % (self.run_number, time.strftime("%Y%m%d_%H%M%S", time.gmtime()))
        self.run_type = "TEST"
        self.run_user = "******"
        self.run_comment_start = "Generic start of run"
        self.run_comment_end = "Generic end of run"

        self._set_run_paths()

        self.trigger_node = "localhost"

        self.merger_node = "localhost"
        self.merger_node_list = []

        self.level1_nproc = 1
        self.level1_maxevt = 10000

        self.total_daq_time = 0

    def read_setup(self):
        """Read <setup_root_dir>/<setup>/run.cfg into the run configuration.

        Returns "ok" on success, "error" if the setup file does not exist.
        Unknown parameters and malformed lines are reported and skipped.
        """
        # Define regular expressions used in file parsing
        re_empty = re.compile(r"^\s*$")
        re_comment = re.compile(r"^\s*#")
        re_param = re.compile(r"^\s*(\w+)\s+(.+?)\s*$")
        re_boardid = re.compile(r"\d+")
        re_board_link = re.compile(r"^\s*(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s*$")

        # Read general run configuration from file
        setup_file = "%s/%s/run.cfg" % (self.setup_root_dir, self.setup)
        if not os.path.isfile(setup_file):
            print("Run - ERROR: setup file %s not found" % setup_file)
            return "error"

        self.boardid_list = []
        self.board_link_list = []
        with open(setup_file) as f:
            for l in f:

                if re_empty.search(l) or re_comment.search(l):
                    continue

                m = re_param.search(l)
                if not m:
                    # Only the file name is known here: no parameter was decoded
                    print("Run - WARNING: unknown line format found while reading setup file %s" % setup_file)
                    print(l)
                    continue

                (p_name, p_value) = m.group(1, 2)
                if p_name == "boardid_list":
                    # Get sorted list of board ids while removing duplicates
                    # (convert to int as findall returns strings)
                    s_boards = sorted(set(re_boardid.findall(p_value)), key=int)
                    self.boardid_list.extend(int(s_bid) for s_bid in s_boards)
                elif p_name == "board_link":
                    mm = re_board_link.search(p_value)
                    if mm:
                        self.board_link_list.append(mm.group(1, 2, 3, 4))
                    else:
                        print("Run - WARNING: unable to decode board_link parameter while reading setup file %s" % (
                            setup_file, ))
                        print(l)
                elif p_name == "total_daq_time":
                    self.total_daq_time = int(p_value)
                elif p_name == "trigger_node":
                    self.trigger_node = p_value
                elif p_name == "merger_node":
                    self.merger_node = p_value
                elif p_name == "merger_node_list":
                    self.merger_node_list = p_value.split()
                elif p_name == "level1_nproc":
                    self.level1_nproc = int(p_value)
                elif p_name == "level1_maxevt":
                    self.level1_maxevt = int(p_value)
                else:
                    print("Run - WARNING: unknown parameter %s found while reading setup file %s" % (
                        p_name, setup_file))

        return "ok"

    def config_list(self):
        """Return the run configuration as a list of [name, value] pairs."""
        cfg_list = [
            ["user_account", self.user_account],
            ["daq_dir", self.daq_dir],
            ["base_port_number", self.base_port_number],
            ["ssh_id_file", self.ssh_id_file],

            ["daq_executable", self.daq_executable],
            ["trigger_executable", self.trigger_executable],
            ["merger_executable", self.merger_executable],
            ["level1_executable", self.level1_executable],

            ["start_file", self.start_file],
            ["quit_file", self.quit_file],
            ["trig_start_file", self.trig_start_file],
            ["trig_stop_file", self.trig_stop_file],
            ["initok_file_head", self.initok_file_head],
            ["initfail_file_head", self.initfail_file_head],
            ["lock_file_head", self.lock_file_head],
            ["rawdata_dir", self.rawdata_dir],

            ["run_number", str(self.run_number)],
            ["run_name", self.run_name],
            ["run_dir", self.run_dir],
            ["run_type", self.run_type],
            ["run_user", self.run_user],
            ["run_comment_start", self.run_comment_start],
            ["setup", self.setup],

            ["board_list", " ".join("%d" % b for b in self.boardid_list)],
        ]

        # Report links in board id order, one entry per matching link
        for b in self.boardid_list:
            for link in self.board_link_list:
                (board, host, port, node) = link
                if b == int(board):
                    cfg_list.append(["board_link", "%s %s %s %s" % (board, host, port, node)])

        cfg_list.append(["config_dir", self.config_dir])
        cfg_list.append(["config_file", self.config_file])
        cfg_list.append(["config_file_head", self.config_file_head])

        cfg_list.append(["log_dir", self.log_dir])
        cfg_list.append(["log_file_head", self.log_file_head])

        cfg_list.append(["stream_dir", self.stream_dir])
        cfg_list.append(["stream_head", self.stream_head])

        # NOTE(review): rawdata_dir is also listed above; the duplicate is kept
        # so that the generated configuration does not change - confirm whether
        # it can be dropped.
        cfg_list.append(["rawdata_dir", self.rawdata_dir])
        cfg_list.append(["rawdata_head", self.rawdata_head])

        cfg_list.append(["trigger_node", self.trigger_node])

        if self.merger_node:
            cfg_list.append(["merger_node", self.merger_node])
        if self.merger_node_list:
            cfg_list.append(["merger_node_list", " ".join(self.merger_node_list)])

        cfg_list.append(["level1_nproc", str(self.level1_nproc)])
        cfg_list.append(["level1_maxevt", str(self.level1_maxevt)])

        cfg_list.append(["total_daq_time", str(self.total_daq_time)])

        return cfg_list

    def format_config(self):
        """Return the run configuration as text, one "name value" line per entry."""
        return "".join("%-30s %s\n" % (cfg[0], cfg[1]) for cfg in self.config_list())

    def create_run_in_db(self):
        """Create the run and all its processes in the DB.

        Returns "ok" on success, "error" as soon as one process cannot be
        created.
        """
        # Create run in DB
        self.db.create_run(self.run_number, self.run_name, self.run_type)
        self.db.set_run_time_create(self.run_number, self.db.now_str())
        self.db.set_run_user(self.run_number, self.run_user)
        self.db.set_run_comment_start(self.run_number, self.db.now_str(), self.run_comment_start)

        # Add all configuration parameters
        for cfg in self.config_list():
            self.db.add_cfg_para_run(self.run_number, cfg[0], cfg[1])

        # Create board structures in DB
        for adc in self.adcboard_list:
            if adc.create_proc_daq() == "error":
                print("Run::create_run - ERROR - Cannot create DAQ process for board %d in the DB" % adc.board_id)
                return "error"
            if adc.create_proc_zsup() == "error":
                print("Run::create_run - ERROR - Cannot create ZSUP process for board %d in the DB" % adc.board_id)
                return "error"

        # Create Trigger structure in DB
        if self.trigger.create_trigger() == "error":
            print("Run::create_run - ERROR - Cannot create Trigger process in the DB")
            return "error"

        # Create Merger structure in DB
        if self.merger.create_merger() == "error":
            print("Run::create_run - ERROR - Cannot create Merger process in the DB")
            return "error"

        # Create Level1 structures in DB
        for lvl1 in self.level1_list:
            if lvl1.create_level1() == "error":
                print("Run::create_run - ERROR - Cannot create Level1 process for level1 %d in the DB" % lvl1.level1_id)
                return "error"

        return "ok"

    def create_log_dir(self):
        """Create log directory for this run (make sure the full tree is there)."""
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir, 0o755)

    def write_config(self):
        """Write the configuration files of the run and of all its processes."""
        # Create config directory for this run (make sure the full tree is there)
        if not os.path.exists(self.config_dir):
            os.makedirs(self.config_dir, 0o755)

        print("Writing configuration file %s" % self.config_file)
        with open(self.config_dir + "/" + self.config_file, "w") as f:
            f.write(self.format_config())

        print("Writing configuration file %s for Merger" % self.merger.config_file)
        self.merger.write_config()

        print("Writing configuration file %s for Trigger" % self.trigger.config_file)
        self.trigger.write_config()

        for adc in self.adcboard_list:
            print("Writing configuration files %s and %s for ADC board %d" % (
                adc.config_file_daq, adc.config_file_zsup, adc.board_id))
            adc.write_config()

        for lvl1 in self.level1_list:
            print("Writing configuration files %s for Level1 %d" % (lvl1.config_file, lvl1.level1_id))
            lvl1.write_config()

    def print_config(self):
        """Print the formatted run configuration."""
        print(self.format_config())

    def create_fifos(self):
        """Create stream directories and FIFO files on all nodes involved.

        Node id 0 is handled with local calls, any other node through ssh.
        """
        # Create stream directory and fifo file for Trigger
        if self.trigger.node_id == 0:
            if not os.path.exists(self.stream_dir):
                os.makedirs(self.stream_dir, 0o755)
            os.mkfifo(self.trigger.output_stream)
        else:
            command = "ssh -i %s %s '( mkdir -p %s ; mkfifo %s )'" % (
                self.ssh_id_file, self.trigger.node_ip, self.stream_dir, self.trigger.output_stream)
            print(command)
            os.system(command)

        # Create stream directories and fifo files for ADCBoards
        for adc in self.adcboard_list:
            if adc.node_id == 0:
                if not os.path.exists(self.stream_dir):
                    os.makedirs(self.stream_dir, 0o755)
                os.mkfifo(adc.output_stream_daq)
                os.mkfifo(adc.output_stream_zsup)
            else:
                command = "ssh -i %s %s '( mkdir -p %s ; mkfifo %s %s )'" % (
                    self.ssh_id_file, adc.node_ip, self.stream_dir,
                    adc.output_stream_daq, adc.output_stream_zsup)
                print(command)
                os.system(command)

        # Create stream directory on Merger
        if self.merger.node_id == 0:
            if not os.path.exists(self.stream_dir):
                os.makedirs(self.stream_dir, 0o755)
        else:
            command = "ssh -i %s %s '( mkdir -p %s )'" % (
                self.ssh_id_file, self.merger.node_ip, self.stream_dir)
            print(command)
            os.system(command)

        # Create fifo files to connect Merger to Level1 processes
        for lvl1 in self.level1_list:
            if lvl1.node_id == 0:
                os.mkfifo(lvl1.input_stream)
            else:
                command = "ssh -i %s %s '( mkfifo %s )'" % (
                    self.ssh_id_file, lvl1.node_ip, lvl1.input_stream)
                print(command)
                os.system(command)

        # If Trigger process runs on a node which is different from the Merger
        # we must replicate the Trigger FIFO file on the Merger
        if self.trigger.node_id != self.merger.node_id:
            if self.merger.node_id == 0:
                os.mkfifo(self.trigger.output_stream)
            else:
                command = "ssh -n -i %s %s '( mkfifo %s )'" % (
                    self.ssh_id_file, self.merger.node_ip, self.trigger.output_stream)
                print(command)
                os.system(command)

        # If an ADCBoard process runs on a node which is different from the Merger
        # we must replicate the ZSUP FIFO file on the Merger
        if self.merger.node_id == 0:
            for adc in self.adcboard_list:
                if adc.node_id != self.merger.node_id:
                    os.mkfifo(adc.output_stream_zsup)
        else:
            # More efficient if we give a single (long) ssh command
            stream_list = "".join(
                " %s" % adc.output_stream_zsup
                for adc in self.adcboard_list
                if adc.node_id != self.merger.node_id)
            if stream_list:
                command = "ssh -n -i %s %s '( mkfifo %s )'" % (
                    self.ssh_id_file, self.merger.node_ip, stream_list)
                print(command)
                os.system(command)

    def _launch_tunnel(self, command, log_file, proc_list, handle_list, caller):
        """Start one end of a network tunnel, logging its output to log_file.

        The log handle and the process are appended to handle_list and
        proc_list; caller is the method name used in the error message.
        """
        log_handle = open(log_file, "w")
        handle_list.append(log_handle)

        print(command)
        # NOTE(review): the command is run without a shell, so when it is not
        # wrapped in ssh its "<", ">" and ";" tokens reach the program as plain
        # arguments - confirm that tunnels are only needed between remote nodes.
        try:
            proc = subprocess.Popen(shlex.split(command), stdout=log_handle,
                                    stderr=subprocess.STDOUT, bufsize=1)
            proc_list.append(proc)
        except OSError as e:
            print("Run::%s - ERROR: Execution failed: %s" % (caller, e))
        time.sleep(0.5)

    def create_receivers(self):
        """Open the receiving end of the network tunnels on the Merger node."""
        # Keep track of receiver processes and handles: needed for final cleanup
        self.proc_rcv = []
        self.hand_rcv = []

        # If an ADCBoard process runs on a node which is different from the Merger
        # we create the receiving end of a network tunnel
        for adc in self.adcboard_list:

            if adc.node_id == self.merger.node_id:
                continue

            # Define port for network tunnel
            port_number = self.base_port_number + adc.board_id
            print("Creating receiving end of network tunnel for board %d on port %d" % (
                adc.board_id, port_number))

            log_file = "%s/%s_nc_%d_recv.log" % (self.log_dir, self.log_file_head, port_number)

            # Open receiving end of tunnel on Merger node
            command = "nc -l -k -v --recv-only %s %d > %s < /dev/zero" % (
                self.merger.node_ip, port_number, adc.output_stream_zsup)
            if self.merger.node_id != 0:
                command = "ssh -f -i %s %s '( %s )'" % (
                    self.ssh_id_file, self.merger.node_ip, command)
            self._launch_tunnel(command, log_file, self.proc_rcv, self.hand_rcv, "create_receivers")

        # If the Trigger process runs on a node which is different from the Merger
        # we create the receiving end of a network tunnel
        if self.trigger.node_id != self.merger.node_id:

            # Define port for network tunnel
            port_number = self.base_port_number + 99
            print("Creating receiving end of network tunnel for trigger on port %d" % port_number)

            log_file = "%s/%s_nc_%d_recv.log" % (self.log_dir, self.log_file_head, port_number)

            # Open receiving end of tunnel on Merger node
            command = "nc --udp -l -v --recv-only %s %d > %s < /dev/zero" % (
                self.merger.node_ip, port_number, self.trigger.output_stream)
            if self.merger.node_id != 0:
                command = "ssh -f -i %s %s '( %s )'" % (
                    self.ssh_id_file, self.merger.node_ip, command)
            self._launch_tunnel(command, log_file, self.proc_rcv, self.hand_rcv, "create_receivers")

    def create_senders(self):
        """Open the sending end of the network tunnels on DAQ and Trigger nodes."""
        # Keep track of sender processes and handles: needed for final cleanup
        self.proc_snd = []
        self.hand_snd = []

        # If an ADCBoard process runs on a node which is different from the Merger
        # we create the sending end of a network tunnel
        for adc in self.adcboard_list:

            if adc.node_id == self.merger.node_id:
                continue

            # Define port for network tunnel
            port_number = self.base_port_number + adc.board_id
            print("Creating sending end of network tunnel for board %d on port %d" % (
                adc.board_id, port_number))

            log_file = "%s/%s_nc_%d_send.log" % (self.log_dir, self.log_file_head, port_number)

            # Open sending end of tunnel on DAQ node. Add some code to wait for
            # receiving end to appear before proceeding.
            command = "while ! nc -z %s %d ; do sleep 1 ; done ; nc -v --send-only %s %d < %s > /dev/null" % (
                self.merger.node_ip, port_number, self.merger.node_ip, port_number,
                adc.output_stream_zsup)
            if adc.node_id != 0:
                command = "ssh -f -i %s %s '( %s )'" % (self.ssh_id_file, adc.node_ip, command)
            self._launch_tunnel(command, log_file, self.proc_snd, self.hand_snd, "create_senders")

        # If the Trigger process runs on a node which is different from the Merger
        # we create the sending end of a network tunnel
        if self.trigger.node_id != self.merger.node_id:

            # Define port for network tunnel
            port_number = self.base_port_number + 99
            print("Creating sending end of network tunnel for trigger on port %d" % port_number)

            log_file = "%s/%s_nc_%d_send.log" % (self.log_dir, self.log_file_head, port_number)

            # Open sending end of tunnel on Trigger node
            command = "nc -v --udp --send-only %s %d < %s > /dev/null" % (
                self.merger.node_ip, port_number, self.trigger.output_stream)
            # The sender runs on the Trigger node: wrap in ssh if that node is remote
            if self.trigger.node_id != 0:
                command = "ssh -f -i %s %s '( %s )'" % (
                    self.ssh_id_file, self.trigger.node_ip, command)
            self._launch_tunnel(command, log_file, self.proc_snd, self.hand_snd, "create_senders")

    def create_merger_input_list(self):
        """Write the list of Merger input streams (ADC boards, then trigger as id 99)."""
        print("Creating merger input list file %s" % self.merger.input_list)
        with open(self.merger.input_list, "w") as f:
            for adcboard in self.adcboard_list:
                f.write("%d %s\n" % (adcboard.board_id, adcboard.output_stream_zsup))
            f.write("%d %s\n" % (99, self.trigger.output_stream))

    def create_merger_output_list(self):
        """Write the list of Merger output streams (one per Level1 process)."""
        print("Creating merger output list file %s" % self.merger.output_list)
        with open(self.merger.output_list, "w") as f:
            for lvl1 in self.level1_list:
                f.write("%s\n" % (lvl1.input_stream))

    def create_level1_output_dirs(self):
        """Create the output directory of each Level1 process on its node."""
        for level1 in self.level1_list:
            print("Creating output dir %s for level1 %d" % (level1.output_dir, level1.level1_id))
            if level1.node_id == 0:
                if not os.path.exists(level1.output_dir):
                    os.makedirs(level1.output_dir, 0o755)
            else:
                command = "ssh -i %s %s '( mkdir -p %s )'" % (
                    self.ssh_id_file, level1.node_ip, level1.output_dir)
                print(command)
                os.system(command)

    def change_setup(self, setup):

        # Reset run configuration to its default values
        self.set_default_config()

        # Read new setup
        self.setup = setup
        if (self.read_setup() == "error"):
            return "error"

        # Create new set of ADC board processes (DAQ and ZSUP) handlers
        self.daq_nodes_id_list = []
        for b in self.boardid_list:
            print("Run - Configuring ADC board %d" % b)
            adcboard = ADCBoard(b)
            self.configure_adcboard(adcboard)
self.adcboard_list.append(adcboard) self.daq_nodes_id_list.append(adcboard.node_id) # Get unique list of DAQ nodes (needed to create start/stop files) self.daq_nodes_id_list = list(set(self.daq_nodes_id_list)) # Store ip addresses of DAQ nodes in a dictionary self.daq_nodes_ip_list = {} for node_id in self.daq_nodes_id_list: self.daq_nodes_ip_list[node_id] = self.db.get_node_daq_ip(node_id) # Create new Trigger process handler self.trigger = Trigger() self.configure_trigger(self.trigger) # Create new Merger process handler self.merger = Merger() self.configure_merger(self.merger) # Create new set of Level1 process handlers for l in range(self.level1_nproc): print "Run - Configuring Level1 process %d" % l lvl1_proc = Level1(l) self.configure_level1(lvl1_proc) self.level1_list.append(lvl1_proc) return setup def configure_adcboard(self, adcboard): # Configure ADC board DAQ/ZSUP processes after changing setup # Reset ADC board to default configuration adcboard.set_default_config() # Set executable adcboard.executable = self.daq_executable # Lock files (will contain PID of processes) adcboard.lock_file_daq = "%s_b%02d_daq" % (self.lock_file_head, adcboard.board_id) adcboard.lock_file_zsup = "%s_b%02d_zsup" % (self.lock_file_head, adcboard.board_id) # Control files needed to start/stop DAQ (will disappear) adcboard.start_file = self.start_file adcboard.quit_file = self.quit_file # Status files for initialization adcboard.initok_file_daq = "%s_b%02d_daq" % (self.initok_file_head, adcboard.board_id) adcboard.initok_file_zsup = "%s_b%02d_zsup" % (self.initok_file_head, adcboard.board_id) adcboard.initfail_file_daq = "%s_b%02d_daq" % (self.initfail_file_head, adcboard.board_id) adcboard.initfail_file_zsup = "%s_b%02d_zsup" % ( self.initfail_file_head, adcboard.board_id) # Define board connection information for link in self.board_link_list: (board, host, port, node) = link if (int(board) == adcboard.board_id): adcboard.node_id = self.db.get_node_id(host) adcboard.node_ip = 
self.db.get_node_daq_ip(adcboard.node_id) adcboard.conet2_link = int(port) adcboard.conet2_slot = int(node) # Define total DAQ time (default: 0, i.e. run forever) # In most cases the default is what you want adcboard.total_daq_time = self.total_daq_time # Read ADC settings from board setup file adcboard.read_setup(self.setup) def configure_trigger(self, trigger): # Configure Trigger process after changing setup # Reset Trigger process handler to default configuration trigger.set_default_config() # Set executable trigger.executable = self.trigger_executable # Lock file (will contain PID of process) trigger.lock_file = "%s_trigger" % self.lock_file_head # Control files needed to start/stop trigger generation trigger.start_file = self.trig_start_file trigger.quit_file = self.trig_stop_file # Status files for initialization trigger.initok_file = "%s_trigger" % self.initok_file_head trigger.initfail_file = "%s_trigger" % self.initfail_file_head # Set node where Trigger will run trigger.node_id = self.db.get_node_id(self.trigger_node) trigger.node_ip = self.db.get_node_daq_ip(trigger.node_id) # Define trigger mask to use for this setup #trigger.trigger_mask = self.trigger_mask # Define total DAQ time (default: 0, i.e. 
run forever) # In most cases the default is what you want trigger.total_daq_time = self.total_daq_time # Read Trigger settings from trigger setup file trigger.read_setup(self.setup) def configure_merger(self, merger): # Configure Merger process after changing setup # Reset Merger process handler to default configuration merger.set_default_config() # Set executable merger.executable = self.merger_executable def configure_level1(self, level1): # Configure Level1 processes after changing setup # Reset Level1 process handler to default configuration level1.set_default_config() # Set executable level1.executable = self.level1_executable # Set maximum number of events to write in a single file level1.max_events = self.level1_maxevt def runconfig_adcboard(self, adcboard): # Configure ADC board DAQ/ZSUP processes after changing run adcboard.run_number = self.run_number if (self.run_type == "FAKE"): adcboard.process_mode = "FAKE" else: adcboard.process_mode = "DAQ" s_bid = "b%02d" % adcboard.board_id adcboard.config_file_daq = "%s/%s_%s_daq.cfg" % ( self.config_dir, self.config_file_head, s_bid) adcboard.config_file_zsup = "%s/%s_%s_zsup.cfg" % ( self.config_dir, self.config_file_head, s_bid) adcboard.log_file_daq = "%s/%s_%s_daq.log" % ( self.log_dir, self.log_file_head, s_bid) adcboard.log_file_zsup = "%s/%s_%s_zsup.log" % ( self.log_dir, self.log_file_head, s_bid) adcboard.output_stream_daq = "%s/%s_%s_daq" % (self.stream_dir, self.stream_head, s_bid) adcboard.input_stream_zsup = adcboard.output_stream_daq adcboard.output_stream_zsup = "%s/%s_%s_zsup" % ( self.stream_dir, self.stream_head, s_bid) def runconfig_trigger(self, trigger): # Configure Trigger process after changing run trigger.run_number = self.run_number trigger.config_file = "%s/%s_trigger.cfg" % (self.config_dir, self.config_file_head) trigger.log_file = "%s/%s_trigger.log" % (self.log_dir, self.log_file_head) trigger.output_stream = "%s/%s_trigger" % (self.stream_dir, self.stream_head) def 
runconfig_merger(self, merger): # Configure Merger process after changing run merger.run_number = self.run_number # Get node_id and node_ip from DB merger.node_id = self.db.get_node_id(self.merger_node) merger.node_ip = self.db.get_node_daq_ip(merger.node_id) merger.config_file = "%s/%s_merger.cfg" % (self.config_dir, self.config_file_head) merger.log_file = "%s/%s_merger.log" % (self.log_dir, self.log_file_head) merger.input_list = "%s/%s_merger_input.list" % (self.config_dir, self.config_file_head) merger.output_list = "%s/%s_merger_output.list" % ( self.config_dir, self.config_file_head) def runconfig_level1(self, level1): # Configure Level1 processes after changing run level1.run_number = self.run_number # Get node_id and node_ip from DB using Merger node level1.node_id = self.db.get_node_id(self.merger_node) level1.node_ip = self.db.get_node_daq_ip(level1.node_id) s_lid = "lvl1_%02d" % level1.level1_id level1.config_file = "%s/%s_%s.cfg" % (self.config_dir, self.config_file_head, s_lid) level1.log_file = "%s/%s_%s.log" % (self.log_dir, self.log_file_head, s_lid) level1.input_stream = "%s/%s_%s" % (self.stream_dir, self.stream_head, s_lid) level1.output_dir = self.rawdata_dir level1.output_header = "%s_%s" % (self.rawdata_head, s_lid) def start(self): # Create the "start the run" tag file on all DAQ nodes print "Starting DAQs" for node_id in self.daq_nodes_id_list: if (node_id == 0): open(self.start_file, 'w').close() else: command = "ssh -n -i %s %s '( touch %s )'" % ( self.ssh_id_file, self.daq_nodes_ip_list[node_id], self.start_file) print command os.system(command) # Wait 5sec before enabling triggers time.sleep(5) # Enable triggers print "Enabling triggers" if (self.trigger.node_id) == 0: open(self.trig_start_file, 'w').close() else: command = "ssh -n -i %s %s '( touch %s )'" % ( self.ssh_id_file, self.trigger.node_ip, self.trig_start_file) print command os.system(command) # Update run status in DB if (self.run_number): 
self.db.set_run_time_start(self.run_number, self.db.now_str()) self.db.set_run_status(self.run_number, self.db.DB_RUN_STATUS_RUNNING) def stop(self): # Disable triggers print "Disabling triggers" if (self.trigger.node_id) == 0: open(self.trig_stop_file, 'w').close() else: command = "ssh -n -i %s %s '( touch %s )'" % ( self.ssh_id_file, self.trigger.node_ip, self.trig_stop_file) print command os.system(command) # Wait 1sec before telling all processes to stop time.sleep(1) # Create the "stop the run" tag file on all DAQ nodes for node_id in self.daq_nodes_id_list: if (node_id == 0): open(self.quit_file, 'w').close() else: command = "ssh -n -i %s %s '( touch %s )'" % ( self.ssh_id_file, self.daq_nodes_ip_list[node_id], self.quit_file) print command os.system(command) # Write run name to last_run file for monitoring with open(self.last_run_file, "w") as lf: lf.write("%s\n" % self.run_name) # Finalize run in DB if (self.run_number): self.db.set_run_status(self.run_number, self.final_status) self.db.set_run_time_stop(self.run_number, self.db.now_str()) self.db.set_run_comment_end(self.run_number, self.db.now_str(), self.run_comment_end) def clean_up(self): # Clean up control directories at end of run print "Cleaning up run directories" # Remove the "start/stop the run" tag files on all DAQ nodes for node_id in self.daq_nodes_id_list: if (node_id == 0): if (os.path.exists(self.start_file)): os.remove(self.start_file) if (os.path.exists(self.quit_file)): os.remove(self.quit_file) else: command = "ssh -n -i %s %s '( rm -f %s %s )'" % ( self.ssh_id_file, self.daq_nodes_ip_list[node_id], self.start_file, self.quit_file) print command os.system(command) # Remove the "start/stop the triggers" tag files on Trigger node if (self.trigger.node_id == 0): if (os.path.exists(self.trig_start_file)): os.remove(self.trig_start_file) if (os.path.exists(self.trig_stop_file)): os.remove(self.trig_stop_file) else: command = "ssh -n -i %s %s '( rm -f %s %s )'" % ( self.ssh_id_file, 
self.trigger.node_ip, self.trig_start_file, self.trig_stop_file) print command os.system(command) # Remove initok/initfail files for all ADC nodes for adc in (self.adcboard_list): if (adc.node_id == 0): if (os.path.exists(adc.initok_file_daq)): os.remove(adc.initok_file_daq) if (os.path.exists(adc.initok_file_zsup)): os.remove(adc.initok_file_zsup) if (os.path.exists(adc.initfail_file_daq)): os.remove(adc.initfail_file_daq) if (os.path.exists(adc.initfail_file_zsup)): os.remove(adc.initfail_file_zsup) else: command = "ssh -n -i %s %s '( rm -f %s %s %s %s)'" % ( self.ssh_id_file, adc.node_ip, adc.initok_file_daq, adc.initok_file_zsup, adc.initfail_file_daq, adc.initfail_file_zsup) print command os.system(command) # Remove initok/initfail files on Trigger node if (self.trigger.node_id == 0): if (os.path.exists(self.trigger.initok_file)): os.remove(self.trigger.initok_file) if (os.path.exists(self.trigger.initfail_file)): os.remove(self.trigger.initfail_file) else: command = "ssh -n -i %s %s '( rm -f %s %s)'" % ( self.ssh_id_file, self.trigger.node_ip, self.trigger.initok_file, self.trigger.initfail_file) print command os.system(command) # Stop all receiving nc processes on the merger nodes (clumsy but could not find another way) if (self.merger.node_id == 0): for proc in self.proc_rcv: proc.terminate() else: command = "ssh -i %s %s '( ps -fu %s | grep recv-only | grep -v bash | grep -v grep | awk \"{print \$2}\" | xargs kill )'" % ( self.ssh_id_file, self.merger.node_ip, os.getenv( 'USER', "daq")) print command os.system(command) # Now we can clean up all nc processes for proc in self.proc_snd: if proc.poll() != None: proc.wait() else: print "Run::clean_up - Problem closing sending nc process" for proc in self.proc_rcv: if proc.poll() != None: proc.wait() else: print "Run::clean_up - Problem closing receiving nc process" # Close all receiving/sending nc log files for handler in self.hand_rcv: handler.close() for handler in self.hand_snd: handler.close()