def threadable_process_node(node_list): """ <Purpose> The parent function that calls child functions to do the little work. From this function we can see the order of events: 1. upload tar 2. check that we got a response (if not add to unreachable for later) 3. run cleaning/setup scripts on remote machine 4. run actual test scripts on remote machine (files are grabbed after all scripts execute, called from step4) <Arguments> node_list: a list containing a single tuple of (user, remotehost) <Exceptions> None. <Side Effects> Modifies running thread counter. <Returns> None. """ try: # node is a list containing one tuple node = node_list[0] # upload the .tar file. # attempt to upload the .tar file to the computers. this'll modify a list of # computers that we didn't connect to succesfully,so we'll remove them from # the list of computers we want to run the rest of the scripts on. threadable_remote_upload_tar(node_list) # only continue if node was marked reachable if deploy_threading.node_was_reachable(node): # clean the node threadable_remote_cleanup_all(node_list) # run the scripts remotely now threadable_remote_run_all(node_list) # cleanup the files, but only if it's not an instructional machine # the reason for this is because it's NFS and files could still be # in use by the other machines. we'll add this to a special list # in our thread_communications dict and we'll then clean these up # when all threads are totally done if not node[1].startswith('128.'): threadable_cleanup_final(node_list) else: # check if array exists already deploy_threading.add_instructional_node(node) # decrement # of threads running except Exception, e: deploy_logging.logerror("Error in thread assigned to "+node[1]+\ " threadable_process_node ("+str(e)+")")
def threadable_remote_upload_tar(remote_machines):
  """
  <Purpose>
    Uploads the deploy.tar to each machine before running anything. Machines
    that timeout are added to the unreachable_hosts list in the dictionary.

  <Arguments>
    remote_machines:
      list of tuples with (user, ip) IPs that we have to cleanup.

  <Exceptions>
    None.

  <Side Effects>
    Temporarily locks thread_communications dict which is used by other
    threads trying to upload (if they run into an error).

  <Returns>
    None.
  """
  # Assume single element if it's not a list. Idiom fix: isinstance()
  # instead of comparing type() objects.
  if not isinstance(remote_machines, list):
    remote_machines = [remote_machines]

  # for every machine in our list...
  for machine_tuple in remote_machines:
    # split up the (user, host) tuple
    username = machine_tuple[0]
    machine = machine_tuple[1]

    deploy_logging.log('Setup', 'Attemping tar file upload via scp on '+machine)
    # upload_tar returns the scp exit code as a string
    scp_errcode, scp_stdout, scp_stderr = upload_tar(username, str(machine))

    out, err = deploy_logging.format_stdout_and_err(scp_stdout, scp_stderr)

    # check the error codes
    if str(scp_errcode) == '0':
      deploy_logging.log('Setup', ' scp file upload complete on '+machine)
    elif str(scp_errcode) == '1':
      # scp exit code 1: could not connect; remember this host as unreachable
      deploy_logging.logerror('Could not establish a connection with '+\
          machine+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
    else:
      # anything else is unexpected; treat the host as unreachable as well
      deploy_logging.logerror('scp returned unknown error code '+\
          str(scp_errcode)+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
def threadable_remote_upload_tar(remote_machines):
  """
  <Purpose>
    Uploads the deploy.tar to each machine before running anything. Machines
    that timeout are added to the unreachable_hosts list in the dictionary.

  <Arguments>
    remote_machines:
      list of tuples with (user, ip) IPs that we have to cleanup.

  <Exceptions>
    None.

  <Side Effects>
    Temporarily locks thread_communications dict which is used by other
    threads trying to upload (if they run into an error).

  <Returns>
    None.
  """
  # Assume single element if it's not a list. Idiom fix: isinstance()
  # instead of comparing type() objects.
  if not isinstance(remote_machines, list):
    remote_machines = [remote_machines]

  # for every machine in our list...
  for machine_tuple in remote_machines:
    # split up the (user, host) tuple
    username = machine_tuple[0]
    machine = machine_tuple[1]

    deploy_logging.log('Setup', 'Attemping tar file upload via scp on '+machine)
    # upload_tar returns the scp exit code as a string
    scp_errcode, scp_stdout, scp_stderr = upload_tar(username, str(machine))

    out, err = deploy_logging.format_stdout_and_err(scp_stdout, scp_stderr)

    # check the error codes
    if str(scp_errcode) == '0':
      deploy_logging.log('Setup', ' scp file upload complete on '+machine)
    elif str(scp_errcode) == '1':
      # scp exit code 1: could not connect; remember this host as unreachable
      deploy_logging.logerror('Could not establish a connection with '+\
          machine+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
    else:
      # anything else is unexpected; treat the host as unreachable as well
      deploy_logging.logerror('scp returned unknown error code '+\
          str(scp_errcode)+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
def upload_tar(user, remote_host, tar_filename="deploy.tar"):
  """
  <Purpose>
    Uploads tar_filename to remote_host via scp by logging in as
    user@remote_host, and logs the return code as well as anything printed
    to stderr or stdout (which is expected to be empty). Delegates the
    actual transfer to deploy_network.remote_upload_file.

  <Arguments>
    user:
      the user to log in as on the remote machine.
    remote_host:
      the remote machine's IP to which we'll be uploading files.
    tar_filename:
      Optional. Default is deploy.tar. The tar file to upload to the
      remote host.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A tuple: (returncode, stdout, stderr). returncode is a string.
  """
  # hand the transfer off to the scp helper
  stdoutdata, stderrdata, returncode = deploy_network.remote_upload_file(
      tar_filename, user, remote_host)

  # log failure or success depending on scp's exit code
  if returncode != 0:
    deploy_logging.logerror(remote_host+': Trouble uploading deploy.tar')
  else:
    deploy_logging.log(remote_host, 'Successfully uploaded deploy.tar')

  return (str(returncode), stdoutdata, stderrdata)
def upload_tar(user, remote_host, tar_filename="deploy.tar"):
  """
  <Purpose>
    Uploads tar_filename to remote_host via scp by logging in as
    user@remote_host, and logs the return code as well as anything printed
    to stderr or stdout (which is expected to be empty). Delegates the
    actual transfer to deploy_network.remote_upload_file.

  <Arguments>
    user:
      the user to log in as on the remote machine.
    remote_host:
      the remote machine's IP to which we'll be uploading files.
    tar_filename:
      Optional. Default is deploy.tar. The tar file to upload to the
      remote host.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A tuple: (returncode, stdout, stderr). returncode is a string.
  """
  # hand the transfer off to the scp helper
  stdoutdata, stderrdata, returncode = deploy_network.remote_upload_file(
      tar_filename, user, remote_host)

  # log failure or success depending on scp's exit code
  if returncode != 0:
    deploy_logging.logerror(remote_host+': Trouble uploading deploy.tar')
  else:
    deploy_logging.log(remote_host, 'Successfully uploaded deploy.tar')

  return (str(returncode), stdoutdata, stderrdata)
def init(): """ <Purpose> Initializes all the globals and things to the default values and starts the thread that deals with killing processes started that have timed out. <Arguments> None. <Exceptions> Critical exception thrown if thread monitor could not be started. <Side Effects> None. <Returns> Boolean. True on success. False on failure. """ # initialize, keep track of how many threads are running thread_communications['threads_running'] = 0 # set the kill flag to false and start the thread monitoring pids thread_communications['kill_flag'] = False # tells the module it has been initialized thread_communications['init'] = True try: thread.start_new_thread(pid_timeout, ()) except Exception, e: deploy_logging.logerror("Trouble starting pid thread monitor") return False
def deploy():
  """
  <Purpose>
    This function is the brains behind the deploy script. All the main
    calls originate from this function.

    -Gets list of remote hosts from a file

    -Calls function to execute cleanup/setup on remote hosts before
      we can run remote scripts and then that same function executes
      the remote script files

  <Arguments>
    None.

  <Exceptions>
    Exit if hostlist file was not found.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # Get list of hosts
  myhosts = get_remote_hosts_from_file()

  if not myhosts: # if we didn't find any hosts.. crap out!
    print "Didn't find any remote hosts file!"
    deploy_logging.logerror("Didn't find any remote hosts file!")
    # return if we don't have instructional machines to process
    if 'machine_list' not in deploy_threading.thread_communications.keys():
      return
  else:
    # check if we also have intructional machines, and if we do, then
    # make sure we're not being tricked - remove all instructional machines
    # from the myhosts list
    if 'machine_list' in deploy_threading.thread_communications.keys():
      # we have instructional machines
      machine_list = deploy_threading.thread_communications['machine_list']
      myhosts = list(set(myhosts)-set(machine_list))

  # initialize thread_communications dictionary to a list which will have
  # our unreachable hosts
  deploy_threading.thread_communications['unreachable_host'] = []

  # this will keep track of the proc id's that are launched on different
  # threads. These are ssh/scp processes. We keep track of these because
  # we want to make sure that when we exit deploy.py, we kill all of these
  # processes - they should be killed by that time unless there was some kind
  # of error.
  deploy_threading.thread_communications['running_process_ids'] = []

  # initial run
  connect_and_do_work(myhosts)

  # now do the same for the instructional machines if we have any:
  if 'machine_list' in deploy_threading.thread_communications.keys():
    connect_and_do_work(deploy_threading.thread_communications['machine_list'], 3)

  # if we had unreachable hosts..
  if deploy_threading.has_unreachable_hosts():
    # Currently, set NOT to retry hosts. Since it's running regularly as a
    # service, there is no need as 99% of these hosts time out anyway, so it
    # just takes a lot longer than it should.
    # NOTE: range(0) is empty, so this retry loop is deliberately disabled;
    # raise the range bound to re-enable connection retries.
    for i in range(0):
      # increase timeout time by 25% each time
      deploy_network.default_connection_timeout =\
          str(int(float(deploy_network.default_connection_timeout) * 1.25))

      # 1. use list of unreachable hosts list as our list to retry
      last_failed_hosts = deploy_threading.thread_communications['unreachable_host']

      # 2. reset the unreachable hosts list
      deploy_threading.thread_communications['unreachable_host'] = []
      deploy_logging.log("Notice", "Trying to connect to failed hosts (connection attempt #"+str(i+2)+")")
      connect_and_do_work(last_failed_hosts)

  print "Checking that all child threads/processes are dead..."

  # kill any ssh/scp child processes that are still hanging around
  for each_tuple in deploy_threading.thread_communications['running_process_ids']:
    try:
      # tuple is (pid, expiretime, remotehost, username)
      procid = int(each_tuple[0])
      os.kill(procid, 9)
    except OSError, ose:
      # process already exited on its own - nothing to do
      pass
    except Exception, e:
      # NOTE(review): if int(each_tuple[0]) itself raises, procid is unbound
      # here and this print would raise too - confirm tuples always carry a
      # numeric pid.
      print "Something went wrong while trying to kill process "+\
        str(procid)+", "+str(e)
# if first chars match what we want ('!user:'******'!user:'******'!user:'******'\n '): # and ignore comments (lines starting with #) if line.strip('\n ')[0] != '#': # if we get here, then we have an IP so we need to check that # user is not empty.. log err if it is and complain. if not current_username: deploy_logging.logerror('Critical Error: No username specified for remote host group!') file_of_ips.close() return False # add (username, remote_host) pair while casting remote_host to lowercase in case # it's a hostname for easy comparison if needed everywhere users_ip_tuple_list.append((current_username, line.rstrip('\n ').lower())) # set flag that we have at least one ip have_one_ip = True # return true only if we have at least ONE ip that we added to the list # and not just a bunch of users if have_one_ip: # lets make the list a set, which is a cheap way of getting rid of # duplicates, then cast back to list. finalized_list = list(set(users_ip_tuple_list))
def deploy():
  """
  <Purpose>
    This function is the brains behind the deploy script. All the main
    calls originate from this function.

    -Gets list of remote hosts from a file

    -Calls function to execute cleanup/setup on remote hosts before
      we can run remote scripts and then that same function executes
      the remote script files

  <Arguments>
    None.

  <Exceptions>
    Exit if hostlist file was not found.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # Get list of hosts
  myhosts = get_remote_hosts_from_file()

  if not myhosts: # if we didn't find any hosts.. crap out!
    print "Didn't find any remote hosts file!"
    deploy_logging.logerror("Didn't find any remote hosts file!")
    # return if we don't have instructional machines to process
    if 'machine_list' not in deploy_threading.thread_communications.keys():
      return
  else:
    # check if we also have intructional machines, and if we do, then
    # make sure we're not being tricked - remove all instructional machines
    # from the myhosts list
    if 'machine_list' in deploy_threading.thread_communications.keys():
      # we have instructional machines
      machine_list = deploy_threading.thread_communications['machine_list']
      myhosts = list(set(myhosts)-set(machine_list))

  # initialize thread_communications dictionary to a list which will have
  # our unreachable hosts
  deploy_threading.thread_communications['unreachable_host'] = []

  # this will keep track of the proc id's that are launched on different
  # threads. These are ssh/scp processes. We keep track of these because
  # we want to make sure that when we exit deploy.py, we kill all of these
  # processes - they should be killed by that time unless there was some kind
  # of error.
  deploy_threading.thread_communications['running_process_ids'] = []

  # initial run
  connect_and_do_work(myhosts)

  # now do the same for the instructional machines if we have any:
  if 'machine_list' in deploy_threading.thread_communications.keys():
    connect_and_do_work(deploy_threading.thread_communications['machine_list'], 3)

  # if we had unreachable hosts..
  if deploy_threading.has_unreachable_hosts():
    # Currently, set NOT to retry hosts. Since it's running regularly as a
    # service, there is no need as 99% of these hosts time out anyway, so it
    # just takes a lot longer than it should.
    # NOTE: range(0) is empty, so this retry loop is deliberately disabled;
    # raise the range bound to re-enable connection retries.
    for i in range(0):
      # increase timeout time by 25% each time
      deploy_network.default_connection_timeout =\
          str(int(float(deploy_network.default_connection_timeout) * 1.25))

      # 1. use list of unreachable hosts list as our list to retry
      last_failed_hosts = deploy_threading.thread_communications['unreachable_host']

      # 2. reset the unreachable hosts list
      deploy_threading.thread_communications['unreachable_host'] = []
      deploy_logging.log("Notice", "Trying to connect to failed hosts (connection attempt #"+str(i+2)+")")
      connect_and_do_work(last_failed_hosts)

  print "Checking that all child threads/processes are dead..."

  # kill any ssh/scp child processes that are still hanging around
  for each_tuple in deploy_threading.thread_communications['running_process_ids']:
    try:
      # tuple is (pid, expiretime, remotehost, username)
      procid = int(each_tuple[0])
      os.kill(procid, 9)
    except OSError, ose:
      # process already exited on its own - nothing to do
      pass
    except Exception, e:
      # NOTE(review): if int(each_tuple[0]) itself raises, procid is unbound
      # here and this print would raise too - confirm tuples always carry a
      # numeric pid.
      print "Something went wrong while trying to kill process "+\
        str(procid)+", "+str(e)
def remote_download_file(remote_fn_path, local_fn_path, user, remote_host,
    retry_on_refusal = 3, connect_timeout = default_connection_timeout):
  """
  <Purpose>
    This uses scp to download a file from a remote computer.

  <Arguments>
    remote_fn_path:
      The path to the file to download (remote file)
    local_fn_path:
      Where do we put it on this computer?
    user:
      user to log in as
    remote_host:
      the ip/name of the machine we're connecting to.
    retry_on_refusal:
      Optional. Integer. Has number of times to retry the connection IF it
      was refused (built in to take care of not 'spamming' the remote server)
    connect_timeout:
      Optional. Integer. Time in seconds for ssh to timeout if no response
      was received.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Tuple. (out, err, returncode)
      out: stdout from scp
      err: err from ssh
      returncode: scp's exit code
  """
  # local_fn_path will have the path + name of file
  # get the fn by doing some string math..
  dir_to_local_file, junk, localfn = local_fn_path.rpartition('/')

  # is the dir real?
  if not os.path.isdir(dir_to_local_file):
    deploy_logging.logerror('Local destination directory does not exist.')
    raise Exception('Please check calling method.')

  # the SCP handle used.
  # NOTE(review): command built by string concatenation with shell=True;
  # fine for trusted host lists, but hostile paths/hosts could inject shell
  # commands - confirm inputs are trusted.
  scp_proc_handle = subprocess.Popen('scp -o BatchMode=yes -o '+\
      'ConnectTimeout='+str(connect_timeout)+' -o StrictHostKeyChecking=no '+\
      ' '+user+'@'+remote_host+':'+remote_fn_path+\
      ' '+local_fn_path, shell = True, stdout = subprocess.PIPE,
      stderr = subprocess.PIPE)

  # set the PID of the process so we can set a timeout later
  scp_proc_pid = scp_proc_handle.pid

  # start thread to monitor timeouts (on another thread)
  deploy_threading.monitor_timeout(scp_proc_pid, int(connect_timeout),
      remote_host, user)

  # execute
  out, err = scp_proc_handle.communicate('')
  returncode = scp_proc_handle.returncode

  # retry if conn. was refused?
  if retry_on_refusal:
    # check if we got a connection refused. if we did, could be cuz we're
    # spamming the server, so sleep and then try again
    didwesleep = sleep_on_conn_refused(out, err, retry_on_refusal, remote_host)
    # we slept, so call function again and try to execute
    if didwesleep:
      # run again, but this time decrement retry counter.
      # Bug fix: propagate the caller's connect_timeout instead of silently
      # resetting it to default_connection_timeout on every retry.
      out, err, returncode = remote_download_file(remote_fn_path,
          local_fn_path, user, remote_host, retry_on_refusal - 1,
          connect_timeout = connect_timeout)

  # format the string
  out, err = deploy_logging.format_stdout_and_err(out, err)

  return out, err, returncode
def remote_upload_file(local_fn_path, user, remote_host, retry_on_refusal = 3,
    connect_timeout = default_connection_timeout):
  """
  <Purpose>
    This uses scp to upload a file to a remote computer.

  <Arguments>
    local_fn_path:
      Which file do we chuck to the remote computer?
    user:
      user to log in as
    remote_host:
      the ip/name of the machine we're connecting to.
    retry_on_refusal:
      Optional. Integer. Has number of times to retry the connection IF it
      was refused (built in to take care of not 'spamming' the remote server)
    connect_timeout:
      Optional. Integer. Time in seconds for ssh to timeout if no response
      was received.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Tuple. (out, err, returncode)
      out: stdout from scp
      err: err from ssh
      returncode: scp's exit code
  """
  # check that local file exists.
  if not os.path.isfile(local_fn_path):
    deploy_logging.logerror('Problem with local file: it does not exist!')
    raise Exception('Please check calling method.')

  # NOTE(review): command built by string concatenation with shell=True;
  # fine for trusted host lists, but hostile paths/hosts could inject shell
  # commands - confirm inputs are trusted.
  scp_proc_handle = subprocess.Popen('scp -o BatchMode=yes -o '+\
      'ConnectTimeout='+str(connect_timeout)+' -o StrictHostKeyChecking=no '+\
      ' '+local_fn_path+' '+user+"@"+remote_host+":", shell = True,
      stdout = subprocess.PIPE, stderr = subprocess.PIPE)

  scp_proc_pid = scp_proc_handle.pid

  # start thread to monitor timeouts (on another thread)
  deploy_threading.monitor_timeout(scp_proc_pid, int(connect_timeout),
      remote_host, user)

  # execute and block until done...
  out, err = scp_proc_handle.communicate('')
  returncode = scp_proc_handle.returncode

  # retry if conn. was refused?
  if retry_on_refusal:
    # check if we got a connection refused. if we did, could be cuz we're
    # spamming the server, so sleep and then try again
    didwesleep = sleep_on_conn_refused(out, err, retry_on_refusal, remote_host)
    # we slept, so call function again and try to execute
    if didwesleep:
      # run again, but this time decrement retry counter.
      # Bug fix: propagate the caller's connect_timeout instead of silently
      # resetting it to default_connection_timeout on every retry.
      out, err, returncode = remote_upload_file(local_fn_path, user,
          remote_host, retry_on_refusal - 1,
          connect_timeout = connect_timeout)

  # format the string
  out, err = deploy_logging.format_stdout_and_err(out, err)

  return out, err, returncode
def remote_download_dir(remote_source_dir, local_dest_dir, user, remote_host,
    retry_on_refusal = 3, connect_timeout = default_connection_timeout):
  """
  <Purpose>
    This uses scp to download a directory from a remote computer.

  <Arguments>
    remote_source_dir:
      The path to the directory to download (remote directory)
    local_dest_dir:
      Where do we put it on this computer?
    user:
      user to log in as
    remote_host:
      the ip/name of the machine we're connecting to.
    retry_on_refusal:
      Optional. Integer. Has number of times to retry the connection IF it
      was refused (built in to take care of not 'spamming' the remote server)
    connect_timeout:
      Optional. Integer. Time in seconds for ssh to timeout if no response
      was received.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Tuple. (out, err, returncode)
      out: stdout from scp
      err: err from ssh
      returncode: scp's exit code
  """
  # the dir one level 'up' from the our destination dir must exist, so lets
  # grab it by doing some string math.. remove trailing . and then partition
  local_dest_dir_parent, junk, morejunk = local_dest_dir.strip('/').rpartition('/')

  # if our local destination directory does not exist then complain.
  if not os.path.isdir(local_dest_dir_parent):
    deploy_logging.logerror(local_dest_dir)
    deploy_logging.logerror(local_dest_dir_parent)
    deploy_logging.logerror('Problem with local directory: it does not exist!')
    raise Exception('Please check calling method.')

  # get the scp handle (-r for a recursive directory copy).
  # NOTE(review): command built by string concatenation with shell=True;
  # fine for trusted host lists, but hostile paths/hosts could inject shell
  # commands - confirm inputs are trusted.
  scp_proc_handle = subprocess.Popen('scp -r -o BatchMode=yes -o '+
      'ConnectTimeout='+str(connect_timeout)+' -o StrictHostKeyChecking=no '+\
      user+'@'+remote_host+':'+remote_source_dir+\
      ' '+local_dest_dir, shell = True, stdout = subprocess.PIPE,
      stderr = subprocess.PIPE)

  # the pid of the scp process just started
  scp_proc_pid = scp_proc_handle.pid

  # start thread to monitor timeouts (on another thread)
  deploy_threading.monitor_timeout(scp_proc_pid, int(connect_timeout),
      remote_host, user)

  # execute string and block this thread until done...
  out, err = scp_proc_handle.communicate('')
  returncode = scp_proc_handle.returncode

  # retry if conn. was refused?
  if retry_on_refusal:
    # check if we got a connection refused. if we did, could be cuz we're
    # spamming the server, so sleep and then try again
    didwesleep = sleep_on_conn_refused(out, err, retry_on_refusal, remote_host)
    # we slept, so call function again and try to execute
    if didwesleep:
      # run again, but this time decrement retry counter.
      # Bug fix: propagate the caller's connect_timeout instead of silently
      # resetting it to default_connection_timeout on every retry.
      out, err, returncode = remote_download_dir(remote_source_dir,
          local_dest_dir, user, remote_host, retry_on_refusal - 1,
          connect_timeout = connect_timeout)

  # format the string
  out, err = deploy_logging.format_stdout_and_err(out, err)

  return out, err, returncode
def remote_get_log(user, remote_host): """ <Purpose> Gets the remote logs (all tarred up) from remote_host and copies it to a local directory via scp then untars it into deploy.logs/[remote_host]/. <Arguments> user: the user to log in as remote_host: the IP of the host to get the logs from <Exceptions> scp fails/times out. <Side Effects> None. <Returns> No returns. """ try: # set up dir that we'll move the remote .tar into if not os.path.isdir('./deploy.logs/'+remote_host): os.mkdir('./deploy.logs/'+remote_host) # download the tar file from remote host out, err, returncode = remote_download_file(remote_host+'.tgz', './deploy.logs/'+remote_host+'/'+remote_host+'.tgz', user, remote_host) deploy_logging.log('Downloading logs', 'Logs downloaded from '+remote_host) # now try to untar the files # build up a command list to execute command_list = [] # tar is picky about where it'll unzip to (CWD), so we'll just Cd there command_list.append('cd ./deploy.logs/'+remote_host+'/') # now untar. if deploy_main.verbosity >=1 then we'll be verbose if deploy_main.verbosity >=1: command_list.append('tar -xvvf '+remote_host+'.tgz') else: command_list.append('tar -xf '+remote_host+'.tgz') # not make command string by joining the list elements with '; ' command_string = '; '.join(command_list) # execute string out, err, retvalue = deploy_helper.shellexec2(command_string) deploy_logging.log('Downloading logs', 'Logs from '+remote_host+' are ready') # we no longer need the tar file, just hogging up space os.remove('./deploy.logs/'+remote_host+'/'+remote_host+'.tgz') except Exception, e: if deploy_main.verbosity == 2: # Only log if we error and need to narrow this down. otherwise, # it gets really spammy. deploy_logging.logerror(remote_host+": Some kind of err in remote_get_log. ("+\ remote_host+") , error:"+str(e)+")")
ValueError: occurs when the host to be removed is not in the array <Side Effects> None. <Returns> None. """ try: thread_communications['hosts_left'].remove((user, remote_host)) except ValueError, e: # host is already removed, keep going pass except Exception, e: print e deploy_logging.logerror("Error in remove_host_from_hosts_left: " + str(e)) else: # no error, decrease the running thread count threading_lock_and_sub() def pid_timeout(): """ <Purpose> This function is intented to be called once and supposed to run on a separate thread. Until the 'kill' flag is set, it will spin and see which pid's need to be killed. All process IDs are set via the set_pid_timeout method. <Arguments>
def pid_timeout():
  """
  <Purpose>
    This function is intented to be called once and supposed to run on a
    separate thread. Until the 'kill' flag is set, it will spin and see
    which pid's need to be killed. All process IDs are set via the
    set_pid_timeout method.

  <Arguments>
    None.

  <Exceptions>
    OSError: the process no longer exists, ignore
    ValueError: when removing host from running hosts this means that the
      host has already been terminated.
    Any other exception is unexpected

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # keeps spinning and sleeping, checking which PIDs need to be killed
  thread_communications['running_process_ids'] = []
  # while the kill flag is false. Kill flag is modified right before
  # exit
  while not thread_communications['kill_flag']:
    # sleep and wakeup every couple seconds.
    time.sleep(5)
    # this list will keep track of the pids that we've killed.
    # NOTE(review): killed_pids is filled below but never consumed within
    # this function as shown - confirm the pruning of
    # thread_communications['running_process_ids'] happens elsewhere.
    killed_pids = []
    # check the running_process_ids and see if any of them have expired
    for each_process in thread_communications['running_process_ids']:
      # each process is a tuple that consists of (pid, expiretime, hostname, username)
      process_to_kill = each_process[0]
      expire_time = each_process[1]
      remote_host = each_process[2]
      user = each_process[3]
      # if the current time is past the set expire time then we need to try
      # and kill it
      if expire_time <= time.time():
        # try to kill process
        try:
          # check if process is still running (relies on a /proc filesystem,
          # i.e. Linux - TODO confirm deployment platform)
          if os.path.exists('/proc/'+str(process_to_kill)):
            os.kill(process_to_kill, 9)
            killed_pids.append(each_process)
            # sleep a second, and then check that the process was killed. if
            # not, try a 2nd and third time
            time.sleep(1)
            if os.path.exists('/proc/'+str(process_to_kill)):
              # try os.kill again, and if that doesn't work, use shellexec method
              os.kill(process_to_kill, 9)
              time.sleep(1)
              if os.path.exists('/proc/'+str(process_to_kill)):
                deploy_helper.shellexec2('kill -9 '+str(process_to_kill))
                time.sleep(1)
            if remote_host:
              deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout! The host"+\
                " on this thread is "+remote_host)
            else:
              deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout!")
            # subtract from out running thread count and remove host
            subtract_host_left([(user, remote_host)])
          else:
            # the process is dead, just remove host from hosts_left just in
            # case, and remove from running pids as well, but dont sub the
            # # of threads
            killed_pids.append(each_process)
            subtract_host_left([(user, remote_host)], False)
        except OSError, ose:
          # this means no pid found and process has most likely
          # already terminated
          deploy_logging.logerror("Process"+str(process_to_kill)+"("+remote_host+") is already done.")
          subtract_host_left([(user, remote_host)], False)
          pass
        except Exception, e:
          deploy_logging.logerror("Unexpected error in pid_timeout thread "+\
            "while killing a child process: "+str(e))
ValueError: occurs when the host to be removed is not in the array <Side Effects> None. <Returns> None. """ try: thread_communications['hosts_left'].remove((user, remote_host)) except ValueError, e: # host is already removed, keep going pass except Exception, e: print e deploy_logging.logerror("Error in remove_host_from_hosts_left: "+str(e)) else: # no error, decrease the running thread count threading_lock_and_sub() def pid_timeout(): """ <Purpose> This function is intented to be called once and supposed to run on a separate thread. Until the 'kill' flag is set, it will spin and see which pid's need to be killed. All process IDs are set via the set_pid_timeout method. <Arguments>
def pid_timeout():
  """
  <Purpose>
    This function is intented to be called once and supposed to run on a
    separate thread. Until the 'kill' flag is set, it will spin and see
    which pid's need to be killed. All process IDs are set via the
    set_pid_timeout method.

  <Arguments>
    None.

  <Exceptions>
    OSError: the process no longer exists, ignore
    ValueError: when removing host from running hosts this means that the
      host has already been terminated.
    Any other exception is unexpected

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # keeps spinning and sleeping, checking which PIDs need to be killed
  thread_communications['running_process_ids'] = []
  # while the kill flag is false. Kill flag is modified right before
  # exit
  while not thread_communications['kill_flag']:
    # sleep and wakeup every couple seconds.
    time.sleep(5)
    # this list will keep track of the pids that we've killed.
    # NOTE(review): killed_pids is filled below but never consumed within
    # this function as shown - confirm the pruning of
    # thread_communications['running_process_ids'] happens elsewhere.
    killed_pids = []
    # check the running_process_ids and see if any of them have expired
    for each_process in thread_communications['running_process_ids']:
      # each process is a tuple that consists of (pid, expiretime, hostname, username)
      process_to_kill = each_process[0]
      expire_time = each_process[1]
      remote_host = each_process[2]
      user = each_process[3]
      # if the current time is past the set expire time then we need to try
      # and kill it
      if expire_time <= time.time():
        # try to kill process
        try:
          # check if process is still running (relies on a /proc filesystem,
          # i.e. Linux - TODO confirm deployment platform)
          if os.path.exists('/proc/'+str(process_to_kill)):
            os.kill(process_to_kill, 9)
            killed_pids.append(each_process)
            # sleep a second, and then check that the process was killed. if
            # not, try a 2nd and third time
            time.sleep(1)
            if os.path.exists('/proc/'+str(process_to_kill)):
              # try os.kill again, and if that doesn't work, use shellexec method
              os.kill(process_to_kill, 9)
              time.sleep(1)
              if os.path.exists('/proc/'+str(process_to_kill)):
                deploy_helper.shellexec2('kill -9 '+str(process_to_kill))
                time.sleep(1)
            if remote_host:
              deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout! The host"+\
                " on this thread is "+remote_host)
            else:
              deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout!")
            # subtract from out running thread count and remove host
            subtract_host_left([(user, remote_host)])
          else:
            # the process is dead, just remove host from hosts_left just in
            # case, and remove from running pids as well, but dont sub the
            # # of threads
            killed_pids.append(each_process)
            subtract_host_left([(user, remote_host)], False)
        except OSError, ose:
          # this means no pid found and process has most likely
          # already terminated
          deploy_logging.logerror("Process"+str(process_to_kill)+"("+remote_host+") is already done.")
          subtract_host_left([(user, remote_host)], False)
          pass
        except Exception, e:
          deploy_logging.logerror("Unexpected error in pid_timeout thread "+\
            "while killing a child process: "+str(e))
# if first chars match what we want ('!user:'******'!user:'******'!user:'******'\n '): # and ignore comments (lines starting with #) if line.strip('\n ')[0] != '#': # if we get here, then we have an IP so we need to check that # user is not empty.. log err if it is and complain. if not current_username: deploy_logging.logerror( 'Critical Error: No username specified for remote host group!' ) file_of_ips.close() return False # add (username, remote_host) pair while casting remote_host to lowercase in case # it's a hostname for easy comparison if needed everywhere users_ip_tuple_list.append( (current_username, line.rstrip('\n ').lower())) # set flag that we have at least one ip have_one_ip = True # return true only if we have at least ONE ip that we added to the list # and not just a bunch of users if have_one_ip: # lets make the list a set, which is a cheap way of getting rid of