def checkpoint():
    """Perform a blocking checkpoint request and rename the checkpoint files.

    Steps, in order:
      1. Call remove_stale_ptrace_files().
      2. Snapshot the entries of $DMTCP_TMPDIR that match the "before"
         pattern.
      3. Run ``dmtcp_command --quiet bc`` through
         fredutil.execute_shell_command_and_wait().
      4. Poll $DMTCP_TMPDIR until at least get_num_peers() entries whose
         names start with "ckpt_" and end with "dmtcp" (and that are not in
         the snapshot) are present.
      5. Rename each of those files to "<path>.<gn_index_suffix>" and then
         increment the module-level counter gn_index_suffix by one.

    Raises:
        KeyError: if the DMTCP_TMPDIR environment variable is not set.
    """
    global gn_index_suffix
    remove_stale_ptrace_files()

    s_tmpdir = os.environ["DMTCP_TMPDIR"]

    # Raw string so that "\." reaches the regex engine untouched instead of
    # being treated as an (invalid) string escape sequence.
    # NOTE(review): this pattern requires another "." after ".dmtcp", so it
    # looks like it only matches already-suffixed files, whereas the filter
    # used after the checkpoint matches names *ending* in "dmtcp". Confirm
    # that the "before" snapshot is meant to work this way.
    s_checkpoint_re = r"ckpt_.+\.dmtcp\..*"
    ckpts_before = set(os.path.join(s_tmpdir, x)
                       for x in os.listdir(s_tmpdir)
                       if re.search(s_checkpoint_re, x) is not None)
    #fredutil.fred_debug("List ckpts before: %s" % str(ckpts_before))

    # Request the checkpoint. The peer count is read before the request is
    # issued and is used as the number of new files to wait for.
    n_peers = get_num_peers()
    cmdstr = ["dmtcp_command", "--quiet", "bc"]
    fredutil.execute_shell_command_and_wait(cmdstr)
    fredutil.fred_debug("After blocking checkpoint command.")

    l_new_ckpts = []
    # There is what seems to be a DMTCP bug: the blocking checkpoint
    # can actually return before the checkpoints are written.  It is
    # rare.  Poll the directory until enough new files have shown up.
    while len(l_new_ckpts) < n_peers:
        l_ckpts_after = [os.path.join(s_tmpdir, x)
                         for x in os.listdir(s_tmpdir)
                         if x.startswith("ckpt_") and x.endswith("dmtcp")]
        #fredutil.fred_debug("List ckpts after: %s" % str(l_ckpts_after))
        l_new_ckpts = [x for x in l_ckpts_after if x not in ckpts_before]
        time.sleep(0.001)

    # Tag every new checkpoint file with the current index suffix.
    for f in l_new_ckpts:
        s_renamed = "%s.%d" % (f, gn_index_suffix)
        fredutil.fred_debug("Renaming ckpt file from '%s' to '%s.%d'" %
                            (f, f, gn_index_suffix))
        os.rename(f, s_renamed)
    gn_index_suffix += 1
def kill_coordinator(n_port):
    """Kill the coordinator on the given port.

    Runs ``dmtcp_command --quiet -p <n_port> q`` through
    fredutil.execute_shell_command_and_wait().

    A subprocess.CalledProcessError raised by that call is deliberately
    ignored (best effort); any other exception propagates to the caller.

    Args:
        n_port: coordinator port; converted with str() for the command line.
    """
    l_cmd = ["dmtcp_command", "--quiet", "-p", str(n_port), "q"]
    try:
        fredutil.execute_shell_command_and_wait(l_cmd)
    except subprocess.CalledProcessError:
        # Presumably raised when the command exits non-zero, e.g. when no
        # coordinator is listening on the port -- TODO confirm against
        # fredutil. Either way there is nothing left to kill.
        pass
def kill_coordinator(n_port):
    """Kill the coordinator on the given port.

    Issues ``dmtcp_command --quiet -p <n_port> q`` via
    fredutil.execute_shell_command_and_wait().

    subprocess.CalledProcessError from that call is swallowed on purpose;
    every other exception is left to propagate unchanged.

    Args:
        n_port: coordinator port; passed to the command as str(n_port).
    """
    try:
        fredutil.execute_shell_command_and_wait(
            ["dmtcp_command", "--quiet", "-p", str(n_port), "q"])
    except subprocess.CalledProcessError:
        # Best effort: a failing quit command is not treated as an error.
        pass
def start_coordinator(n_port):
    """Start a coordinator on the given port.

    Runs ``dmtcp_coordinator --daemon -p <n_port>`` through
    fredutil.execute_shell_command_and_wait().

    Args:
        n_port: port for the coordinator; converted with str() for the
            command line.

    Returns:
        True when the helper's return value equals 0, False otherwise.
    """
    l_cmd = ["dmtcp_coordinator", "--daemon", "-p", str(n_port)]
    return fredutil.execute_shell_command_and_wait(l_cmd) == 0
def start_coordinator(n_port):
    """Start a coordinator on the given port; report whether it succeeded.

    The command ``dmtcp_coordinator --daemon -p <n_port>`` is handed to
    fredutil.execute_shell_command_and_wait(), and its return value is
    compared against 0.

    Args:
        n_port: port for the coordinator; passed as str(n_port).

    Returns:
        False when the helper returns anything other than 0, True otherwise.
    """
    s_port = str(n_port)
    n_status = fredutil.execute_shell_command_and_wait(
        ["dmtcp_coordinator", "--daemon", "-p", s_port])
    if n_status == 0:
        return True
    return False