def run_scancel(job_id: int, node: Node):
    """Cancels a job by running `scancel` on the given node.

        :param job_id: Id of the job to cancel. It is converted with `int`
                       before being placed in the command line.

        :param node: Node to run `scancel` on.

    """
    command = "scancel %d" % int(job_id)
    node.run(command)
def check_address_reachable_from_node(node: Node, ip_address: str, port: int):
    """Tries to open a TCP connection to an address from the node.

        The check is a Python one-liner executed on the node through
        `python -c`: it creates an IPv4 stream socket, connects it to the
        address and closes it. The outcome is whatever `node.run` does with
        the command result; nothing is returned from here.

        :param node: Node to test the connection from.

        :param ip_address: Ip address to connect to.

        :param port: Port to connect to.

    """
    snippet_template = ("import socket; s=socket.socket("
                        "socket.AF_INET, socket.SOCK_STREAM);"
                        " s.connect((\"{ip_address}\", {port})); "
                        " s.close()")
    snippet = snippet_template.format(ip_address=ip_address, port=port)
    # The snippet is wrapped in single quotes for the remote shell.
    node.run("python -c '" + snippet + "'")
def get_remote_environment_path(node: Node, path: Optional[str]) -> str:
    """Resolves the remote config file path, or raises a RuntimeError.

        When no path is given, the value of `IDACT_CONFIG_PATH` on the node
        is used, falling back to `DEFAULT_REMOTE_ENVIRONMENT_PATH`.
        The candidate is then passed through `readlink -vf` on the node.

        :param node: Remote node.

        :param path: Optional remote path.

        :raises RuntimeError: If `readlink` produced empty output.

    """
    candidate = path
    if candidate is None:
        lookup_command = "printenv IDACT_CONFIG_PATH || echo {}".format(
            DEFAULT_REMOTE_ENVIRONMENT_PATH)
        candidate = node.run(lookup_command)

    resolved = node.run("readlink -vf {}".format(candidate))
    if resolved:
        return resolved
    raise RuntimeError("Unable to determine remote config path.")
def remove_runtime_dir(node: Node, runtime_dir: str):
    """Removes a runtime dir for deployment.

        Runs `rm -f` on `<runtime_dir>/*` followed by `rmdir` on the
        directory itself. The glob does not match names starting with a dot
        and `rm -f` does not remove directories, so `rmdir` fails
        if anything is left behind.

        A `RuntimeError` from the node is not propagated: a warning is
        logged, with the exception details at debug level.

        :param node: Node to run commands on.

        :param runtime_dir: Path to the deployment dir.

    """
    command = ("rm -f {runtime_dir}/*"
               " && rmdir {runtime_dir}").format(runtime_dir=runtime_dir)
    try:
        node.run(command)
    except RuntimeError:
        # Best-effort cleanup: report the failure and carry on.
        log = get_logger(__name__)
        log.warning("Failed to remove runtime dir: '%s'.", runtime_dir)
        log.debug("Failed to remove runtime dir due to exception.",
                  exc_info=True)
def get_free_remote_port(node: Node) -> int:
    """Returns a port number that was free on the node when queried.

        A Python one-liner on the node binds a socket to port 0, prints the
        port number that was assigned, and closes the socket right away.
        The port is not reserved after that.

        :param node: Node to find a port on.

    """
    snippet = ("import socket; s=socket.socket();"
               " s.bind((str(), 0)); print(s.getsockname()[1]);"
               " s.close()")
    port_text = node.run("python -c '" + snippet + "'")
    return int(port_text)
def extract_squeue_format_R(value: str, node: Node) -> Optional[List[str]]:
    """Expands the `%R` field of `squeue` output into a list of hostnames.

        The compact node list is expanded by `scontrol show hostname` on the
        node, and every output line is passed through `validate_hostname`.

        :param value: Job node list in a compact format, e.g. `node[1-7]`.
                      A value starting with `(` is not expanded and yields
                      `None` (presumably a reason string, not a node list).

        :param node: Node to call scontrol on.

        :returns: The hostnames, or `None` if there are none.

    """
    if value.startswith('('):
        return None

    command = "scontrol show hostname " + shlex.quote(value)
    hostnames = []
    for line in node.run(command).splitlines():
        hostnames.append(validate_hostname(line))
    return hostnames or None
def get_node_memory_usage(node: Node) -> bitmath.KiB:
    """Returns the total RES (Resident Memory Size) of all user processes
        on the node, as reported by top.

        :param node: Node to run `top` on.

    """
    # The pipeline below:
    #   1. Runs top in batch mode for a single iteration, for $USER only.
    #   2. Drops the first seven lines (header and column names).
    #   3. Prints only the sixth column: memory used per process.
    # The per-process values are summed here rather than in awk,
    # because awk does not support big integers.
    top_output = node.run("top -b -n 1 -u $USER | awk 'NR>7 { print $6; }'")

    total_kib = 0
    for line in top_output.splitlines():
        total_kib += parse_top_res_format_to_kib(line)
    return bitmath.KiB(total_kib)
def ptree(pid: int, node: Node) -> List[int]:
    """Returns a list containing this PID and all its descendants.

        Runs the shell code in `LIST_DESCENDANTS` on the node (presumably
        a definition of the `list_descendants` shell function), then
        echoes the output of `list_descendants <pid>`.

        :param pid: Parent process pid.

        :param node: Node to run the command on.

        :returns: `[pid]` followed by the descendant pids in the order
                  they were printed. Just `[pid]` if nothing was printed.

    """
    command = ("{list_descendants};"
               " echo $(list_descendants {pid})").format(
                   list_descendants=LIST_DESCENDANTS,
                   pid=pid)
    output = node.run(command)

    # Split on any run of whitespace instead of a single space, so that
    # doubled spaces, trailing newlines or whitespace-only output
    # do not produce empty tokens that `int` would reject.
    descendant_pids = [int(token) for token in output.split()]
    return [pid] + descendant_pids
def run_squeue(node: Node) -> Dict[int, SqueueResult]:
    """Runs `squeue` for the current user and collects the parsed results.

        The first output line is the header and is skipped. Every other
        line is parsed by `extract_squeue_line`; lines for which it returns
        `None` are left out.

        :param node: Node to run `squeue` on.

        :returns: Parsed results keyed by their `job_id`.

    """
    # Taken before the remote call and shared by every parsed line.
    now = utc_now()

    command = ("squeue"
               " --user $USER"
               " --format '%A|%D|%L|%r|%R|%T'")
    output = node.run(command)

    results = {}
    for line in output.splitlines()[1:]:
        parsed = extract_squeue_line(now=now, line=line, node=node)
        if parsed is not None:
            results[parsed.job_id] = parsed
    return results