def create_remote_setups(self):
    """Create the necessary build directories on all slash2 objects.

    Connects to each object's host over SSH, creates every configured
    build directory, and opens its permissions. Unreachable hosts are
    logged and skipped; they do not abort the loop.
    """
    for sl2_obj in self.all_objects():
        ssh = None
        try:
            ssh = SSH(self.user, sl2_obj["host"], "")
            log.debug("Creating build directories on {0}@{1}".format(sl2_obj["name"], sl2_obj["host"]))
            for d in self.build_dirs.values():
                ssh.make_dirs(d)
                # World-writable so daemons running as other users can use the dirs.
                ssh.run('sudo chmod -R 777 "{0}"'.format(d), quiet=True)
        except SSHException:
            log.error("Unable to connect to {0} to create build directories!".format(sl2_obj["host"]))
        finally:
            # BUG FIX: the original only closed on the success path, leaking
            # the connection if a remote command raised mid-loop.
            if ssh is not None:
                ssh.close()
def create_remote_setups(self):
    """Create the necessary build directories on all slash2 objects.

    For each slash2 object, open an SSH session to its host, create all
    build directories, and chmod them. A host that cannot be reached is
    reported and skipped.
    """
    for sl2_obj in self.all_objects():
        ssh = None
        try:
            ssh = SSH(self.user, sl2_obj["host"], '')
            log.debug("Creating build directories on {0}@{1}".format(
                sl2_obj["name"], sl2_obj["host"]))
            for d in self.build_dirs.values():
                ssh.make_dirs(d)
                # 777 so the daemons (possibly other users) can write here.
                ssh.run("sudo chmod -R 777 \"{0}\"".format(d), quiet=True)
        except SSHException:
            log.error(
                "Unable to connect to {0} to create build directories!".
                format(sl2_obj["host"]))
        finally:
            # BUG FIX: guarantee the session is released even when a remote
            # command raises (the original leaked it on the error path).
            if ssh is not None:
                ssh.close()
def kill_mnt(tsuite):
    """Kill ION daemons.

    Unmounts the client mountpoint on every client host, then stops the
    mount_slash socks via sl2gen.

    Args:
      tsuite: runtime tsuite."""
    for client in tsuite.sl2objects["client"]:
        # NOTE(review): SSH is called with two args here but three
        # elsewhere in this file — confirm the wrapper's default matters.
        ssh = SSH(tsuite.user, client["host"])
        try:
            # A non-empty stderr list means the umount failed.
            if ssh.run("sudo umount {0}".format(tsuite.build_dirs["mp"]))["err"] != []:
                log.critical("Cannot unmount client mountpoint at {0} @ {1}.".format(tsuite.build_dirs["mp"], client["host"]))
        finally:
            # BUG FIX: the original never closed this connection.
            ssh.close()
    sl2gen.stop_slash2_socks(tsuite, "client", tsuite.sl2objects["client"], "msctl", "mount_slash")
def check_status(self):
    """Generate general status report for all sl2 objects.

    Returns:
      { "type": [ {"host": ..., "id": ..., "reports": ...} ], ... }"""
    report = {}

    # Operations based on type
    ops = {
        "all": {
            "load": "cat /proc/loadavg | cut -d' ' -f1,2,3",
            "mem_total": "cat /proc/meminfo | head -n1",
            "mem_free": "sed -n 2,2p /proc/meminfo",
            # BUG FIX: original piped to `head -d' ' -f1`; head has no
            # -d/-f options — the field split belongs to cut.
            "uptime": "cat /proc/uptime | cut -d' ' -f1",
            "disk_stats": "df -hl"
        },
        "mds": {
            "connections": "{slmctl} -sconnections",
            "iostats": "{slmctl} -siostats"
        },
        "ion": {
            "connections": "{slictl} -sconnections",
            "iostats": "{slictl} -siostats"
        }
    }

    for sl2_restype in self.sl2objects.keys():
        report[sl2_restype] = []

        obj_ops = ops["all"]
        # BUG FIX: original tested `sl2_obj[sl2_restype] in ops`, but
        # sl2_obj is only defined inside the loop below (NameError).
        # The intent is to merge type-specific ops for known types.
        if sl2_restype in ops:
            # dict(a, **b) merges portably; the original
            # `a.items() + b.items()` only works on Python 2.
            obj_ops = dict(ops["all"], **ops[sl2_restype])

        for sl2_obj in self.sl2objects[sl2_restype]:
            obj_report = {
                "host": sl2_obj["host"],
                # Default to 0 like the launch/stop helpers do for
                # objects without an explicit id.
                "id": sl2_obj.get("id", 0),
                "reports": {}
            }

            user, host = self.user, sl2_obj["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, '')

            for op, cmd in obj_ops.items():
                obj_report["reports"][op] = ssh.run(cmd, timeout=2)

            report[sl2_restype].append(obj_report)
            log.debug("Status check completed for {0} [{1}]".format(
                host, sl2_restype))
            ssh.close()
    return report
def stop_slash2_socks(tsuite, sock_name, sl2objects, ctl_type, daemon_type):
    """Terminates all slash2 socks and screen socks on a generic host.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be launched.
      ctl_type: key to ctl path in src_dirs.
      daemon_type: key to daemon path in src_dirs."""
    assert ctl_type in tsuite.src_dirs
    assert daemon_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.info("Killing {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        try:
            ssh = SSH(user, host, '')
        except Exception:
            log.error("Unable to connect to {0}@{1}".format(user, host))
            # BUG FIX: the original `return`ed here, silently skipping the
            # remaining objects; one unreachable host should not prevent
            # stopping the others.
            continue

        try:
            cmd = "{0} -S {1}/{2}.{3}.sock stop".format(
                tsuite.src_dirs[ctl_type], tsuite.build_dirs["ctl"],
                daemon_type, host)
            ssh.run(cmd)

            # Objects without an explicit id default to 0 (matches launch).
            if "id" not in sl2object:
                sl2object["id"] = 0
            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])
            ssh.kill_screens(screen_sock_name, exact_sock=True)
        finally:
            ssh.close()
def stop_slash2_socks(tsuite, sock_name, sl2objects, ctl_type, daemon_type):
    """Terminates all slash2 socks and screen socks on a generic host.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be launched.
      ctl_type: key to ctl path in src_dirs.
      daemon_type: key to daemon path in src_dirs."""
    assert ctl_type in tsuite.src_dirs
    assert daemon_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.info("Killing {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        try:
            ssh = SSH(user, host, "")
        except Exception:
            log.error("Unable to connect to {0}@{1}".format(user, host))
            # BUG FIX: was `return`, which aborted the remaining objects
            # on the first unreachable host; skip just this one instead.
            continue

        try:
            cmd = "{0} -S {1}/{2}.{3}.sock stop".format(
                tsuite.src_dirs[ctl_type], tsuite.build_dirs["ctl"], daemon_type, host
            )
            ssh.run(cmd)

            # Default the id to 0, consistent with the launch helpers.
            if "id" not in sl2object:
                sl2object["id"] = 0
            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])
            ssh.kill_screens(screen_sock_name, exact_sock=True)
        finally:
            ssh.close()
def check_status(self):
    """Generate general status report for all sl2 objects.

    Returns:
      { "type": [ {"host": ..., "id": ..., "reports": ...} ], ... }"""
    report = {}

    # Operations based on type
    ops = {
        "all": {
            "load": "cat /proc/loadavg | cut -d' ' -f1,2,3",
            "mem_total": "cat /proc/meminfo | head -n1",
            "mem_free": "sed -n 2,2p /proc/meminfo",
            # BUG FIX: `head -d' ' -f1` is not a valid head invocation;
            # the field extraction belongs to cut.
            "uptime": "cat /proc/uptime | cut -d' ' -f1",
            "disk_stats": "df -hl",
        },
        "mds": {"connections": "{slmctl} -sconnections", "iostats": "{slmctl} -siostats"},
        "ion": {"connections": "{slictl} -sconnections", "iostats": "{slictl} -siostats"},
    }

    for sl2_restype in self.sl2objects.keys():
        report[sl2_restype] = []

        obj_ops = ops["all"]
        # BUG FIX: original read `sl2_obj[sl2_restype] in ops` before
        # sl2_obj existed (NameError); the type key is sl2_restype.
        if sl2_restype in ops:
            # Python-3-safe merge (the original concatenated .items()
            # lists, which only works on Python 2).
            obj_ops = dict(ops["all"], **ops[sl2_restype])

        for sl2_obj in self.sl2objects[sl2_restype]:
            obj_report = {
                "host": sl2_obj["host"],
                # Default missing ids to 0 like the other helpers.
                "id": sl2_obj.get("id", 0),
                "reports": {},
            }

            user, host = self.user, sl2_obj["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, "")

            for op, cmd in obj_ops.items():
                obj_report["reports"][op] = ssh.run(cmd, timeout=2)

            report[sl2_restype].append(obj_report)
            log.debug("Status check completed for {0} [{1}]".format(host, sl2_restype))
            ssh.close()
    return report
def launch_gdb_sl(tsuite, sock_name, sl2objects, res_bin_type, gdbcmd_path):
    """Generic slash2 launch service in screen+gdb.

    Will also copy over authbuf keys.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be launched.
      res_bin_type: key to bin path in src_dirs.
      gdbcmd_path: path to gdbcmd file."""
    # res_bin_type NEEDS to be a path in src_dirs
    assert res_bin_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.debug("Initializing environment > {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        ssh = SSH(user, host, "")

        # Acquire and deploy authbuf key
        need_authbuf = handle_authbuf(tsuite, ssh, sl2object["type"])

        ls_cmd = "ls {0}/".format(tsuite.build_dirs["ctl"])
        # Pre-flight listing of the ctl dir; output goes to the log.
        # (The original also computed an unused `present_socks` count.)
        ssh.run(ls_cmd)

        # Create monolithic reference/replace dict
        repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
        repl_dict = dict(repl_dict, **sl2object)

        if "id" not in sl2object:
            sl2object["id"] = 0

        # Create gdbcmd from template
        gdbcmd_build_path = path.join(
            tsuite.build_dirs["base"],
            "{0}_{1}".format(sl2object["id"], path.basename(gdbcmd_path))
        )
        new_gdbcmd = repl_file(repl_dict, gdbcmd_path)

        if new_gdbcmd:
            # `with` closes the file; the original's extra f.close() was redundant.
            with open(gdbcmd_build_path, "w") as f:
                f.write(new_gdbcmd)
            log.debug("Wrote gdb cmd to {0}".format(gdbcmd_build_path))
            log.debug("Remote copying gdbcmd.")
            ssh.copy_file(gdbcmd_build_path, gdbcmd_build_path)
        else:
            log.fatal("Unable to parse gdb cmd at {0}!".format(gdbcmd_path))
            tsuite.shutdown()

        cmd = "sudo gdb -f -x {0} {1}".format(gdbcmd_build_path, tsuite.src_dirs[res_bin_type])
        screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])

        # Launch slash2 in gdb within a screen session
        ssh.run_screen(cmd, screen_sock_name)

        # Wait two seconds to make sure slash2 launched without errors
        time.sleep(2)

        screen_socks = ssh.list_screen_socks()
        if screen_sock_name + "-error" in screen_socks or screen_sock_name not in screen_socks:
            log.fatal(
                "sl2object {0}:{1} launched with an error. Resume to {2} and resolve it.".format(
                    sl2object["name"], sl2object["id"], screen_sock_name + "-error"
                )
            )
            tsuite.shutdown(ignore=sock_name)

        log.debug("Waiting for {0} sock on {1} to appear.".format(sock_name, host))
        count = 0
        while True:
            result = ssh.run(ls_cmd, quiet=True)
            # Simplified from `not all(bin not in sock ...)`:
            # break once any listed sock mentions the binary.
            if any(res_bin_type in sock for sock in result["out"]):
                break
            time.sleep(1)
            count += 1
            if count == int(tsuite.conf["slash2"]["timeout"]):
                log.fatal(
                    "Cannot find {0} sock on {1}. Resume to {2} and resolve it. ".format(
                        res_bin_type, sl2object["id"], screen_sock_name
                    )
                )
                tsuite.shutdown(ignore=sock_name)

        # grab pid for resouce querying later
        # TODO: do not grab other running instances
        sl2object["pid"] = ssh.run("pgrep {0}".format(res_bin_type))["out"][0].strip()
        log.debug("Found {0} pid on {1} : {2}".format(res_bin_type, host, sl2object["pid"]))

        if need_authbuf:
            pull_authbuf(tsuite, ssh)

        ssh.close()
def launch_gdb_sl(tsuite, sock_name, sl2objects, res_bin_type, gdbcmd_path):
    """Generic slash2 launch service in screen+gdb.

    Will also copy over authbuf keys.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be launched.
      res_bin_type: key to bin path in src_dirs.
      gdbcmd_path: path to gdbcmd file."""
    #res_bin_type NEEDS to be a path in src_dirs
    assert res_bin_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.debug("Initializing environment > {0} @ {1}".format(
            sl2object["name"], sl2object["host"]))

        #Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        ssh = SSH(user, host, '')

        #Acquire and deploy authbuf key
        need_authbuf = handle_authbuf(tsuite, ssh, sl2object["type"])

        ls_cmd = "ls {0}/".format(tsuite.build_dirs["ctl"])
        #Pre-flight ctl dir listing (logged); the original's
        #`present_socks` count derived from it was never used.
        ssh.run(ls_cmd)

        #Create monolithic reference/replace dict
        repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
        repl_dict = dict(repl_dict, **sl2object)

        if "id" not in sl2object:
            sl2object["id"] = 0

        #Create gdbcmd from template
        gdbcmd_build_path = path.join(
            tsuite.build_dirs["base"],
            "{0}_{1}".format(sl2object["id"], path.basename(gdbcmd_path)))
        new_gdbcmd = repl_file(repl_dict, gdbcmd_path)

        if new_gdbcmd:
            #The context manager closes the file; no explicit close needed.
            with open(gdbcmd_build_path, "w") as f:
                f.write(new_gdbcmd)
            log.debug("Wrote gdb cmd to {0}".format(gdbcmd_build_path))
            log.debug("Remote copying gdbcmd.")
            ssh.copy_file(gdbcmd_build_path, gdbcmd_build_path)
        else:
            log.fatal("Unable to parse gdb cmd at {0}!".format(gdbcmd_path))
            tsuite.shutdown()

        cmd = "sudo gdb -f -x {0} {1}".format(gdbcmd_build_path,
                                              tsuite.src_dirs[res_bin_type])
        screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])

        #Launch slash2 in gdb within a screen session
        ssh.run_screen(cmd, screen_sock_name)

        #Wait two seconds to make sure slash2 launched without errors
        time.sleep(2)

        screen_socks = ssh.list_screen_socks()
        if screen_sock_name + "-error" in screen_socks or screen_sock_name not in screen_socks:
            log.fatal("sl2object {0}:{1} launched with an error. Resume to {2} and resolve it."\
                .format(sl2object["name"], sl2object["id"], screen_sock_name+"-error"))
            tsuite.shutdown(ignore=sock_name)

        log.debug("Waiting for {0} sock on {1} to appear.".format(
            sock_name, host))
        count = 0
        while True:
            result = ssh.run(ls_cmd, quiet=True)
            #Rewritten from the double negative `not all(... not in ...)`:
            #stop waiting once any sock mentions the binary.
            if any(res_bin_type in sock for sock in result["out"]):
                break
            time.sleep(1)
            count += 1
            if count == int(tsuite.conf["slash2"]["timeout"]):
                log.fatal("Cannot find {0} sock on {1}. Resume to {2} and resolve it. "\
                    .format(res_bin_type, sl2object["id"], screen_sock_name))
                tsuite.shutdown(ignore=sock_name)

        #grab pid for resouce querying later
        #TODO: do not grab other running instances
        sl2object["pid"] = ssh.run(
            "pgrep {0}".format(res_bin_type))['out'][0].strip()
        log.debug("Found {0} pid on {1} : {2}".format(res_bin_type, host,
                                                      sl2object["pid"]))

        if need_authbuf:
            pull_authbuf(tsuite, ssh)

        ssh.close()