Пример #1
0
    def create_remote_setups(self):
        """Create the necessary build directories on all slash2 objects.

        For every object returned by ``self.all_objects()`` an SSH session is
        opened as ``self.user`` on the object's host. Each path in
        ``self.build_dirs`` is created there and then opened up with
        ``sudo chmod -R 777``.

        An ``SSHException`` raised while handling one host is logged and the
        remaining hosts are still processed.
        """

        for sl2_obj in self.all_objects():
            ssh = None
            try:
                ssh = SSH(self.user, sl2_obj["host"], "")
                log.debug("Creating build directories on {0}@{1}".format(sl2_obj["name"], sl2_obj["host"]))
                for d in self.build_dirs.values():
                    ssh.make_dirs(d)
                    ssh.run('sudo chmod -R 777 "{0}"'.format(d), quiet=True)
            except SSHException:
                log.error("Unable to connect to {0} to create build directories!".format(sl2_obj["host"]))
            finally:
                # Release the session even when a remote command failed;
                # ssh stays None when the constructor itself raised.
                if ssh is not None:
                    ssh.close()
Пример #2
0
    def create_remote_setups(self):
        """Create the necessary build directories on all slash2 objects.

        Iterates over ``self.all_objects()``, connects to each object's host
        as ``self.user`` and, for every path in ``self.build_dirs``, creates
        the directory and runs ``sudo chmod -R 777`` on it.

        A host that raises ``SSHException`` is reported through ``log.error``
        and skipped; the loop then continues with the next object.
        """

        for sl2_obj in self.all_objects():
            ssh = None
            try:
                ssh = SSH(self.user, sl2_obj["host"], '')
                log.debug("Creating build directories on {0}@{1}".format(
                    sl2_obj["name"], sl2_obj["host"]))
                for d in self.build_dirs.values():
                    ssh.make_dirs(d)
                    ssh.run("sudo chmod -R 777 \"{0}\"".format(d), quiet=True)
            except SSHException:
                log.error(
                    "Unable to connect to {0} to create build directories!".
                    format(sl2_obj["host"]))
            finally:
                # Always close an opened session, including on failure, so
                # connections are not leaked. ssh is None if SSH() raised.
                if ssh is not None:
                    ssh.close()
Пример #3
0
def kill_mnt(tsuite):
  """Unmount the client mountpoints and stop the client daemons.

  Each host in tsuite.sl2objects["client"] is asked to
  `sudo umount` the mountpoint tsuite.build_dirs["mp"]; any error output
  is reported through log.critical. Afterwards the client sockets are
  stopped through sl2gen.stop_slash2_socks.

  Args:
    tsuite: runtime tsuite."""

  for client in tsuite.sl2objects["client"]:
    ssh = SSH(tsuite.user, client["host"])
    try:
      result = ssh.run("sudo umount {0}".format(tsuite.build_dirs["mp"]))
      if result["err"] != []:
        log.critical("Cannot unmount client mountpoint at {0} @ {1}.".format(tsuite.build_dirs["mp"], client["host"]))
    finally:
      # The session was previously never closed.
      ssh.close()

  sl2gen.stop_slash2_socks(tsuite, "client", tsuite.sl2objects["client"], "msctl", "mount_slash")
Пример #4
0
    def check_status(self):
        """Generate a general status report for all sl2 objects.

        Every object in ``self.sl2objects`` is contacted over SSH as
        ``self.user`` and a set of shell commands is run on it: the "all"
        commands for every resource type, plus the type-specific commands
        when the resource type has its own entry in the table below.

        Returns:
            A dict keyed by resource type. Each value is a list holding one
            entry per object::

                {"host": ..., "id": ..., "reports": {op_name: ssh.run result}}
        """
        report = {}

        # Operations based on type
        # NOTE(review): the "{slmctl}" / "{slictl}" placeholders reach
        # ssh.run() unformatted from here -- confirm they are expanded
        # somewhere downstream.
        ops = {
            "all": {
                "load": "cat /proc/loadavg | cut -d' ' -f1,2,3",
                "mem_total": "cat /proc/meminfo | head -n1",
                "mem_free": "sed -n 2,2p /proc/meminfo",
                # cut, not head: head has no -d/-f options
                "uptime": "cat /proc/uptime | cut -d' ' -f1",
                "disk_stats": "df -hl"
            },
            "mds": {
                "connections": "{slmctl} -sconnections",
                "iostats": "{slmctl} -siostats"
            },
            "ion": {
                "connections": "{slictl} -sconnections",
                "iostats": "{slictl} -siostats"
            }
        }

        for sl2_restype in self.sl2objects.keys():

            report[sl2_restype] = []

            # Work on a copy so the shared "all" table is never modified,
            # then layer the type-specific commands on top.
            obj_ops = dict(ops["all"])
            if sl2_restype in ops:
                obj_ops.update(ops[sl2_restype])

            for sl2_obj in self.sl2objects[sl2_restype]:
                obj_report = {
                    "host": sl2_obj["host"],
                    "id": sl2_obj["id"],
                    "reports": {}
                }
                user, host = self.user, sl2_obj["host"]
                log.debug("Connecting to {0}@{1}".format(user, host))
                ssh = SSH(user, host, '')
                try:
                    for op, cmd in obj_ops.items():
                        obj_report["reports"][op] = ssh.run(cmd, timeout=2)

                    report[sl2_restype].append(obj_report)
                    log.debug("Status check completed for {0} [{1}]".format(
                        host, sl2_restype))
                finally:
                    # Close the session even if one of the commands raised.
                    ssh.close()
        return report
Пример #5
0
def stop_slash2_socks(tsuite, sock_name, sl2objects, ctl_type, daemon_type):
    """Terminate the slash2 socks and screen socks of the given objects.

    For each object the ctl binary ``tsuite.src_dirs[ctl_type]`` is run with
    ``-S <build_dirs["ctl"]>/<daemon_type>.<host>.sock stop``, and the screen
    session named ``sl.<sock_name>.<id>`` is then killed. An object without
    an "id" key is given ``id = 0`` (the dict is updated in place).

    A host that cannot be reached is logged and skipped so the remaining
    objects are still stopped.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be stopped.
      ctl_type: key to ctl path in src_dirs.
      daemon_type: key to daemon path in src_dirs; also used as the prefix
        of the control socket file name.
    """

    assert (ctl_type in tsuite.src_dirs)
    assert (daemon_type in tsuite.src_dirs)

    for sl2object in sl2objects:
        log.info("Killing {0} @ {1}".format(sl2object["name"],
                                            sl2object["host"]))

        #Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))

        try:
            ssh = SSH(user, host, '')
        except Exception:
            log.error("Unable to connect to {0}@{1}".format(user, host))
            # Keep going: one unreachable host must not leave the daemons
            # on every later host running.
            continue

        try:
            cmd = "{0} -S {1}/{2}.{3}.sock stop".format(
                tsuite.src_dirs[ctl_type], tsuite.build_dirs["ctl"],
                daemon_type, host)
            ssh.run(cmd)

            if "id" not in sl2object:
                sl2object["id"] = 0

            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])
            ssh.kill_screens(screen_sock_name, exact_sock=True)
        finally:
            # Release the session even when a remote command raised.
            ssh.close()
Пример #6
0
def stop_slash2_socks(tsuite, sock_name, sl2objects, ctl_type, daemon_type):
    """Terminate the slash2 socks and screen socks of the given objects.

    Per object, this runs the ctl binary found at ``tsuite.src_dirs[ctl_type]``
    against the socket ``<build_dirs["ctl"]>/<daemon_type>.<host>.sock`` with
    the ``stop`` command, then kills the screen session
    ``sl.<sock_name>.<id>``. Objects lacking an "id" key get ``id = 0``
    written back into their dict.

    Connection failures are logged and the affected host is skipped; the
    loop carries on with the remaining objects.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be stopped.
      ctl_type: key to ctl path in src_dirs.
      daemon_type: key to daemon path in src_dirs; also the prefix of the
        control socket file name.
    """

    assert ctl_type in tsuite.src_dirs
    assert daemon_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.info("Killing {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))

        try:
            ssh = SSH(user, host, "")
        except Exception:
            log.error("Unable to connect to {0}@{1}".format(user, host))
            # Skip only this host; returning here would leave every later
            # object untouched.
            continue

        try:
            cmd = "{0} -S {1}/{2}.{3}.sock stop".format(
                tsuite.src_dirs[ctl_type], tsuite.build_dirs["ctl"], daemon_type, host
            )
            ssh.run(cmd)

            if "id" not in sl2object:
                sl2object["id"] = 0

            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])
            ssh.kill_screens(screen_sock_name, exact_sock=True)
        finally:
            # Guarantee the session is closed, also on error paths.
            ssh.close()
Пример #7
0
    def check_status(self):
        """Generate a general status report for all sl2 objects.

        Connects as ``self.user`` to the host of every object in
        ``self.sl2objects`` and runs a table of shell commands there. The
        "all" commands apply to every resource type; a resource type with
        its own entry in the table gets those commands in addition.

        Returns:
            A dict keyed by resource type. Each value is a list holding one
            entry per object::

                {"host": ..., "id": ..., "reports": {op_name: ssh.run result}}
        """
        report = {}

        # Operations based on type
        # NOTE(review): "{slmctl}" / "{slictl}" are handed to ssh.run()
        # without being formatted here -- confirm they are substituted
        # downstream.
        ops = {
            "all": {
                "load": "cat /proc/loadavg | cut -d' ' -f1,2,3",
                "mem_total": "cat /proc/meminfo | head -n1",
                "mem_free": "sed -n 2,2p /proc/meminfo",
                # cut, not head: head accepts neither -d nor -f
                "uptime": "cat /proc/uptime | cut -d' ' -f1",
                "disk_stats": "df -hl",
            },
            "mds": {"connections": "{slmctl} -sconnections", "iostats": "{slmctl} -siostats"},
            "ion": {"connections": "{slictl} -sconnections", "iostats": "{slictl} -siostats"},
        }

        for sl2_restype in self.sl2objects.keys():

            report[sl2_restype] = []

            # Start from a copy of the common commands and add the
            # type-specific ones; the lookup key is the resource type itself.
            obj_ops = dict(ops["all"])
            if sl2_restype in ops:
                obj_ops.update(ops[sl2_restype])

            for sl2_obj in self.sl2objects[sl2_restype]:
                obj_report = {"host": sl2_obj["host"], "id": sl2_obj["id"], "reports": {}}
                user, host = self.user, sl2_obj["host"]
                log.debug("Connecting to {0}@{1}".format(user, host))
                ssh = SSH(user, host, "")
                try:
                    for op, cmd in obj_ops.items():
                        obj_report["reports"][op] = ssh.run(cmd, timeout=2)

                    report[sl2_restype].append(obj_report)
                    log.debug("Status check completed for {0} [{1}]".format(host, sl2_restype))
                finally:
                    # Never leak the session, even when a command raised.
                    ssh.close()
        return report
Пример #8
0
def launch_gdb_sl(tsuite, sock_name, sl2objects, res_bin_type, gdbcmd_path):
    """Generic slash2 launch service in screen+gdb. Will also copy over authbuf keys.

    For each object this:
      1. connects to the object's host as ``tsuite.user``;
      2. calls ``handle_authbuf`` for the object's type;
      3. renders the gdb command template at ``gdbcmd_path`` with
         ``repl_file``, writes it under ``tsuite.build_dirs["base"]`` and
         copies it to the same path on the remote host;
      4. starts ``sudo gdb -f -x <gdbcmd> <binary>`` in a screen session
         named ``sl.<sock_name>.<id>``;
      5. polls the ctl directory once a second until a file whose name
         contains ``res_bin_type`` appears;
      6. stores the first ``pgrep`` result in ``sl2object["pid"]`` and, if
         ``handle_authbuf`` asked for it, calls ``pull_authbuf``.

    Objects without an "id" key get ``id = 0`` written into their dict.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be launched.
      res_bin_type: key to bin path in src_dirs.
      gdbcmd_path: path to gdbcmd file.
    """

    # res_bin_type NEEDS to be a path in src_dirs
    assert res_bin_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.debug("Initializing environment > {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        ssh = SSH(user, host, "")

        try:
            # Acquire and deploy authbuf key
            need_authbuf = handle_authbuf(tsuite, ssh, sl2object["type"])

            ls_cmd = "ls {0}/".format(tsuite.build_dirs["ctl"])

            # Create monolithic reference/replace dict
            # NOTE(review): built before the "id" default below is applied,
            # so a defaulted id is not visible to the template -- confirm
            # this ordering is intended.
            repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
            repl_dict = dict(repl_dict, **sl2object)

            if "id" not in sl2object:
                sl2object["id"] = 0

            # Create gdbcmd from template
            gdbcmd_build_path = path.join(
                tsuite.build_dirs["base"], "{0}_{1}".format(sl2object["id"], path.basename(gdbcmd_path))
            )

            new_gdbcmd = repl_file(repl_dict, gdbcmd_path)

            if new_gdbcmd:
                with open(gdbcmd_build_path, "w") as f:
                    f.write(new_gdbcmd)
                # The file is closed (and flushed) here, before it is copied.
                log.debug("Wrote gdb cmd to {0}".format(gdbcmd_build_path))
                log.debug("Remote copying gdbcmd.")
                ssh.copy_file(gdbcmd_build_path, gdbcmd_build_path)
            else:
                log.fatal("Unable to parse gdb cmd at {0}!".format(gdbcmd_path))
                # NOTE(review): the code below assumes shutdown() does not
                # return -- confirm.
                tsuite.shutdown()

            cmd = "sudo gdb -f -x {0} {1}".format(gdbcmd_build_path, tsuite.src_dirs[res_bin_type])
            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])

            # Launch slash2 in gdb within a screen session
            ssh.run_screen(cmd, screen_sock_name)

            # Wait two seconds to make sure slash2 launched without errors
            time.sleep(2)

            screen_socks = ssh.list_screen_socks()
            if screen_sock_name + "-error" in screen_socks or screen_sock_name not in screen_socks:
                log.fatal(
                    "sl2object {0}:{1} launched with an error. Resume to {2} and resolve it.".format(
                        sl2object["name"], sl2object["id"], screen_sock_name + "-error"
                    )
                )
                tsuite.shutdown(ignore=sock_name)

            log.debug("Waiting for {0} sock on {1} to appear.".format(sock_name, host))
            count = 0
            while True:
                result = ssh.run(ls_cmd, quiet=True)
                # Done as soon as any listed entry mentions res_bin_type
                if any(res_bin_type in sock for sock in result["out"]):
                    break
                time.sleep(1)
                count += 1
                if count == int(tsuite.conf["slash2"]["timeout"]):
                    log.fatal(
                        "Cannot find {0} sock on {1}. Resume to {2} and resolve it. ".format(
                            res_bin_type, sl2object["id"], screen_sock_name
                        )
                    )
                    tsuite.shutdown(ignore=sock_name)

            # grab pid for resource querying later
            # TODO: do not grab other running instances
            # NOTE(review): raises IndexError when pgrep prints nothing.
            sl2object["pid"] = ssh.run("pgrep {0}".format(res_bin_type))["out"][0].strip()
            log.debug("Found {0} pid on {1} : {2}".format(res_bin_type, host, sl2object["pid"]))

            if need_authbuf:
                pull_authbuf(tsuite, ssh)
        finally:
            # Close the session on every exit path, including errors.
            ssh.close()
Пример #9
0
def launch_gdb_sl(tsuite, sock_name, sl2objects, res_bin_type, gdbcmd_path):
    """Generic slash2 launch service in screen+gdb. Will also copy over authbuf keys.

    Per object, in order: open an SSH session as ``tsuite.user``; call
    ``handle_authbuf`` for the object's type; render the gdb command
    template ``gdbcmd_path`` through ``repl_file``, write it under
    ``tsuite.build_dirs["base"]`` and copy it to the same path remotely;
    start ``sudo gdb -f -x <gdbcmd> <binary>`` in the screen session
    ``sl.<sock_name>.<id>``; poll the ctl directory every second until an
    entry containing ``res_bin_type`` shows up; record the first ``pgrep``
    hit in ``sl2object["pid"]``; finally call ``pull_authbuf`` when
    ``handle_authbuf`` requested it.

    Objects without an "id" key get ``id = 0`` written into their dict.

    Args:
      tsuite: tsuite runtime.
      sock_name: name of sl2 sock.
      sl2objects: list of objects to be launched.
      res_bin_type: key to bin path in src_dirs.
      gdbcmd_path: path to gdbcmd file.
    """

    #res_bin_type NEEDS to be a path in src_dirs
    assert (res_bin_type in tsuite.src_dirs)

    for sl2object in sl2objects:
        log.debug("Initializing environment > {0} @ {1}".format(
            sl2object["name"], sl2object["host"]))

        #Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        ssh = SSH(user, host, '')

        try:
            #Acquire and deploy authbuf key
            need_authbuf = handle_authbuf(tsuite, ssh, sl2object["type"])

            ls_cmd = "ls {0}/".format(tsuite.build_dirs["ctl"])

            #Create monolithic reference/replace dict
            #NOTE(review): assembled before the "id" default is applied
            #below, so a defaulted id never reaches the template -- confirm.
            repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
            repl_dict = dict(repl_dict, **sl2object)

            if "id" not in sl2object:
                sl2object["id"] = 0

            #Create gdbcmd from template
            gdbcmd_build_path = path.join(
                tsuite.build_dirs["base"],
                "{0}_{1}".format(sl2object["id"], path.basename(gdbcmd_path)))

            new_gdbcmd = repl_file(repl_dict, gdbcmd_path)

            if new_gdbcmd:
                with open(gdbcmd_build_path, "w") as f:
                    f.write(new_gdbcmd)
                #Leaving the with-block closes and flushes the file, so the
                #copy below always ships the complete content.
                log.debug("Wrote gdb cmd to {0}".format(gdbcmd_build_path))
                log.debug("Remote copying gdbcmd.")
                ssh.copy_file(gdbcmd_build_path, gdbcmd_build_path)
            else:
                log.fatal("Unable to parse gdb cmd at {0}!".format(gdbcmd_path))
                #NOTE(review): the following code presumes shutdown() does
                #not return -- confirm.
                tsuite.shutdown()

            cmd = "sudo gdb -f -x {0} {1}".format(gdbcmd_build_path,
                                                  tsuite.src_dirs[res_bin_type])
            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])

            #Launch slash2 in gdb within a screen session
            ssh.run_screen(cmd, screen_sock_name)

            #Wait two seconds to make sure slash2 launched without errors
            time.sleep(2)

            screen_socks = ssh.list_screen_socks()
            if screen_sock_name + "-error" in screen_socks or screen_sock_name not in screen_socks:
                log.fatal("sl2object {0}:{1} launched with an error. Resume to {2} and resolve it."\
                    .format(sl2object["name"], sl2object["id"], screen_sock_name+"-error"))
                tsuite.shutdown(ignore=sock_name)

            log.debug("Waiting for {0} sock on {1} to appear.".format(
                sock_name, host))
            count = 0
            while True:
                result = ssh.run(ls_cmd, quiet=True)
                #Stop polling once any listed entry mentions res_bin_type
                if any(res_bin_type in sock for sock in result["out"]):
                    break
                time.sleep(1)
                count += 1
                if count == int(tsuite.conf["slash2"]["timeout"]):
                    log.fatal("Cannot find {0} sock on {1}. Resume to {2} and resolve it. "\
                      .format(res_bin_type, sl2object["id"], screen_sock_name))
                    tsuite.shutdown(ignore=sock_name)

            #grab pid for resource querying later
            #TODO: do not grab other running instances
            #NOTE(review): raises IndexError when pgrep prints nothing.
            sl2object["pid"] = ssh.run(
                "pgrep {0}".format(res_bin_type))['out'][0].strip()
            log.debug("Found {0} pid on {1} : {2}".format(res_bin_type, host,
                                                          sl2object["pid"]))

            if need_authbuf:
                pull_authbuf(tsuite, ssh)
        finally:
            #Close the session on every exit path, including errors.
            ssh.close()