示例#1
0
def kill_running_monitoring_stack_services():
    """Force-remove every docker container that belongs to the monitoring stack."""
    runner = LocalCmdRunner()
    for docker in get_monitoring_stack_services():
        LOGGER.info("Killing %s", docker['service'])
        remove_cmd = 'docker rm -f {name}-{port}'.format(name=docker['name'], port=docker['port'])
        runner.run(remove_cmd, ignore_status=True)
示例#2
0
def start_auto_ssh(docker_name, node, local_port, remote_port, ssh_mode="-R"):
    """
    Starts a reverse port forwarding with autossh inside a docker container

    :param docker_name: prefix of the docker name (cluster.Setup.test_id() usually would be used)
    :param node: an instance of a class derived from BaseNode that has ssh_login_info
    :param local_port: the destination port on local machine
    :param remote_port: the source port on the remote
    :param ssh_mode: define tunnel mode: -R reverse mode, -L direct mode
    :return: None
    """
    # pylint: disable=protected-access

    host_name = node.ssh_login_info['hostname']
    user_name = node.ssh_login_info['user']
    key_path = node.ssh_login_info['key_file']

    local_runner = LocalCmdRunner()
    # --network=host: the tunnel's local end binds directly on the host network.
    # AUTOSSH_GATETIME=0: presumably makes autossh keep retrying even when the
    # first connection attempt fails — confirm against jnovack/autossh docs.
    res = local_runner.run('''
           docker run -d --network=host \
           -e SSH_HOSTNAME={host_name} \
           -e SSH_HOSTUSER={user_name} \
           -e SSH_TUNNEL_HOST=127.0.0.1 \
           -e SSH_MODE={ssh_mode} \
           -e SSH_TUNNEL_LOCAL={local_port} \
           -e SSH_TUNNEL_REMOTE={remote_port} \
           -e AUTOSSH_GATETIME=0 \
           -v {key_path}:/id_rsa  \
           --restart always \
           --name {docker_name}-{host_name}-autossh jnovack/autossh
       '''.format(host_name=host_name, user_name=user_name, ssh_mode=ssh_mode, local_port=local_port, remote_port=remote_port, key_path=key_path, docker_name=docker_name))

    # Make sure the tunnel container is torn down when the test process exits.
    atexit.register(stop_auto_ssh, docker_name, node)
    LOGGER.debug('{docker_name}-{host_name}-autossh {res.stdout}'.format(docker_name=docker_name,
                                                                         host_name=host_name, res=res))
示例#3
0
def stop_rsyslog():
    """Stop the rsyslog docker container and remove its generated config file.

    Resets the module-level ``RSYSLOG_DOCKER_ID`` / ``RSYSLOG_CONF_PATH`` so a
    subsequent ``start_rsyslog()`` starts from a clean state.
    """
    global RSYSLOG_DOCKER_ID, RSYSLOG_CONF_PATH  # pylint: disable=global-statement
    if RSYSLOG_DOCKER_ID:
        local_runner = LocalCmdRunner()
        local_runner.run("docker kill {id}".format(id=RSYSLOG_DOCKER_ID), ignore_status=True)

    if RSYSLOG_CONF_PATH:
        try:
            os.remove(RSYSLOG_CONF_PATH)
        except OSError:  # best-effort cleanup: the file may already be gone
            # BUG FIX: was a broad `except Exception: pass`, which also hid
            # unrelated programming errors; only filesystem failures are expected.
            pass

    RSYSLOG_CONF_PATH = None
    RSYSLOG_DOCKER_ID = None
示例#4
0
def start_rsyslog(docker_name, log_dir, port="514"):
    """
    Start rsyslog in a docker, for getting logs from db-nodes

    :param docker_name: name of the docker instance
    :param log_dir: directory where to store the logs
    :param port: [Optional] the port binding for the docker run

    :return: the listening port
    """
    global RSYSLOG_DOCKER_ID, RSYSLOG_CONF_PATH  # pylint: disable=global-statement

    log_dir = os.path.abspath(log_dir)

    # cause of docker-in-docker, we need to capture the host log dir for mounting it
    # _SCT_BASE_DIR is set in hydra.sh
    base_dir = os.environ.get("_SCT_BASE_DIR", None)
    if base_dir:
        mount_log_dir = os.path.join(base_dir, os.path.basename(log_dir))
    else:
        mount_log_dir = log_dir

    # Kept in a module global so stop_rsyslog() can delete the file at exit.
    conf_path = generate_conf_file()
    RSYSLOG_CONF_PATH = conf_path
    local_runner = LocalCmdRunner()
    # /etc/passwd and /etc/group are mounted read-only — presumably so the
    # container maps file ownership to host users; confirm.
    res = local_runner.run('''
        mkdir -p {log_dir};
        docker run --rm -d \
        -v /etc/passwd:/etc/passwd:ro \
        -v /etc/group:/etc/group:ro \
        -v {mount_log_dir}:/logs \
        -v {conf_path}:/etc/rsyslog.conf \
        -p {port} \
        --name {docker_name}-rsyslogd rsyslog/syslog_appliance_alpine
    '''.format(log_dir=log_dir,
               mount_log_dir=mount_log_dir,
               conf_path=conf_path,
               port=port,
               docker_name=docker_name))

    # `docker run -d` prints the new container id on stdout.
    RSYSLOG_DOCKER_ID = res.stdout.strip()
    LOGGER.info("Rsyslog started. Container id: %s", RSYSLOG_DOCKER_ID)

    atexit.register(stop_rsyslog)

    # Docker may publish 514 on an arbitrary host port; ask which one was bound.
    res = local_runner.run('docker port {0} 514'.format(RSYSLOG_DOCKER_ID))
    # assumes `docker port` output has the form "<ip>:<port>" — keep the port part
    listening_port = res.stdout.strip().split(':')[1]

    return listening_port
示例#5
0
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):  # pylint: disable=unused-argument
    """Kill any leftover monitoring dockers, then start the whole stack again."""
    graf_port = GRAFANA_DOCKER_PORT
    alert_port = ALERT_DOCKER_PORT
    prom_port = PROMETHEUS_DOCKER_PORT
    runner = LocalCmdRunner()
    kill_cmd = 'cd {monitoring_dockers_dir}; ./kill-all.sh -g {graf_port} -m {alert_port} -p {prom_port}'.format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        graf_port=graf_port, alert_port=alert_port, prom_port=prom_port)
    runner.run(kill_cmd, ignore_status=True, verbose=False)
    start_cmd = dedent("""cd {monitoring_dockers_dir};
            ./start-all.sh \
            -g {graf_port} -m {alert_port} -p {prom_port} \
            -s {monitoring_dockers_dir}/config/scylla_servers.yml \
            -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
            -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        monitoring_stack_data_dir=monitoring_stack_data_dir,
        scylla_version=scylla_version,
        graf_port=graf_port, alert_port=alert_port, prom_port=prom_port))
    res = runner.run(start_cmd)
    if res.ok:
        LOGGER.info("Docker containers for monitoring stack are started")
示例#6
0
 def __init__(self,
              name,
              ssh_login_info=None,
              instance=None,
              global_ip=None,
              grafana_ip=None,
              tags=None,
              logdir=None):  # pylint: disable=too-many-arguments
     """Node descriptor used while collecting logs.

     :param name: display name of the node (also stored under tags["Name"])
     :param ssh_login_info: kwargs for RemoteCmdRunner; None means the node is local
     :param instance: backing cloud instance object, if any
     :param global_ip: externally reachable IP of the node
     :param grafana_ip: address used to reach grafana; defaults to global_ip
     :param tags: extra tags merged into self.tags
     :param logdir: directory for this node's logs; attribute is left untouched when None
     """
     if logdir:
         self.logdir = logdir
     self._containers = {}
     self.name = name
     # Without SSH credentials, commands run on the local machine.
     if ssh_login_info is None:
         self.remoter = LocalCmdRunner()
     else:
         self.remoter = RemoteCmdRunner(**ssh_login_info)
     self.ssh_login_info = ssh_login_info
     self._instance = instance
     self.external_address = global_ip
     if grafana_ip is None:
         self.grafana_address = global_ip
     else:
         self.grafana_address = grafana_ip
     self.tags = {
         **(tags or {}),
         "Name": self.name,
     }
示例#7
0
    def test_run_in_mainthread(  # pylint: disable=too-many-arguments
            self, remoter_type, host: str, stmt: str, verbose: bool, ignore_status: bool, new_session: bool, retry: int,
            timeout: Union[float, None]):
        """Run `stmt` locally and via a `remoter_type` remoter in the main thread.

        The remote run is repeated after a forced reconnect; both remote results
        (value or raised exception) must match the local reference result.
        """
        kwargs = {
            'verbose': verbose,
            'ignore_status': ignore_status,
            'new_session': new_session,
            'retry': retry,
            'timeout': timeout}
        # The local result (value or exception) is the reference to compare against.
        try:
            expected = LocalCmdRunner().run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            expected = exc

        if issubclass(remoter_type, (RemoteCmdRunner, RemoteLibSSH2CmdRunner)):
            remoter = remoter_type(hostname=host, user=getpass.getuser(), key_file=self.key_file)
        else:
            remoter = KubernetesCmdRunner(
                FakeKluster('http://127.0.0.1:8001'),
                pod='sct-cluster-gce-minikube-0', container="scylla", namespace="scylla")
        try:
            result = remoter.run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            result = exc
        # Verify the remoter also works after a session re-establishment.
        remoter._reconnect()
        try:
            result2 = remoter.run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            result2 = exc
        remoter.stop()

        self._compare_results(expected, result, stmt=stmt, kwargs=kwargs)
        self._compare_results(expected, result2, stmt=stmt, kwargs=kwargs)
示例#8
0
    def test_create_and_run_in_same_thread(  # pylint: disable=too-many-arguments,too-many-locals
            self, host: str, stmt: str, verbose: bool, ignore_status: bool, new_session: bool,
            retry: int, timeout: Union[float, None]):
        """Run `stmt` locally, then via paramiko and libssh2 remoters each created
        and used inside 3 worker threads; every result must match the local one."""
        kwargs = {
            'verbose': verbose,
            'ignore_status': ignore_status,
            'new_session': new_session,
            'retry': retry,
            'timeout': timeout}
        # BUG FIX: was repr({stmt: stmt, **kwargs}) — that used the statement text
        # itself as the dict key; log it under the literal 'stmt' key instead.
        self.log.info(repr({'stmt': stmt, **kwargs}))
        try:
            expected = LocalCmdRunner().run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            expected = exc

        paramiko_thread_results = []
        self._run_parallel(
            3,
            thread_body=self._create_and_run_in_same_thread,
            args=(RemoteCmdRunner, host, self.key_file, stmt, kwargs, paramiko_thread_results),
            kwargs={})

        libssh2_thread_results = []
        self._run_parallel(
            3,
            thread_body=self._create_and_run_in_same_thread,
            args=(RemoteLibSSH2CmdRunner, host, self.key_file, stmt, kwargs, libssh2_thread_results),
            kwargs={})

        for paramiko_result in paramiko_thread_results:
            self._compare_results(expected, paramiko_result, stmt=stmt, kwargs=kwargs)

        for libssh2_result in libssh2_thread_results:
            self._compare_results(expected, libssh2_result, stmt=stmt, kwargs=kwargs)
示例#9
0
    def test_create_and_run_in_separate_thread(  # pylint: disable=too-many-arguments
            self, remoter_type, host: str, stmt: str, verbose: bool, ignore_status: bool,
            new_session: bool, retry: int, timeout: Union[float, None]):
        """Create one remoter in the main thread, run `stmt` on it from 3 worker
        threads, and compare every result against the local reference run."""
        kwargs = {
            'verbose': verbose,
            'ignore_status': ignore_status,
            'new_session': new_session,
            'retry': retry,
            'timeout': timeout}
        # BUG FIX: was repr({stmt: stmt, **kwargs}) — that used the statement text
        # itself as the dict key; log it under the literal 'stmt' key instead.
        self.log.info(repr({'stmt': stmt, **kwargs}))
        try:
            expected = LocalCmdRunner().run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            expected = exc

        # Paramiko fails too often when it is invoked like that, that is why it is not in the test

        if issubclass(remoter_type, (RemoteCmdRunner, RemoteLibSSH2CmdRunner)):
            remoter = remoter_type(hostname=host, user=getpass.getuser(), key_file=self.key_file)
        else:
            remoter = KubernetesCmdRunner(
                FakeKluster('http://127.0.0.1:8001'),
                pod='sct-cluster-gce-minikube-0', container="scylla", namespace="scylla")

        libssh2_thread_results = []

        self._run_parallel(
            3,
            thread_body=self._create_and_run_in_separate_thread,
            args=(remoter, stmt, kwargs, libssh2_thread_results),
            kwargs={})

        for libssh2_result in libssh2_thread_results:
            self.log.error(str(libssh2_result))
            self._compare_results(expected, libssh2_result, stmt=stmt, kwargs=kwargs)
示例#10
0
    def test_create_and_run_in_separate_thread(  # pylint: disable=too-many-arguments
            self, host: str, stmt: str, verbose: bool, ignore_status: bool,
            new_session: bool, retry: int, timeout: Union[float, None]):
        """Create a libssh2 remoter in the main thread, run `stmt` on it from
        3 worker threads, and compare every result against the local run."""
        kwargs = {
            'verbose': verbose,
            'ignore_status': ignore_status,
            'new_session': new_session,
            'retry': retry,
            'timeout': timeout}
        # BUG FIX: was repr({stmt: stmt, **kwargs}) — that used the statement text
        # itself as the dict key; log it under the literal 'stmt' key instead.
        self.log.info(repr({'stmt': stmt, **kwargs}))
        try:
            expected = LocalCmdRunner().run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            expected = exc

        # Paramiko fails too often when it is invoked like that, that is why it is not in the test

        remoter = RemoteLibSSH2CmdRunner(hostname=host, user=getpass.getuser(), key_file=self.key_file)
        libssh2_thread_results = []

        self._run_parallel(
            3,
            thread_body=self._create_and_run_in_separate_thread,
            args=(remoter, stmt, kwargs, libssh2_thread_results),
            kwargs={})

        for libssh2_result in libssh2_thread_results:
            self.log.error(str(libssh2_result))
            self._compare_results(expected, libssh2_result, stmt=stmt, kwargs=kwargs)
    def test_load_1000_threads(self, remoter_type, stmt: str):
        """Stress test: run `stmt` through `remoter_type` from 1000 threads and
        compare every result against the local reference run."""
        kwargs = {
            'verbose': True,
            'ignore_status': False,
            'new_session': True,
            'retry': 2
        }
        # BUG FIX: was repr({stmt: stmt, **kwargs}) — that used the statement text
        # itself as the dict key; log it under the literal 'stmt' key instead.
        self.log.info(repr({'stmt': stmt, **kwargs}))
        try:
            expected = LocalCmdRunner().run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            expected = exc

        libssh2_thread_results = []
        # NOTE(review): unlike test_create_and_run_in_same_thread, no host is passed
        # before key_file here — confirm _create_and_run_in_same_thread's signature.
        self._run_parallel(1000,
                           thread_body=self._create_and_run_in_same_thread,
                           args=(remoter_type, self.key_file, stmt, kwargs,
                                 libssh2_thread_results),
                           kwargs={})

        for libssh2_result in libssh2_thread_results:
            self.log.error(str(libssh2_result))
            self._compare_results(expected,
                                  libssh2_result,
                                  stmt=stmt,
                                  kwargs=kwargs)
示例#12
0
 def get_docker_instances_by_testid(self):
     """Populate db/monitor/loader node lists from GCE instances tagged with this test id.

     The monitor entry is a local docker node whose grafana address is the
     docker bridge gateway.
     """
     instances = list_instances_gce({"TestId": self.test_id}, running=True)
     filtered_instances = filter_gce_instances_by_type(instances)
     for instance in filtered_instances['db_nodes']:
         self.db_cluster.append(
             CollectingNode(name=instance.name,
                            ssh_login_info={
                                "hostname": instance.public_ips[0],
                                # NOTE(review): user looks redacted ("******") — restore from VCS.
                                "user": '******',
                                "key_file":
                                self.params['user_credentials_path']
                            },
                            instance=instance,
                            global_ip=instance.public_ips[0]))
     # Monitoring stack runs locally in docker; grafana is reached via the bridge gateway.
     self.monitor_set.append(
         CollectingNode(name=f"monitor-node-{self.test_id}-0",
                        global_ip='127.0.0.1',
                        grafana_ip=get_docker_bridge_gateway(
                            LocalCmdRunner())))
     for instance in filtered_instances['loader_nodes']:
         self.loader_set.append(
             CollectingNode(name=instance.name,
                            ssh_login_info={
                                "hostname": instance.public_ips[0],
                                # NOTE(review): user looks redacted ("******") — restore from VCS.
                                "user": '******',
                                "key_file":
                                self.params['user_credentials_path']
                            },
                            instance=instance,
                            global_ip=instance.public_ips[0]))
示例#13
0
 def _get_screenshot_link(self,
                          grafana_url,
                          screenshot_path,
                          resolution="1920x1280"):
     """Render a Grafana page into an image file with the bundled phantomjs script."""
     cmd = ("cd {0.phantomjs_dir} && bin/phantomjs {0.sct_base_path}/data_dir/make_screenshot.js \"{1}\" \"{2}\" {3}"
            .format(self, grafana_url, screenshot_path, resolution))
     LocalCmdRunner().run(cmd, ignore_status=True)
 def __init__(self):
     """Local pseudo-node: runs commands locally and copies files instead of SCP."""
     self.remoter = LocalCmdRunner()
     # On a local node, sending and receiving files are both plain local copies.
     for method_name in ('receive_files', 'send_files'):
         setattr(self.remoter, method_name,
                 types.MethodType(send_receive_files, self.remoter))
     self.ip_address = "127.0.0.1"
     self.logdir = os.path.dirname(__file__)
示例#15
0
def is_docker_available():
    """Return True when the docker daemon answers `docker ps`, False otherwise."""
    LOGGER.info("Checking that docker is available...")
    result = LocalCmdRunner().run('docker ps', ignore_status=True, verbose=False)
    if not result.ok:
        LOGGER.warning('Docker is not available on your computer. Please install docker software before continue')
        return False
    LOGGER.info('Docker is available')
    return True
示例#16
0
def is_auto_ssh_running(docker_name, node):
    """Return True when the autossh tunnel container for `node` is running.

    :param docker_name: docker name prefix used when the tunnel was started
    :param node: node instance the tunnel was created for (provides ssh_login_info)
    :rtype: bool
    """
    host_name = node.ssh_login_info['hostname']
    container_name = f"{docker_name}-{host_name}-autossh"
    ps_output = LocalCmdRunner().run('docker ps', ignore_status=True).stdout
    return container_name in ps_output
示例#17
0
def verify_dockers_are_running():
    """Return True when both grafana and prometheus monitoring containers are up."""
    result = LocalCmdRunner().run("docker ps --format '{{.Names}}'", ignore_status=True)  # pylint: disable=invalid-name
    docker_names = set(result.stdout.strip().split())
    required = {
        f"{GRAFANA_DOCKER_NAME}-{GRAFANA_DOCKER_PORT}",
        f"{PROMETHEUS_DOCKER_NAME}-{PROMETHEUS_DOCKER_PORT}",
    }
    if result.ok and docker_names and required <= docker_names:
        LOGGER.info("Monitoring stack docker containers are running.\n%s", result.stdout)
        return True
    LOGGER.error("Monitoring stack containers are not running\nStdout:\n%s\nstderr:%s", result.stdout, result.stderr)
    return False
 def __init__(self,
              name,
              parent_cluster,
              ssh_login_info=None,
              base_logdir=None,
              node_prefix=None,
              dc_idx=0):
     """Local dummy node: all commands run on the local machine.

     ``ssh_login_info``, ``base_logdir``, ``node_prefix`` and ``dc_idx`` are
     accepted for interface compatibility with other node classes but are
     not used by this body.
     """
     super().__init__(name, parent_cluster)
     self.remoter = LocalCmdRunner()
     self.logdir = os.path.dirname(__file__)
示例#19
0
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir,
                  scylla_version):  # pylint: disable=unused-argument
    """Restart the monitoring stack dockers with a cleaned-up configuration.

    Kills running monitoring containers, empties the scylla node targets,
    strips SCT-specific scrape configs from the prometheus template, then
    starts the full stack again.
    """
    graf_port = GRAFANA_DOCKER_PORT
    alert_port = ALERT_DOCKER_PORT
    prom_port = PROMETHEUS_DOCKER_PORT
    lr = LocalCmdRunner()  # pylint: disable=invalid-name
    lr.run(
        'cd {monitoring_dockers_dir}; ./kill-all.sh -g {graf_port} -m {alert_port} -p {prom_port}'
        .format(**locals()),
        ignore_status=True,
        verbose=False)

    # clear scylla nodes from configuration
    servers_yaml = Path(
        monitoring_dockers_dir) / 'config' / 'scylla_servers.yml'
    servers_yaml.write_text("- targets: []")

    # clear SCT scrape configurations
    prom_tmpl_file = Path(
        monitoring_dockers_dir) / 'prometheus' / 'prometheus.yml.template'
    templ_yaml = yaml.safe_load(prom_tmpl_file.read_text())

    def remove_sct_metrics(metric):
        # SCT-added scrape jobs are recognized by '_metrics' in their job name.
        return '_metrics' not in metric['job_name']

    templ_yaml["scrape_configs"] = list(
        filter(remove_sct_metrics, templ_yaml["scrape_configs"]))
    prom_tmpl_file.write_text(yaml.safe_dump(templ_yaml))

    # The $(grep ...) fragments pass --no-renderer/--no-loki only when this
    # monitoring branch's start-all.sh mentions them — presumably because not
    # every branch supports those flags; confirm against scylla-monitoring docs.
    # NOTE(review): UA.sh is truncated before start — presumably to disable
    # usage analytics; confirm.
    cmd = dedent("""cd {monitoring_dockers_dir};
            echo "" > UA.sh
            ./start-all.sh \
            $(grep -q -- --no-renderer ./start-all.sh && echo "--no-renderer")  \
            $(grep -q -- --no-loki ./start-all.sh && echo "--no-loki")  \
            -g {graf_port} -m {alert_port} -p {prom_port} \
            -s {monitoring_dockers_dir}/config/scylla_servers.yml \
            -d {monitoring_stack_data_dir} -v {scylla_version} \
            -b '-storage.tsdb.retention.time=100y' \
            -c 'GF_USERS_DEFAULT_THEME=dark'""".format(**locals()))
    res = lr.run(cmd)
    if res.ok:
        LOGGER.info("Docker containers for monitoring stack are started")
示例#20
0
    def install_phantom_js(self):
        """Install phantom_js to sct root dir

        If sct runs outside the docker container,
        sct will install the phantomjs to
        sct root dir
        """
        localrunner = LocalCmdRunner()
        if not self.phantomjs_installed:
            LOGGER.debug("Installing phantomjs to sct root dir")
            # pylint: disable=unused-variable
            # The names below look unused but are consumed via .format(**locals()).
            phantomjs_tar = "{0.phantomjs_base}.tar.bz2".format(self)
            phantomjs_url = "https://bitbucket.org/ariya/phantomjs/downloads/{phantomjs_tar}".format(
                **locals())
            install_phantom_js_script = dedent("""
                curl {phantomjs_url} -o {phantomjs_tar} -L
                tar xvfj {phantomjs_tar}
            """.format(**locals()))
            localrunner.run("bash -ce '%s'" % install_phantom_js_script)
            # Remember where the unpacked phantomjs lives for later screenshot calls.
            self.phantomjs_dir = os.path.join(self.sct_base_path,
                                              self.phantomjs_base)
        else:
            LOGGER.debug("PhantomJS is already installed!")
示例#21
0
def create_monitoring_stack_dir(base_dir, archive):
    """Extract the monitoring stack archive into `base_dir` and return the stack dir.

    Returns False if extraction failed.
    """
    extract_cmd = dedent("""
        cd {data_dir}
        cp {archive} ./
        tar -xvf {archive_name}
        chmod -R 777 {data_dir}
        """.format(data_dir=base_dir,
                   archive=archive,
                   archive_name=os.path.basename(archive)))

    res = LocalCmdRunner().run(extract_cmd, ignore_status=True)
    if res.exited > 0:
        LOGGER.error("Error during extracting monitoring stack")
        return False

    return get_monitoring_stack_dir(base_dir)
示例#22
0
def create_monitoring_data_dir(base_dir, archive):
    """Extract a prometheus snapshot archive under `base_dir` and return the data dir.

    Returns False if extraction failed.
    """
    monitoring_data_base_dir = os.path.join(base_dir, 'monitoring_data_dir')
    extract_cmd = dedent("""
        mkdir -p {data_dir}
        cd {data_dir}
        cp {archive} ./
        tar -xvf {archive_name}
        chmod -R 777 {data_dir}
        """.format(data_dir=monitoring_data_base_dir,
                   archive_name=os.path.basename(archive),
                   archive=archive))
    res = LocalCmdRunner().run(extract_cmd, timeout=COMMAND_TIMEOUT, ignore_status=True)
    if res.exited > 0:
        LOGGER.error("Error during extracting prometheus snapshot. Switch to next archive")
        return False
    return get_monitoring_data_dir(monitoring_data_base_dir)
示例#23
0
    def _get_shared_snapshot_link(self, grafana_url):
        """Create a shared snapshot of a grafana dashboard via phantomjs.

        :param grafana_url: URL of the dashboard to snapshot
        :return: the raintank.io snapshot URL, or "" on failure
        """
        result = LocalCmdRunner().run(
            "cd {0.phantomjs_dir} && bin/phantomjs {0.sct_base_path}/data_dir/share_snapshot.js \"{1}\""
            .format(self, grafana_url),
            ignore_status=True)
        # since there is only one monitoring node returning here
        output = result.stdout.strip()
        if "Error" in output:
            LOGGER.error(output)
            return ""
        matched = re.search(
            r"https://snapshot.raintank.io/dashboard/snapshot/\w+", output)
        if matched is None:
            # BUG FIX: re.search returns None on no match, and the old code then
            # crashed with AttributeError on matched.group(); treat as failure.
            LOGGER.error("No snapshot link found in output: %s", output)
            return ""
        LOGGER.info("Shared grafana snapshot: {}".format(matched.group()))

        return matched.group()
示例#24
0
 def __init__(self,
              name,
              ssh_login_info=None,
              instance=None,
              global_ip=None,
              grafana_ip=None):  # pylint: disable=too-many-arguments
     """Collecting-node descriptor; picks a local or remote command runner.

     Without ssh_login_info the node is treated as the local machine; the
     grafana address falls back to the global IP when none is given.
     """
     self.name = name
     self.remoter = (LocalCmdRunner() if ssh_login_info is None
                     else RemoteCmdRunner(**ssh_login_info))
     self.ssh_login_info = ssh_login_info
     self._instance = instance
     self.external_address = global_ip
     self.grafana_address = global_ip if grafana_ip is None else grafana_ip
示例#25
0
    def test_run_in_mainthread(  # pylint: disable=too-many-arguments
            self, host: str, stmt: str, verbose: bool, ignore_status: bool, new_session: bool, retry: int,
            timeout: Union[float, None]):
        """Run `stmt` through paramiko and libssh2 remoters (before and after a
        reconnect) and compare all four results against the local reference run."""
        kwargs = {
            'verbose': verbose,
            'ignore_status': ignore_status,
            'new_session': new_session,
            'retry': retry,
            'timeout': timeout}
        try:
            expected = LocalCmdRunner().run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            expected = exc

        remoter = RemoteCmdRunner(
            hostname=host, user=getpass.getuser(), key_file=self.key_file)
        try:
            paramiko_result = remoter.run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            paramiko_result = exc
        remoter._reconnect()
        try:
            paramiko_result2 = remoter.run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            paramiko_result2 = exc
        remoter.stop()

        # BUG FIX: the libssh2 runner targeted a hard-coded '127.0.0.1' while the
        # paramiko one used the parametrized `host`; use `host` for both so the
        # two transports are exercised against the same endpoint.
        remoter = RemoteLibSSH2CmdRunner(
            hostname=host, user=getpass.getuser(), key_file=self.key_file)
        try:
            lib2ssh_result = remoter.run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            lib2ssh_result = exc
        remoter._reconnect()
        try:
            lib2ssh_result2 = remoter.run(stmt, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            lib2ssh_result2 = exc
        remoter.stop()
        self._compare_results(expected, paramiko_result, stmt=stmt, kwargs=kwargs)
        self._compare_results(expected, paramiko_result2, stmt=stmt, kwargs=kwargs)
        self._compare_results(expected, lib2ssh_result, stmt=stmt, kwargs=kwargs)
        self._compare_results(expected, lib2ssh_result2, stmt=stmt, kwargs=kwargs)
示例#26
0
def stop_auto_ssh(docker_name, node):
    """
    stops an autossh docker instance
    :param docker_name: prefix of the docker name (cluster.Setup.test_id() usually would be used)
    :param node: an instance of a class derived from BaseNode that has ssh_login_info
    :return: None
    """
    # pylint: disable=protected-access

    host_name = node.ssh_login_info['hostname']
    # Must match the container name composed by start_auto_ssh().
    container_name = f"{docker_name}-{host_name}-autossh"
    local_runner = LocalCmdRunner()
    LOGGER.debug("Saving autossh container logs")
    # NOTE(review): '&>' redirection is bash-specific — confirm the local runner
    # executes commands through bash rather than plain sh.
    local_runner.run(f"docker logs {container_name} &> {node.logdir}/autossh.log", ignore_status=True)
    LOGGER.debug(f"Killing {container_name}")
    local_runner.run(f"docker rm -f {container_name}", ignore_status=True)
示例#27
0
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir,
                  scylla_version):  # pylint: disable=unused-argument
    """Kill any running monitoring dockers, then start the full stack.

    Raises on startup failure; returns True on success.
    """
    runner = LocalCmdRunner()
    runner.run('cd {}; ./kill-all.sh'.format(monitoring_dockers_dir))
    cmd = dedent("""cd {monitoring_dockers_dir};
            ./start-all.sh \
            -s {monitoring_dockers_dir}/config/scylla_servers.yml \
            -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
            -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        monitoring_stack_data_dir=monitoring_stack_data_dir,
        scylla_version=scylla_version))
    res = runner.run(cmd, ignore_status=True)
    if not res.ok:
        raise Exception('dockers start failed. {}'.format(res))
    ps_result = runner.run('docker ps')
    LOGGER.info(ps_result.stdout.encode('utf-8'))
    return True
示例#28
0
class TestHydraSh(unittest.TestCase):
    """Dry-run tests for docker/env/hydra.sh across backend/runner combinations."""

    cmd_runner = LocalCmdRunner()

    @staticmethod
    def prepare_environment(env):
        """Scrub SCT_/AWS_/GOOGLE_ variables from os.environ, then apply `env`."""
        # BUG FIX: iterate over a snapshot of the keys — deleting entries while
        # iterating os.environ directly raises "RuntimeError: dictionary changed
        # size during iteration".
        for name in list(os.environ):
            if any(
                    name.startswith(prefix)
                    for prefix in ['SCT_', 'AWS_', 'GOOGLE_']):
                del os.environ[name]

        for name, value in env.items():
            os.environ[name] = value

    @staticmethod
    @contextlib.contextmanager
    def environ():
        """Context manager that snapshots os.environ and restores it on exit."""
        old_environment = os.environ.copy()
        try:
            yield
        finally:
            # BUG FIX: restore inside `finally` so the environment is rolled
            # back even when the with-block body raises.
            os.environ.clear()
            for name, value in old_environment.items():
                os.environ[name] = value

    @staticmethod
    def validate_result(
        result,
        expected_status: int,
        expected: Sequence[Union[str, re.Pattern]],
        not_expected: Sequence[Union[str, re.Pattern]],
    ):
        """Collect mismatches between `result` and the expectations.

        :param result: command result with `return_code` and `stdout`
        :param expected_status: exit status to require, or None to skip the check
        :param expected: strings/regexes that must appear in stdout
        :param not_expected: strings/regexes that must NOT appear in stdout
        :return: list of human-readable error strings (empty means success)
        """
        errors = []
        if expected_status is not None:
            if result.return_code != expected_status:
                errors.append(
                    f'Returned status {result.return_code}, while expected {expected_status}'
                )

        for pattern_expected in expected:
            if isinstance(pattern_expected, re.Pattern):
                if not pattern_expected.search(result.stdout):
                    errors.append(
                        f"Can't find regex {pattern_expected.pattern}")
            elif isinstance(pattern_expected, str):
                if pattern_expected not in result.stdout:
                    errors.append(f"Can't find {pattern_expected}")

        for pattern_not_expected in not_expected:
            if isinstance(pattern_not_expected, re.Pattern):
                if pattern_not_expected.search(result.stdout):
                    errors.append(
                        f"Found pattern that should not be there: {pattern_not_expected}"
                    )
            elif isinstance(pattern_not_expected, str):
                if pattern_not_expected in result.stdout:
                    errors.append(
                        f"Found pattern that should not be there: {pattern_not_expected}"
                    )
        return errors

    @parameterized.expand(
        LongevityPipelineTest(
            backend='aws', runner=False, aws_creds=True,
            gce_creds=True).hydra_test_cases + LongevityPipelineTest(
                backend='aws', runner=True, aws_creds=True,
                gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(
            backend='gce', runner=False, aws_creds=True,
            gce_creds=True).hydra_test_cases + LongevityPipelineTest(
                backend='gce', runner=True, aws_creds=True,
                gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(
            backend='gce-siren', runner=False, aws_creds=True,
            gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(
            backend='gce-siren', runner=True, aws_creds=True,
            gce_creds=True).hydra_test_cases)
    def test_run_test(self, test_case_params: HydraTestCaseParams,
                      tmp_dir: HydraTestCaseTmpDir):
        """Run hydra.sh with --dry-run-hydra for one case and validate its output."""
        with tmp_dir, self.environ():
            cmd, expected, not_expected, expected_status, env = test_case_params.as_tuple
            self.prepare_environment(env)
            result = self.cmd_runner.run(sct_abs_path('docker/env/hydra.sh') +
                                         ' --dry-run-hydra ' + cmd,
                                         ignore_status=True)
            errors = self.validate_result(
                result=result,
                expected_status=expected_status,
                expected=expected,
                not_expected=not_expected,
            )

        assert not errors, f'Case: {cmd}\nReturned:\n{result}\nFound following errors:\n' + (
            '\n'.join(errors))
from sdcm.sct_events.system import SpotTerminationEvent
from sdcm.utils.aws_utils import tags_as_ec2_tags, ec2_instance_wait_public_ip
from sdcm.utils.common import list_instances_aws, get_ami_tags, MAX_SPOT_DURATION_TIME
from sdcm.utils.decorators import retrying
from sdcm.wait import exponential_retry

LOGGER = logging.getLogger(__name__)

# Instance provisioning strategies accepted by the cluster configuration.
INSTANCE_PROVISION_ON_DEMAND = 'on_demand'
INSTANCE_PROVISION_SPOT_FLEET = 'spot_fleet'
INSTANCE_PROVISION_SPOT_LOW_PRICE = 'spot_low_price'
INSTANCE_PROVISION_SPOT_DURATION = 'spot_duration'
# NOTE(review): limits and overhead semantics inferred from the names only —
# confirm against the call sites (not visible in this chunk).
SPOT_CNT_LIMIT = 20
SPOT_FLEET_LIMIT = 50
SPOT_TERMINATION_CHECK_OVERHEAD = 15
# Shared runner for local shell commands used across this module.
LOCAL_CMD_RUNNER = LocalCmdRunner()
# Disk backing types for cluster nodes.
EBS_VOLUME = "attached"
INSTANCE_STORE = "instance_store"

P = ParamSpec("P")  # pylint: disable=invalid-name
R = TypeVar("R")  # pylint: disable=invalid-name

# pylint: disable=too-many-lines


class AWSCluster(cluster.BaseCluster):  # pylint: disable=too-many-instance-attributes,abstract-method,

    """
    Cluster of Node objects, started on Amazon EC2.
    """
示例#30
0
def restore_monitoring_stack(test_id):
    """Restore and start a monitoring stack from the archives stored for `test_id`.

    Downloads the 'monitoring_data_stack' and 'prometheus' archives, extracts
    them, starts the monitoring dockers and uploads dashboards/annotations.

    :param test_id: test id whose stored monitoring archives should be restored
    :return: True if the dockers started successfully, False on any failure
    """
    from sdcm.remote import LocalCmdRunner

    lr = LocalCmdRunner()
    logger.info("Checking that docker is available...")
    result = lr.run('docker ps', ignore_status=True, verbose=False)
    if result.ok:
        logger.info('Docker is available')
    else:
        logger.warning(
            'Docker is not available on your computer. Please install docker software before continue'
        )
        return False

    monitor_stack_base_dir = tempfile.mkdtemp()
    stored_files_by_test_id = list_logs_by_test_id(test_id)
    monitor_stack_archives = []
    for f in stored_files_by_test_id:
        if f['type'] in ['monitoring_data_stack', 'prometheus']:
            monitor_stack_archives.append(f)
    # Both the stack archive and the prometheus data archive are required.
    if not monitor_stack_archives or len(monitor_stack_archives) < 2:
        logger.warning(
            'There is no available archive files for monitoring data stack restoring for test id : {}'
            .format(test_id))
        return False

    for arch in monitor_stack_archives:
        logger.info('Download file {} to directory {}'.format(
            arch['link'], monitor_stack_base_dir))
        local_path_monitor_stack = S3Storage().download_file(
            arch['link'], dst_dir=monitor_stack_base_dir)
        monitor_stack_workdir = os.path.dirname(local_path_monitor_stack)
        monitoring_stack_archive_file = os.path.basename(
            local_path_monitor_stack)
        logger.info('Extracting data from archive {}'.format(
            arch['file_path']))
        if arch['type'] == 'prometheus':
            monitoring_stack_data_dir = os.path.join(monitor_stack_workdir,
                                                     'monitor_data_dir')
            cmd = dedent("""
                mkdir -p {data_dir}
                cd {data_dir}
                cp ../{archive} ./
                tar -xvf {archive}
                chmod -R 777 {data_dir}
                """.format(data_dir=monitoring_stack_data_dir,
                           archive=monitoring_stack_archive_file))
            result = lr.run(cmd, ignore_status=True)
        else:
            # Derive the monitoring branch and scylla version from the archive name.
            # NOTE(review): the pattern should be a raw string (r'...') to avoid
            # invalid-escape warnings, and `branches` is None (crashing below)
            # when the file name does not match — confirm/han­dle.
            branches = re.search(
                '(?P<monitoring_branch>branch-[\d]+\.[\d]+?)_(?P<scylla_version>[\d]+\.[\d]+?)',
                monitoring_stack_archive_file)
            monitoring_branch = branches.group('monitoring_branch')
            scylla_version = branches.group('scylla_version')
            cmd = dedent("""
                cd {workdir}
                tar -xvf {archive}
                """.format(workdir=monitor_stack_workdir,
                           archive=monitoring_stack_archive_file))
            result = lr.run(cmd, ignore_status=True)
        if not result.ok:
            logger.warning(
                "During restoring file {} next errors occured:\n {}".format(
                    arch['link'], result))
            return False
        logger.info("Extracting data finished")

    logger.info(
        'Monitoring stack files available {}'.format(monitor_stack_workdir))

    monitoring_dockers_dir = os.path.join(
        monitor_stack_workdir,
        'scylla-monitoring-{}'.format(monitoring_branch))

    def upload_sct_dashboards():
        # Load the per-test dashboard JSON into grafana, if it was archived.
        sct_dashboard_file_name = "scylla-dash-per-server-nemesis.{}.json".format(
            scylla_version)
        sct_dashboard_file = os.path.join(monitoring_dockers_dir,
                                          'sct_monitoring_addons',
                                          sct_dashboard_file_name)
        if not os.path.exists(sct_dashboard_file):
            logger.info('There is no dashboard {}. Skip load dashboard'.format(
                sct_dashboard_file_name))
            return False

        # NOTE(review): the line below was corrupted by credential redaction
        # ("*****") — the original presumably built a grafana dashboard_url,
        # uploaded the dashboard, and also defined an upload_annotations()
        # helper (called at the end of this function). Restore from VCS.
        dashboard_url = 'http://*****:*****@retrying(n=3, sleep_time=1, message='Start docker containers')
    def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir,
                      scylla_version):
        # Kill any leftovers, then start the stack; retried by the decorator.
        lr.run('cd {}; ./kill-all.sh'.format(monitoring_dockers_dir))
        cmd = dedent("""cd {monitoring_dockers_dir};
                ./start-all.sh \
                -s {monitoring_dockers_dir}/config/scylla_servers.yml \
                -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
                -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
            **locals()))
        res = lr.run(cmd, ignore_status=True)
        if res.ok:
            r = lr.run('docker ps')
            logger.info(r.stdout.encode('utf-8'))
            return True
        else:
            raise Exception('dockers start failed. {}'.format(res))

    status = False
    status = start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir,
                           scylla_version)
    upload_sct_dashboards()
    upload_annotations()
    return status