Example #1
def restore_monitor_stack(test_id, date_time=None):
    if not is_docker_available():
        return False

    monitor_stack_archives = get_monitor_set_archives(test_id)
    arch = get_monitor_stack_archive(monitor_stack_archives, date_time)
    if not arch:
        return False
    # Arch element structure:
    #     {
    #         "file_path": log_file,
    #         "type": log_type,
    #         "link": link to archive,
    #         "date": date of create
    #     }

    LOGGER.info('Restoring monitoring stack from archive %s',
                arch['file_path'])
    monitor_stack_base_dir = tempfile.mkdtemp()
    LOGGER.info('Downloading file %s to directory %s',
                arch['link'], monitor_stack_base_dir)
    downloaded_monitor_archive = S3Storage().download_file(
        arch['link'], dst_dir=monitor_stack_base_dir)
    monitor_data_arch = extract_monitor_data_archive(
        downloaded_monitor_archive, monitor_stack_base_dir)
    monitor_stack_arch = extract_monitor_stack_archive(
        downloaded_monitor_archive, monitor_stack_base_dir)

    if not monitor_data_arch:
        LOGGER.error("No prometheus snapshot were found in arch %s",
                     arch['file_path'])
        return False
    if not monitor_stack_arch:
        LOGGER.error("No monitor stack archive were found in arch %s",
                     arch['file_path'])
        return False

    monitor_data_dir = create_monitoring_data_dir(monitor_stack_base_dir,
                                                  monitor_data_arch)
    monitor_stack_dir = create_monitoring_stack_dir(monitor_stack_base_dir,
                                                    monitor_stack_arch)

    if not monitor_stack_dir or not monitor_data_dir:
        LOGGER.error(
            'Creating monitor stack directories failed:\ndata_dir: %s; stack_dir: %s',
            monitor_data_dir, monitor_stack_dir)
        # Neither directory can be missing for the steps below; bail out.
        return False
    _, scylla_version = get_monitorstack_scylla_version(monitor_stack_dir)

    status = start_dockers(monitor_stack_dir, monitor_data_dir, scylla_version)
    if status:
        upload_sct_dashboards(monitor_stack_dir, scylla_version)
        upload_annotations(monitor_stack_dir)
        return status
    else:
        LOGGER.error('Error while starting docker containers. Trying next archive')
        remove_files(monitor_stack_base_dir)
        return False
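A minimal usage sketch, assuming the function is imported from its SCT module; the test_id value is an illustrative placeholder, not from the source, and date_time=None defers archive selection to get_monitor_stack_archive:

# Hypothetical call: the test_id value is a placeholder.
if restore_monitor_stack(test_id='3f5a9c10', date_time=None):
    print('Monitoring stack restored and running')
else:
    print('Restore failed; see the log for the failing step')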
Example #2
def restore_monitoring_stack(test_id, date_time=None):  # pylint: disable=too-many-return-statements
    if not is_docker_available():
        return False

    arch = get_monitoring_stack_archive(test_id, date_time)
    if not arch:
        return False
    # Arch element structure:
    #     {
    #         "file_path": log_file,
    #         "type": log_type,
    #         "link": link to archive,
    #         "date": date of create
    #     }

    LOGGER.info('Restoring monitoring stack from archive %s', arch['file_path'])
    monitoring_stack_base_dir = tempfile.mkdtemp()
    LOGGER.info('Downloading file %s to directory %s', arch['link'], monitoring_stack_base_dir)
    downloaded_monitoring_archive = S3Storage().download_file(arch['link'],
                                                              dst_dir=monitoring_stack_base_dir)
    monitoring_data_arch = extract_monitoring_data_archive(downloaded_monitoring_archive,
                                                           monitoring_stack_base_dir)
    monitoring_stack_arch = extract_monitoring_stack_archive(downloaded_monitoring_archive,
                                                             monitoring_stack_base_dir)

    if not monitoring_data_arch:
        LOGGER.error("No prometheus snapshot were found in arch %s", arch['file_path'])
        return False
    if not monitoring_stack_arch:
        LOGGER.error("No monitoring stack archive were found in arch %s", arch['file_path'])
        return False

    monitoring_data_dir = create_monitoring_data_dir(monitoring_stack_base_dir, monitoring_data_arch)
    monitoring_stack_dir = create_monitoring_stack_dir(monitoring_stack_base_dir, monitoring_stack_arch)

    if not monitoring_stack_dir or not monitoring_data_dir:
        LOGGER.error('Creating monitoring stack directories failed:\ndata_dir: %s; stack_dir: %s',
                     monitoring_data_dir, monitoring_stack_dir)
        # Neither directory can be missing for the steps below; bail out.
        return False
    _, scylla_version = get_monitoring_stack_scylla_version(monitoring_stack_dir)

    status = run_monitoring_stack_containers(monitoring_stack_dir, monitoring_data_dir, scylla_version)
    if not status:
        return False

    status = restore_grafana_dashboards_and_annotations(monitoring_stack_dir, scylla_version)
    if not status:
        return False

    status = verify_monitoring_stack(scylla_version)
    if not status:
        remove_files(monitoring_stack_base_dir)
        return False

    LOGGER.info("Monitoring stack is running")
    return True
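The body of verify_monitoring_stack is not shown above; here is a plausible sketch of such a check, assuming the restored Grafana listens locally on its default port. Both the port and the /api/health endpoint are assumptions about this codebase (they are Grafana defaults), and the function name is a placeholder:

import requests  # third-party HTTP client, assumed available

def check_grafana_is_up(port=3000, timeout=5):
    # Hypothetical health check: the endpoint and port are assumptions,
    # not taken from the source code above.
    try:
        resp = requests.get(f'http://localhost:{port}/api/health', timeout=timeout)
        return resp.status_code == 200
    except requests.RequestException:
        return False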
Example #3
    def collect_logs(self, local_search_path=None):
        def collect_logs_per_node(node):
            LOGGER.info('Collecting logs on host: %s', node.name)
            remote_node_dir = self.create_remote_storage_dir(node)
            local_node_dir = os.path.join(self.local_dir, node.name)
            for log_entity in self.log_entities:
                try:
                    log_entity.collect(node,
                                       local_node_dir,
                                       remote_node_dir,
                                       local_search_path=local_search_path)
                except Exception as details:  # pylint: disable=unused-variable, broad-except
                    LOGGER.error(
                        "Error occured during collecting on host: %s\n%s",
                        node.name, details)

        LOGGER.debug("Nodes list %s", [node.name for node in self.nodes])

        if not self.nodes:
            LOGGER.warning('No nodes found for %s cluster. Logs will not be collected',
                           self.cluster_log_type)
            return None
        try:
            workers_number = len(self.nodes) // 2
            if workers_number < 2:
                workers_number = len(self.nodes)
            ParallelObject(self.nodes,
                           num_workers=workers_number,
                           timeout=self.collect_timeout).run(
                               collect_logs_per_node, ignore_exceptions=True)
        except Exception as details:  # pylint: disable=broad-except
            LOGGER.error('Error occurred while collecting logs: %s', details)

        if not os.listdir(self.local_dir):
            LOGGER.warning('Directory %s is empty', self.local_dir)
            return None

        final_archive = self.archive_dir_with_zip64(self.local_dir)
        if not final_archive:
            return None
        s3_link = self.upload_logs(final_archive,
                                   "{0.test_id}/{0.current_run}".format(self))
        remove_files(self.local_dir)
        remove_files(final_archive)
        return s3_link
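ParallelObject is an SCT helper whose implementation is not shown here; the fan-out pattern it provides can be sketched with the standard library. The function and parameter names below are placeholders, not the real ParallelObject API:

from concurrent.futures import ThreadPoolExecutor

def run_per_node(nodes, func, num_workers, timeout):
    # Standard-library sketch of the fan-out used above: run func(node)
    # for every node on a bounded thread pool and, like
    # ignore_exceptions=True, report failures instead of raising them.
    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        futures = [pool.submit(func, node) for node in nodes]
        for future in futures:
            try:
                future.result(timeout=timeout)
            except Exception as exc:  # pylint: disable=broad-except
                print('worker failed: %s' % exc)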
Example #4
    def collect_logs(self, local_search_path=None):
        for ent in self.log_entities:
            ent.collect(None,
                        self.local_dir,
                        None,
                        local_search_path=local_search_path)
        if not os.listdir(self.local_dir):
            LOGGER.warning('No local files found')
            LOGGER.info('Searching on builders')
            builders = get_builder_by_test_id(self.test_id)

            for obj in builders:
                builder = CollectingNode(name=obj['builder']['name'],
                                         ssh_login_info={
                                             "hostname": obj['builder']['public_ip'],
                                             "user": obj['builder']['user'],
                                             "key_file": obj['builder']['key_file'],
                                         },
                                         instance=None,
                                         global_ip=obj['builder']['public_ip'])
                for ent in self.log_entities:
                    ent.collect_from_builder(builder, self.local_dir,
                                             obj["path"])

            if not os.listdir(self.local_dir):
                LOGGER.warning('Nothing found on builders either')
                return None

        final_archive = self.archive_dir_with_zip64(self.local_dir)
        if not final_archive:
            return None

        s3_link = self.upload_logs(final_archive,
                                   "{0.test_id}/{0.current_run}".format(self))
        remove_files(self.local_dir)
        remove_files(final_archive)
        return s3_link
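For reference, the shape of one record returned by get_builder_by_test_id, reconstructed from the key accesses in the loop above; all values here are illustrative placeholders:

example_builder_record = {
    'builder': {
        'name': 'builder-1',               # placeholder
        'public_ip': '203.0.113.10',       # placeholder (TEST-NET address)
        'user': 'jenkins',                 # placeholder
        'key_file': '~/.ssh/builder.pem',  # placeholder
    },
    'path': '/path/to/job/workspace',      # placeholder
}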