def restore_monitor_stack(test_id, date_time=None):
    if not is_docker_available():
        return False

    monitor_stack_archives = get_monitor_set_archives(test_id)
    arch = get_monitor_stack_archive(monitor_stack_archives, date_time)
    if not arch:
        return False
    # Arch element structure:
    # {
    #     "file_path": log_file,
    #     "type": log_type,
    #     "link": link to archive,
    #     "date": creation date
    # }
    LOGGER.info('Restoring monitoring stack from archive %s', arch['file_path'])

    monitor_stack_base_dir = tempfile.mkdtemp()
    LOGGER.info('Downloading file %s to directory %s', arch['link'], monitor_stack_base_dir)
    downloaded_monitor_archive = S3Storage().download_file(arch['link'],
                                                           dst_dir=monitor_stack_base_dir)
    monitor_data_arch = extract_monitor_data_archive(downloaded_monitor_archive,
                                                     monitor_stack_base_dir)
    monitor_stack_arch = extract_monitor_stack_archive(downloaded_monitor_archive,
                                                       monitor_stack_base_dir)
    if not monitor_data_arch:
        LOGGER.error("No prometheus snapshot was found in archive %s", arch['file_path'])
        return False
    if not monitor_stack_arch:
        LOGGER.error("No monitor stack archive was found in archive %s", arch['file_path'])
        return False

    monitor_data_dir = create_monitoring_data_dir(monitor_stack_base_dir, monitor_data_arch)
    monitor_stack_dir = create_monitoring_stack_dir(monitor_stack_base_dir, monitor_stack_arch)
    if not monitor_stack_dir or not monitor_data_dir:
        LOGGER.error('Creating monitor stack directories failed:\ndata_dir: %s; stack_dir: %s',
                     monitor_data_dir, monitor_stack_dir)
        return False

    _, scylla_version = get_monitorstack_scylla_version(monitor_stack_dir)
    status = start_dockers(monitor_stack_dir, monitor_data_dir, scylla_version)
    if status:
        upload_sct_dashboards(monitor_stack_dir, scylla_version)
        upload_annotations(monitor_stack_dir)
        return status

    LOGGER.error('Error during starting dockers. Trying next archive')
    remove_files(monitor_stack_base_dir)
    return False
def restore_monitoring_stack(test_id, date_time=None):  # pylint: disable=too-many-return-statements
    if not is_docker_available():
        return False

    arch = get_monitoring_stack_archive(test_id, date_time)
    if not arch:
        return False
    # Arch element structure:
    # {
    #     "file_path": log_file,
    #     "type": log_type,
    #     "link": link to archive,
    #     "date": creation date
    # }
    LOGGER.info('Restoring monitoring stack from archive %s', arch['file_path'])

    monitoring_stack_base_dir = tempfile.mkdtemp()
    LOGGER.info('Downloading file %s to directory %s', arch['link'], monitoring_stack_base_dir)
    downloaded_monitoring_archive = S3Storage().download_file(arch['link'],
                                                              dst_dir=monitoring_stack_base_dir)
    monitoring_data_arch = extract_monitoring_data_archive(downloaded_monitoring_archive,
                                                           monitoring_stack_base_dir)
    monitoring_stack_arch = extract_monitoring_stack_archive(downloaded_monitoring_archive,
                                                             monitoring_stack_base_dir)
    if not monitoring_data_arch:
        LOGGER.error("No prometheus snapshot was found in archive %s", arch['file_path'])
        return False
    if not monitoring_stack_arch:
        LOGGER.error("No monitoring stack archive was found in archive %s", arch['file_path'])
        return False

    monitoring_data_dir = create_monitoring_data_dir(monitoring_stack_base_dir, monitoring_data_arch)
    monitoring_stack_dir = create_monitoring_stack_dir(monitoring_stack_base_dir, monitoring_stack_arch)
    if not monitoring_stack_dir or not monitoring_data_dir:
        LOGGER.error('Creating monitoring stack directories failed:\ndata_dir: %s; stack_dir: %s',
                     monitoring_data_dir, monitoring_stack_dir)
        return False

    _, scylla_version = get_monitoring_stack_scylla_version(monitoring_stack_dir)

    status = run_monitoring_stack_containers(monitoring_stack_dir, monitoring_data_dir, scylla_version)
    if not status:
        return False

    status = restore_grafana_dashboards_and_annotations(monitoring_stack_dir, scylla_version)
    if not status:
        return False

    status = verify_monitoring_stack(scylla_version)
    if not status:
        remove_files(monitoring_stack_base_dir)
        return False

    LOGGER.info("Monitoring stack is running")
    return True
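# Usage sketch (illustrative only, not part of the original module): the test id below is a
# made-up example, and the caller is assumed to run on a host where docker is available.
#
#     if restore_monitoring_stack(test_id='1a2b3c4d'):
#         LOGGER.info('Monitoring stack containers are up and Grafana dashboards are restored')
#     else:
#         LOGGER.error('Failed to restore monitoring stack for the given test id')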
def collect_logs(self, local_search_path=None):
    def collect_logs_per_node(node):
        LOGGER.info('Collecting logs on host: %s', node.name)
        remote_node_dir = self.create_remote_storage_dir(node)
        local_node_dir = os.path.join(self.local_dir, node.name)
        for log_entity in self.log_entities:
            try:
                log_entity.collect(node, local_node_dir, remote_node_dir,
                                   local_search_path=local_search_path)
            except Exception as details:  # pylint: disable=unused-variable, broad-except
                LOGGER.error("Error occurred during collecting on host: %s\n%s", node.name, details)

    LOGGER.debug("Nodes list %s", [node.name for node in self.nodes])

    if not self.nodes:
        LOGGER.warning(f'No nodes found for {self.cluster_log_type} cluster. Logs will not be collected')
        return None
    try:
        # Collect from the nodes in parallel: use half of the nodes as workers,
        # or one worker per node for clusters smaller than four nodes.
        workers_number = int(len(self.nodes) / 2)
        workers_number = len(self.nodes) if workers_number < 2 else workers_number
        ParallelObject(self.nodes,
                       num_workers=workers_number,
                       timeout=self.collect_timeout).run(collect_logs_per_node, ignore_exceptions=True)
    except Exception as details:  # pylint: disable=broad-except
        LOGGER.error('Error occurred during collecting logs %s', details)

    if not os.listdir(self.local_dir):
        LOGGER.warning('Directory %s is empty', self.local_dir)
        return None

    final_archive = self.archive_dir_with_zip64(self.local_dir)
    if not final_archive:
        return None

    s3_link = self.upload_logs(final_archive, "{0.test_id}/{0.current_run}".format(self))
    remove_files(self.local_dir)
    remove_files(final_archive)
    return s3_link
def collect_logs(self, local_search_path=None):
    for ent in self.log_entities:
        ent.collect(None, self.local_dir, None, local_search_path=local_search_path)
    if not os.listdir(self.local_dir):
        # Nothing was collected locally, fall back to searching on the builders
        LOGGER.warning('No local files were found')
        LOGGER.info('Searching on builders')
        builders = get_builder_by_test_id(self.test_id)
        for obj in builders:
            builder = CollectingNode(name=obj['builder']['name'],
                                     ssh_login_info={"hostname": obj['builder']['public_ip'],
                                                     "user": obj['builder']['user'],
                                                     "key_file": obj['builder']['key_file']},
                                     instance=None,
                                     global_ip=obj['builder']['public_ip'])
            for ent in self.log_entities:
                ent.collect_from_builder(builder, self.local_dir, obj["path"])

        if not os.listdir(self.local_dir):
            LOGGER.warning('Nothing found')
            return None

    final_archive = self.archive_dir_with_zip64(self.local_dir)
    s3_link = self.upload_logs(final_archive, "{0.test_id}/{0.current_run}".format(self))
    remove_files(self.local_dir)
    remove_files(final_archive)
    return s3_link