def decompress_corpus(archive_path, documents_path, uncompressed_size):
    if uncompressed_size:
        console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                     (archive_path, documents_path, convert.bytes_to_gb(uncompressed_size)),
                     end='', flush=True, logger=logger)
    else:
        console.info("Decompressing track data from [%s] to [%s] ... " %
                     (archive_path, documents_path), end='', flush=True, logger=logger)
    io.decompress(archive_path, io.dirname(archive_path))
    console.println("[OK]")
    if not os.path.isfile(documents_path):
        raise exceptions.DataError("Decompressing [%s] did not create [%s]. Please check with the track author if the compressed "
                                   "archive has been created correctly." % (archive_path, documents_path))
    extracted_bytes = os.path.getsize(documents_path)
    if uncompressed_size is not None and extracted_bytes != uncompressed_size:
        raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                   (documents_path, extracted_bytes, uncompressed_size))

def decompress(data_set_path, expected_size_in_bytes):
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, extension = io.splitext(data_set_path)
    decompressed = False

    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        decompressed = True
        # note: "type" is not defined in this function; it presumably refers to the track's document type
        # from the surrounding code (and shadows the builtin).
        if type.uncompressed_size_in_bytes:
            console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                         (data_set_path, basename, convert.bytes_to_gb(type.uncompressed_size_in_bytes)),
                         end='', flush=True, logger=logger)
        else:
            console.info("Decompressing track data from [%s] to [%s] ... " %
                         (data_set_path, basename), end='', flush=True, logger=logger)
        io.decompress(data_set_path, io.dirname(data_set_path))
        console.println("[OK]")
        extracted_bytes = os.path.getsize(basename)
        if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))
    return basename, decompressed

def on_benchmark_stop(self):
    logger.info("Analyzing merge times.")
    # first decompress all logs. They have unique names so it's safe to do that. It's easier to first decompress everything
    for log_file in os.listdir(self.node_log_dir):
        log_path = "%s/%s" % (self.node_log_dir, log_file)
        if io.is_archive(log_path):
            logger.info("Decompressing [%s] to analyze merge times..." % log_path)
            io.decompress(log_path, self.node_log_dir)

    # we need to add up times from all files
    merge_times = {}
    for log_file in os.listdir(self.node_log_dir):
        log_path = "%s/%s" % (self.node_log_dir, log_file)
        if not io.is_archive(log_file):
            logger.debug("Analyzing merge times in [%s]" % log_path)
            with open(log_path, mode="rt", encoding="utf-8") as f:
                self._extract_merge_times(f, merge_times)
        else:
            logger.debug("Skipping archived logs in [%s]." % log_path)

    if merge_times:
        self._store_merge_times(merge_times)
    logger.info("Finished analyzing merge times. Extracted [%s] different merge time components." % len(merge_times))

def install(self, binary):
    logger.info("Preparing candidate locally in [%s]." % self.install_dir)
    io.ensure_dir(self.install_dir)
    io.ensure_dir(self.node_log_dir)
    logger.info("Unzipping %s to %s" % (binary, self.install_dir))
    io.decompress(binary, self.install_dir)
    self.es_home_path = glob.glob("%s/elasticsearch*" % self.install_dir)[0]
    self.data_paths = self._data_paths()

def _install_binary(self):
    binary = self._config.opts("builder", "candidate.bin.path")
    install_dir = self._install_dir()
    logger.info("Preparing candidate locally in %s." % install_dir)
    io.ensure_dir(install_dir)
    logger.info("Unzipping %s to %s" % (binary, install_dir))
    io.decompress(binary, install_dir)
    binary_path = glob.glob("%s/elasticsearch*" % install_dir)[0]
    self._config.add(config.Scope.benchmark, "provisioning", "local.binary.path", binary_path)

def decompress(data_set_path, expected_size_in_bytes):
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, extension = io.splitext(data_set_path)
    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        logger.info("Unzipping track data from [%s] to [%s]." % (data_set_path, basename))
        io.decompress(data_set_path, io.dirname(data_set_path))
        extracted_bytes = os.path.getsize(basename)
        if extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))

def _install_binary(self, binary):
    logger.info("Preparing candidate locally in [%s]." % self.install_dir)
    io.ensure_dir(self.install_dir)
    if not self.preserve:
        console.info("Rally will delete the benchmark candidate after the benchmark")
    logger.info("Unzipping %s to %s" % (binary, self.install_dir))
    io.decompress(binary, self.install_dir)
    self.binary_path = glob.glob("%s/elasticsearch*" % self.install_dir)[0]

def _install_binary(self):
    binary = self._config.opts("builder", "candidate.bin.path")
    install_dir = self._install_dir()
    logger.info("Preparing candidate locally in [%s]." % install_dir)
    io.ensure_dir(install_dir)
    if not self.preserve:
        console.info("Rally will delete the benchmark candidate after the benchmark")
    logger.info("Unzipping %s to %s" % (binary, install_dir))
    io.decompress(binary, install_dir)
    binary_path = glob.glob("%s/elasticsearch*" % install_dir)[0]
    self._config.add(config.Scope.benchmark, "provisioning", "local.binary.path", binary_path)

def test_decompresses_supported_file_formats(self):
    for ext in ["zip", "gz", "bz2", "tgz", "tar.bz2", "tar.gz"]:
        tmp_dir = tempfile.mkdtemp()
        archive_path = "%s/resources/test.txt.%s" % (os.path.dirname(os.path.abspath(__file__)), ext)
        decompressed_path = "%s/test.txt" % tmp_dir

        io.decompress(archive_path, target_directory=tmp_dir)

        self.assertTrue(os.path.exists(decompressed_path),
                        msg="Could not decompress [%s] to [%s] (target file does not exist)" % (archive_path, decompressed_path))
        self.assertEqual("Sample text for DecompressionTests\n", self.read(decompressed_path),
                         msg="Could not decompress [%s] to [%s] (target file is corrupt)" % (archive_path, decompressed_path))

def install(self, binary):
    self.logger.info("Preparing candidate locally in [%s].", self.install_dir)
    io.ensure_dir(self.install_dir)
    io.ensure_dir(self.node_log_dir)
    io.ensure_dir(self.heap_dump_dir)
    self.logger.info("Unzipping %s to %s", binary, self.install_dir)
    io.decompress(binary, self.install_dir)
    self.es_home_path = glob.glob(os.path.join(self.install_dir, "elasticsearch*"))[0]
    self.data_paths = self._data_paths()

def test_decompresses_supported_file_formats(self):
    for ext in ["zip", "gz", "bz2", "tgz", "tar.bz2", "tar.gz"]:
        tmp_dir = tempfile.mkdtemp()
        archive_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", f"test.txt.{ext}")
        decompressed_path = os.path.join(tmp_dir, "test.txt")

        io.decompress(archive_path, target_directory=tmp_dir)

        assert os.path.exists(decompressed_path) is True, \
            f"Could not decompress [{archive_path}] to [{decompressed_path}] (target file does not exist)"
        assert self.read(decompressed_path) == "Sample text for DecompressionTests\n", \
            f"Could not decompress [{archive_path}] to [{decompressed_path}] (target file is corrupt)"

def decompress(data_set_path, expected_size_in_bytes):
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, extension = io.splitext(data_set_path)
    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        logger.info("Unzipping track data from [%s] to [%s]." % (data_set_path, basename))
        # note: "type" is not defined in this function; it presumably refers to the track's document type
        # from the surrounding code (and shadows the builtin).
        print("Decompressing %s (resulting size: %.2f GB) ... " %
              (type.document_archive, convert.bytes_to_gb(type.uncompressed_size_in_bytes)), end='', flush=True)
        io.decompress(data_set_path, io.dirname(data_set_path))
        print("Done")
        extracted_bytes = os.path.getsize(basename)
        if extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))

def install_certificates(config_names, variables, **kwargs):
    if "security" not in config_names:
        return False

    node_name = variables["node_name"]
    node_ip = variables["node_ip"]
    install_root = variables["install_root_path"]
    x_pack_config_path = os.path.join(install_root, "config", "x-pack")

    logger.info("Installing x-pack certificates for node [%s]." % node_name)
    # 0. Create instances.yml for the current ES node.
    instances_yml = os.path.join(tempfile.mkdtemp(), "instances.yml")
    with open(instances_yml, "w") as f:
        f.write(instances_yml_template.format(node_name=node_name, node_ip=node_ip))

    # 1. Create certificate if needed. We will prebundle the CA with Rally and generate instance certificates based on this CA.
    cert_gen = os.path.join(install_root, "bin", "x-pack", "certgen")
    cert_bundle = os.path.join(install_root, "config", "x-pack", "node-cert.zip")

    # ./bin/x-pack/certgen
    #   -cert=/Users/daniel/.rally/benchmarks/distributions/elasticsearch-5.5.0/config/x-pack/ca/ca.crt
    #   -key=/Users/daniel/.rally/benchmarks/distributions/elasticsearch-5.5.0/config/x-pack/ca/ca.key
    #   -in=/Users/daniel/.rally/benchmarks/distributions/elasticsearch-5.5.0/config/instances.yml
    #   -out=/Users/daniel/.rally/benchmarks/distributions/elasticsearch-5.5.0/config/x-pack/node-cert.zip
    return_code = process.run_subprocess_with_logging(
        '{cert_gen} -cert="{config_path}/ca/ca.crt" -key="{config_path}/ca/ca.key" -in="{instances_yml}" -out="{cert_bundle}"'
        .format(cert_gen=cert_gen, config_path=x_pack_config_path, instances_yml=instances_yml, cert_bundle=cert_bundle))
    if return_code != 0:
        logger.error("certgen has exited with code [%d]" % return_code)
        raise exceptions.SystemSetupError("Could not create x-pack certificate bundle for node [%s]. "
                                          "Please see the log for details." % node_name)

    # 2. Unzip /Users/daniel/.rally/benchmarks/distributions/elasticsearch-5.5.0/config/x-pack/node-cert.zip
    io.decompress(cert_bundle, x_pack_config_path)

    # Success
    return True

def test_decompresses_supported_file_formats_with_lib_as_failover(self, mocked_is_executable):
    for ext in ["zip", "gz", "bz2", "tgz", "tar.bz2", "tar.gz"]:
        tmp_dir = tempfile.mkdtemp()
        archive_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", f"test.txt.{ext}")
        decompressed_path = os.path.join(tmp_dir, "test.txt")
        logger = logging.getLogger("esrally.utils.io")

        with mock.patch.object(logger, "warning") as mocked_console_warn:
            io.decompress(archive_path, target_directory=tmp_dir)

        assert os.path.exists(decompressed_path) is True, \
            f"Could not decompress [{archive_path}] to [{decompressed_path}] (target file does not exist)"
        assert self.read(decompressed_path) == "Sample text for DecompressionTests\n", \
            f"Could not decompress [{archive_path}] to [{decompressed_path}] (target file is corrupt)"
        if ext in ["bz2", "gz"]:
            assert "not found in PATH. Using standard library, decompression will take longer." in mocked_console_warn.call_args[0][0]

def install_certificates(config_names, variables, **kwargs):
    if "x-pack-security" not in config_names:
        return False

    logger = logging.getLogger(LOGGER_NAME)
    cert_binary = "elasticsearch-certutil"
    node_name = variables["node_name"]
    node_ip = variables["node_ip"]
    install_root = variables["install_root_path"]
    bundled_ca_path = os.path.join(os.path.dirname(__file__), "ca")
    x_pack_config_path = os.path.join(install_root, "config", "x-pack")

    logger.info("Installing certificates for node [%s].", node_name)

    instances_yml = os.path.join(tempfile.mkdtemp(), "instances.yml")
    with open(instances_yml, "w") as f:
        f.write(instances_yml_template.format(node_name=node_name, node_ip=node_ip))

    # Generate instance certificates based on a CA that is pre-bundled with Rally
    certutil = resolve_binary(install_root, cert_binary)
    cert_bundle = os.path.join(install_root, "node-cert.zip")

    return_code = process.run_subprocess_with_logging(
        '{certutil} cert --silent --in "{instances_yml}" --out="{cert_bundle}" --ca-cert="{ca_path}/ca.crt" '
        '--ca-key="{ca_path}/ca.key" --pass ""'.format(
            certutil=certutil, ca_path=bundled_ca_path, instances_yml=instances_yml, cert_bundle=cert_bundle),
        env=kwargs.get("env"))
    if return_code != 0:
        logger.error("%s has exited with code [%d]", cert_binary, return_code)
        raise exceptions.SystemSetupError(
            "Could not create certificate bundle for node [{}]. Please see the log for details.".format(node_name))

    io.decompress(cert_bundle, x_pack_config_path)

    # Success
    return True

def decompress(data_set_path, expected_size_in_bytes):
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, extension = io.splitext(data_set_path)
    decompressed = False

    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        decompressed = True
        # note: "type" is not defined in this function; it presumably refers to the track's document type
        # from the surrounding code (and shadows the builtin).
        if type.uncompressed_size_in_bytes:
            console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                         (data_set_path, basename, convert.bytes_to_gb(type.uncompressed_size_in_bytes)),
                         end='', flush=True, logger=logger)
        else:
            console.info("Decompressing track data from [%s] to [%s] ... " %
                         (data_set_path, basename), end='', flush=True, logger=logger)
        io.decompress(data_set_path, io.dirname(data_set_path))
        console.println("[OK]")
        extracted_bytes = os.path.getsize(basename)
        if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))
    return basename, decompressed

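# Minimal standalone usage sketch of the shared esrally.utils.io.decompress helper that all of the
# snippets above rely on. The archive path and file name below are hypothetical and only illustrate
# the call shape that the tests above exercise (positional archive path plus target_directory keyword).
import os
import tempfile

from esrally.utils import io

archive_path = "/tmp/rally-data/documents.json.bz2"  # hypothetical compressed corpus
target_dir = tempfile.mkdtemp()

# extract the archive next to a temporary directory; supported formats per the tests above
# include zip, gz, bz2, tgz, tar.bz2 and tar.gz
io.decompress(archive_path, target_directory=target_dir)

decompressed_path = os.path.join(target_dir, "documents.json")  # hypothetical extracted file name
print("Extracted %d bytes to %s" % (os.path.getsize(decompressed_path), decompressed_path))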