def generate_release(self):
    yugabyte_folder_prefix = "yugabyte-{}".format(self.base_version)
    tmp_parent_dir = self.distribution_path + '.tmp_for_tar_gz'
    os.mkdir(tmp_parent_dir)
    # Move the distribution directory to a new location named yugabyte-<version> and archive
    # it from there so it has the right name when extracted.
    #
    # We used to do this using the --transform option to the tar command, but that has an
    # unintended side effect of corrupting library symlinks to files in the same directory.
    tmp_distribution_dir = os.path.join(tmp_parent_dir, yugabyte_folder_prefix)
    shutil.move(self.distribution_path, tmp_distribution_dir)

    try:
        release_file = self.get_release_file()
        logging.info(
            "Changing permissions recursively on directory '%s' to make it user-writable",
            tmp_distribution_dir)
        run_program(['chmod', '-R', 'u+w', tmp_distribution_dir], cwd=tmp_parent_dir)
        logging.info("Creating a package '{}' from directory {}".format(
            release_file, tmp_distribution_dir))
        run_program(['gtar', 'cvzf', release_file, yugabyte_folder_prefix], cwd=tmp_parent_dir)
        return release_file
    finally:
        shutil.move(tmp_distribution_dir, self.distribution_path)
        os.rmdir(tmp_parent_dir)
def generate_release(self):
    yugabyte_folder_prefix = "yugabyte-{}".format(self.base_version)
    tmp_parent_dir = self.distribution_path + '.tmp_for_tar_gz'
    os.mkdir(tmp_parent_dir)
    # Move the distribution directory to a new location named yugabyte-<version> and archive
    # it from there so it has the right name when extracted.
    #
    # We used to do this using the --transform option to the tar command, but that has an
    # unintended side effect of corrupting library symlinks to files in the same directory.
    tmp_distribution_dir = os.path.join(tmp_parent_dir, yugabyte_folder_prefix)
    shutil.move(self.distribution_path, tmp_distribution_dir)

    def change_permissions(mode):
        logging.info(
            "Changing permissions recursively on directory '%s': %s",
            tmp_distribution_dir, mode)
        cmd_line = ['chmod', '-R', mode, tmp_distribution_dir]
        run_program(cmd_line, cwd=tmp_parent_dir, log_command=True)

    try:
        release_file = self.get_release_file()
        change_permissions('u+w')
        change_permissions('a+r')
        # From chmod manpage, "+X" means: set the execute/search bits if the file is a directory
        # or any of the execute/search bits are set in the original (unmodified) mode.
        change_permissions('a+X')
        logging.info("Creating a package '%s' from directory %s",
                     release_file, tmp_distribution_dir)
        run_program(['gtar', 'cvzf', release_file, yugabyte_folder_prefix], cwd=tmp_parent_dir)
        return release_file
    finally:
        shutil.move(tmp_distribution_dir, self.distribution_path)
        os.rmdir(tmp_parent_dir)
def set_new_path(self, filename, old_path, new_path):
    # We need to use a different command if the path is pointing to itself. Example:
    # otool -L ./build/debug-clang-dynamic-enterprise/lib/libmaster.dylib
    # ./build/debug-clang-dynamic-enterprise/lib/libmaster.dylib:
    #     @rpath/libmaster.dylib
    if os.path.basename(filename) == os.path.basename(old_path):
        run_program(['install_name_tool', '-id', new_path, filename])
        logging.debug('install_name_tool -id %s %s', new_path, filename)
    else:
        run_program(['install_name_tool', '-change', old_path, new_path, filename])
        logging.debug('install_name_tool -change %s %s %s', old_path, new_path, filename)
def parallel_list_test_descriptors(rel_test_path):
    """
    This is invoked in parallel to list all individual tests within our C++ test programs. Without
    this, listing all gtest tests across 330 test programs might take about 5 minutes on TSAN and
    2 minutes in debug.
    """
    adjust_pythonpath()
    wait_for_path_to_exist(YB_PYTHONPATH_ENTRY)
    try:
        from yb import yb_dist_tests, command_util
    except ImportError as ex:
        raise ImportError("%s. %s" % (ex.message, get_sys_path_info_str()))

    global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict)
    global_conf.set_env(propagated_env_vars)
    wait_for_path_to_exist(global_conf.build_root)
    list_tests_cmd_line = [
        os.path.join(global_conf.build_root, rel_test_path), '--gtest_list_tests']
    try:
        prog_result = command_util.run_program(list_tests_cmd_line)
    except OSError as ex:
        logging.error("Failed running the command: %s", list_tests_cmd_line)
        raise
def run_patchelf(*args):
    patchelf_result = run_program([PATCHELF_PATH] + list(args), error_ok=True)
    if patchelf_result.returncode != 0 and patchelf_result.stderr not in [
            'cannot find section .interp',
            'cannot find section .dynamic',
            PATCHELF_NOT_AN_ELF_EXECUTABLE]:
        raise RuntimeError(patchelf_result.error_msg)
    return patchelf_result
def run_patchelf(*args):
    patchelf_result = run_program([PATCHELF_PATH] + list(args), error_ok=True)
    if patchelf_result.returncode != 0 and patchelf_result.stderr not in [
            'cannot find section .interp',
            'cannot find section .dynamic',
            PATCHELF_NOT_AN_ELF_EXECUTABLE
            ]:
        raise RuntimeError(patchelf_result.error_msg)
    return patchelf_result
def parallel_list_test_descriptors(rel_test_path):
    """
    This is invoked in parallel to list all individual tests within our C++ test programs. Without
    this, listing all gtest tests across 330 test programs might take about 5 minutes on TSAN and
    2 minutes in debug.
    """
    adjust_pythonpath()
    try:
        from yb import yb_dist_tests, command_util
    except ImportError as ex:
        raise ImportError("%s. %s" % (ex.message, get_sys_path_info_str()))

    global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict)
    global_conf.set_env(propagated_env_vars)
    prog_result = command_util.run_program([
        os.path.join(global_conf.build_root, rel_test_path), '--gtest_list_tests'])

    # --gtest_list_tests gives us the following output format:
    #  TestSplitArgs.
    #    Simple
    #    SimpleWithSpaces
    #    SimpleWithQuotes
    #    BadWithQuotes
    #    Empty
    #    Error
    #    BloomFilterReverseCompatibility
    #    BloomFilterWrapper
    #    PrefixExtractorFullFilter
    #    PrefixExtractorBlockFilter
    #    PrefixScan
    #    OptimizeFiltersForHits
    #  BloomStatsTestWithParam/BloomStatsTestWithParam.
    #    BloomStatsTest/0  # GetParam() = (true, true)
    #    BloomStatsTest/1  # GetParam() = (true, false)
    #    BloomStatsTest/2  # GetParam() = (false, false)
    #    BloomStatsTestWithIter/0  # GetParam() = (true, true)
    #    BloomStatsTestWithIter/1  # GetParam() = (true, false)
    #    BloomStatsTestWithIter/2  # GetParam() = (false, false)

    current_test = None
    test_descriptors = []
    test_descriptor_prefix = rel_test_path + yb_dist_tests.TEST_DESCRIPTOR_SEPARATOR
    for line in prog_result.stdout.split("\n"):
        if ('Starting tracking the heap' in line or
                'Dumping heap profile to' in line):
            continue
        line = line.rstrip()
        trimmed_line = HASH_COMMENT_RE.sub('', line.strip()).strip()
        if line.startswith(' '):
            test_descriptors.append(test_descriptor_prefix + current_test + trimmed_line)
        else:
            current_test = trimmed_line

    return test_descriptors
def find_elf_dependencies(self, elf_file_path: str) -> Set[Dependency]:
    """
    Run ldd on the given ELF file and find libraries that it depends on. Also run patchelf and get
    the dynamic linker used by the file.

    @param elf_file_path: ELF file (executable/library) path
    """
    linuxbrew_home: Optional[LinuxbrewHome] = get_linuxbrew_home()
    elf_file_path = os.path.realpath(elf_file_path)
    if SYSTEM_LIBRARY_PATH_RE.match(elf_file_path) or not using_linuxbrew():
        ldd_path = '/usr/bin/ldd'
    else:
        assert linuxbrew_home is not None
        assert linuxbrew_home.ldd_path is not None
        ldd_path = linuxbrew_home.ldd_path

    ldd_result = run_program([ldd_path, elf_file_path], error_ok=True)
    dependencies: Set[Dependency] = set()

    ldd_result_stdout_str = ldd_result.stdout
    ldd_result_stderr_str = ldd_result.stderr
    if ldd_result.returncode != 0:
        # The below error message is printed to stdout on some platforms (CentOS) and
        # stderr on other platforms (Ubuntu).
        if 'not a dynamic executable' in (ldd_result_stdout_str, ldd_result_stderr_str):
            logging.debug(
                "Not a dynamic executable: {}, ignoring dependency tracking".format(
                    elf_file_path))
            return dependencies
        raise RuntimeError(ldd_result.error_msg)

    for ldd_output_line in ldd_result_stdout_str.split("\n"):
        resolved_dep_match = RESOLVED_DEP_RE.match(ldd_output_line)
        if resolved_dep_match:
            lib_name = resolved_dep_match.group(1)
            lib_resolved_path = os.path.realpath(resolved_dep_match.group(2))
            dependencies.add(
                Dependency(lib_name, lib_resolved_path, elf_file_path, self.context))

        tokens = ldd_output_line.split()
        if len(tokens) >= 4 and tokens[1:4] == ['=>', 'not', 'found']:
            missing_lib_name = tokens[0]
            raise RuntimeError("Library not found for '{}': {}".format(
                elf_file_path, missing_lib_name))

        # If we matched neither RESOLVED_DEP_RE nor the "not found" case, that is still fine,
        # e.g. there could be a line of the following form in the ldd output:
        # linux-vdso.so.1 => (0x00007ffc0f9d2000)

    return dependencies
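
# A minimal, self-contained sketch (not part of the original code) of how a resolved dependency
# line from ldd output can be parsed. RESOLVED_DEP_RE itself is not shown in these snippets, so
# the pattern below is an assumption about its general shape, not the exact regex used above.
import os
import re

RESOLVED_DEP_RE_SKETCH = re.compile(r'^\s*(\S+)\s+=>\s+(\S+)\s+\(0x[0-9a-f]+\)\s*$')

sample_ldd_line = '\tlibz.so.1 => /lib64/libz.so.1 (0x00007f1234567000)'
match = RESOLVED_DEP_RE_SKETCH.match(sample_ldd_line)
if match:
    lib_name = match.group(1)                              # 'libz.so.1'
    lib_resolved_path = os.path.realpath(match.group(2))   # realpath of '/lib64/libz.so.1'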
def get_linuxbrew_dir():
    global linuxbrew_dir
    if not linuxbrew_dir:
        find_script_result = run_program(
            os.path.join(YB_SRC_ROOT, 'build-support', 'find_linuxbrew.sh'))
        linuxbrew_dir = find_script_result.stdout.strip()
        if not os.path.isdir(linuxbrew_dir):
            raise RuntimeError(
                ("Directory returned by the '{}' script does not exist: '{}'. " +
                 "Details: {}").format(
                    find_script_result.program_path, linuxbrew_dir, find_script_result))
    return linuxbrew_dir
def generate_release(self):
    yugabyte_folder_prefix = "yugabyte-{}".format(self.base_version)
    tmp_parent_dir = self.distribution_path + '.tmp_for_tar_gz'
    os.mkdir(tmp_parent_dir)
    # Move the distribution directory to a new location named yugabyte-<version> and archive
    # it from there so it has the right name when extracted.
    #
    # We used to do this using the --transform option to the tar command, but that has an
    # unintended side effect of corrupting library symlinks to files in the same directory.
    tmp_distribution_dir = os.path.join(tmp_parent_dir, yugabyte_folder_prefix)
    shutil.move(self.distribution_path, tmp_distribution_dir)

    try:
        release_file = self.get_release_file()
        logging.info("Creating a package '{}' from directory {}".format(
            release_file, tmp_distribution_dir))
        run_program(['gtar', 'cvzf', release_file, yugabyte_folder_prefix], cwd=tmp_parent_dir)
        return release_file
    finally:
        shutil.move(tmp_distribution_dir, self.distribution_path)
        os.rmdir(tmp_parent_dir)
def run_otool(self, parameter, filename):
    result = run_program(['otool', parameter, filename], error_ok=True)
    if result.stdout.endswith('is not an object file') or \
       result.stderr.endswith('The file was not recognized as a valid object file'):
        logging.info("Unable to run 'otool %s %s'. File '%s' is not an object file",
                     parameter, filename, filename)
        return None
    if result.returncode != 0:
        raise RuntimeError("Unexpected error running 'otool %s %s': '%s'" % (
            parameter, filename, result.stderr))
    return result.stdout
def parallel_list_test_descriptors(rel_test_path):
    """
    This is invoked in parallel to list all individual tests within our C++ test programs. Without
    this, listing all gtest tests across 330 test programs might take about 5 minutes on TSAN and
    2 minutes in debug.
    """
    adjust_pythonpath()
    from yb import yb_dist_tests, command_util
    global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict)
    global_conf.set_env(propagated_env_vars)
    prog_result = command_util.run_program(
        [os.path.join(global_conf.build_root, rel_test_path), '--gtest_list_tests'])

    # --gtest_list_tests gives us the following output format:
    #  TestSplitArgs.
    #    Simple
    #    SimpleWithSpaces
    #    SimpleWithQuotes
    #    BadWithQuotes
    #    Empty
    #    Error
    #    BloomFilterReverseCompatibility
    #    BloomFilterWrapper
    #    PrefixExtractorFullFilter
    #    PrefixExtractorBlockFilter
    #    PrefixScan
    #    OptimizeFiltersForHits
    #  BloomStatsTestWithParam/BloomStatsTestWithParam.
    #    BloomStatsTest/0  # GetParam() = (true, true)
    #    BloomStatsTest/1  # GetParam() = (true, false)
    #    BloomStatsTest/2  # GetParam() = (false, false)
    #    BloomStatsTestWithIter/0  # GetParam() = (true, true)
    #    BloomStatsTestWithIter/1  # GetParam() = (true, false)
    #    BloomStatsTestWithIter/2  # GetParam() = (false, false)

    current_test = None
    test_descriptors = []
    test_descriptor_prefix = rel_test_path + yb_dist_tests.TEST_DESCRIPTOR_SEPARATOR
    for line in prog_result.stdout.split("\n"):
        if ('Starting tracking the heap' in line or
                'Dumping heap profile to' in line):
            continue
        line = line.rstrip()
        trimmed_line = HASH_COMMENT_RE.sub('', line.strip()).strip()
        if line.startswith(' '):
            test_descriptors.append(test_descriptor_prefix + current_test + trimmed_line)
        else:
            current_test = trimmed_line

    return test_descriptors
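
# Illustration only (not part of the original): how the parsing loop above turns the sample
# --gtest_list_tests output into test descriptor strings. The ':::' separator and the relative
# test path used here are assumptions for this sketch, not values taken from yb_dist_tests.
import re

SAMPLE_GTEST_LIST_OUTPUT = (
    "TestSplitArgs.\n"
    "  Simple\n"
    "  SimpleWithSpaces\n"
    "BloomStatsTestWithParam/BloomStatsTestWithParam.\n"
    "  BloomStatsTest/0  # GetParam() = (true, true)\n"
)
HASH_COMMENT_RE_SKETCH = re.compile('#.*$')
descriptor_prefix = 'util/split_args-test' + ':::'
current_test = None
descriptors = []
for line in SAMPLE_GTEST_LIST_OUTPUT.split("\n"):
    trimmed_line = HASH_COMMENT_RE_SKETCH.sub('', line.strip()).strip()
    if not trimmed_line:
        continue
    if line.startswith(' '):
        descriptors.append(descriptor_prefix + current_test + trimmed_line)
    else:
        current_test = trimmed_line
# descriptors is now:
#   ['util/split_args-test:::TestSplitArgs.Simple',
#    'util/split_args-test:::TestSplitArgs.SimpleWithSpaces',
#    'util/split_args-test:::BloomStatsTestWithParam/BloomStatsTestWithParam.BloomStatsTest/0']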
def update_interpreter(binary_path):
    """
    Runs patchelf --set-interpreter on the given file to use Linuxbrew's dynamic loader
    if necessary.
    @return True in case of success.
    """
    print_interpreter_result = run_program(
        [patchelf_path, '--print-interpreter', binary_path], error_ok=True)
    if print_interpreter_result.returncode != 0:
        if print_interpreter_result.stderr.strip() == "cannot find section .interp":
            return True  # This is OK.
        logging.error(print_interpreter_result.error_msg)
        return False
    interpreter_path = print_interpreter_result.stdout.strip()
    linuxbrew_interpreter_path_suffix = '.linuxbrew-yb-build/lib/ld.so'
    if interpreter_path.endswith('/' + linuxbrew_interpreter_path_suffix):
        new_interpreter_path = os.path.join(linuxbrew_dir, 'lib', 'ld.so')
        if new_interpreter_path != interpreter_path:
            logging.debug(
                "Setting interpreter to '{}' on '{}'".format(new_interpreter_path, binary_path))
            run_program([patchelf_path, '--set-interpreter', new_interpreter_path, binary_path])
    return True
def find_elf_dependencies(self, elf_file_path):
    """
    Run ldd on the given ELF file and find libraries that it depends on. Also run patchelf and get
    the dynamic linker used by the file.

    @param elf_file_path: ELF file (executable/library) path
    """
    elf_file_path = os.path.realpath(elf_file_path)
    if SYSTEM_LIBRARY_PATH_RE.match(elf_file_path):
        ldd_path = '/usr/bin/ldd'
    else:
        ldd_path = LINUXBREW_LDD_PATH
    ldd_result = run_program([ldd_path, elf_file_path], error_ok=True)
    dependencies = set()
    if ldd_result.returncode != 0:
        # Interestingly, the below error message is printed to stdout, not stderr.
        if ldd_result.stdout == 'not a dynamic executable':
            logging.debug(
                "Not a dynamic executable: {}, ignoring dependency tracking".format(
                    elf_file_path))
            return dependencies
        raise RuntimeError(ldd_result.error_msg)

    for ldd_output_line in ldd_result.stdout.split("\n"):
        resolved_dep_match = RESOLVED_DEP_RE.match(ldd_output_line)
        if resolved_dep_match:
            lib_name = resolved_dep_match.group(1)
            lib_resolved_path = os.path.realpath(resolved_dep_match.group(2))
            dependencies.add(
                Dependency(lib_name, lib_resolved_path, elf_file_path, self.context))

        tokens = ldd_output_line.split()
        if len(tokens) >= 4 and tokens[1:4] == ['=>', 'not', 'found']:
            missing_lib_name = tokens[0]
            raise RuntimeError("Library not found for '{}': {}".format(
                elf_file_path, missing_lib_name))

        # If we matched neither RESOLVED_DEP_RE nor the "not found" case, that is still fine,
        # e.g. there could be a line of the following form in the ldd output:
        # linux-vdso.so.1 => (0x00007ffc0f9d2000)

    return dependencies
def run_patchelf(args: List[str]) -> ProgramResult:
    patchelf_path = None
    if linuxbrew_home:
        patchelf_path = linuxbrew_home.patchelf_path
    if not patchelf_path:
        patchelf_path = 'patchelf'
    patchelf_cmd_line = [patchelf_path] + args
    logging.debug(f"Running patchelf: {shlex_join(patchelf_cmd_line)}")
    patchelf_result = run_program(patchelf_cmd_line, error_ok=True)
    if patchelf_result.returncode != 0 and not any(
            msg in patchelf_result.stderr for msg in [
                'cannot find section .interp',
                'cannot find section .dynamic',
                PATCHELF_NOT_AN_ELF_EXECUTABLE,
                'missing ELF header'
            ]):
        raise RuntimeError(patchelf_result.error_msg)
    return patchelf_result
def find_elf_dependencies(self, elf_file_path):
    """
    Run ldd on the given ELF file and find libraries that it depends on. Also run patchelf and get
    the dynamic linker used by the file.

    @param elf_file_path: ELF file (executable/library) path
    """
    elf_file_path = os.path.realpath(elf_file_path)
    if SYSTEM_LIBRARY_PATH_RE.match(elf_file_path):
        ldd_path = '/usr/bin/ldd'
    else:
        ldd_path = LINUXBREW_LDD_PATH
    ldd_result = run_program([ldd_path, elf_file_path], error_ok=True)
    dependencies = set()
    if ldd_result.returncode != 0:
        # Interestingly, the below error message is printed to stdout, not stderr.
        if ldd_result.stdout == 'not a dynamic executable':
            logging.debug("Not a dynamic executable: {}, ignoring dependency tracking".format(
                elf_file_path))
            return dependencies
        raise RuntimeError(ldd_result.error_msg)

    for ldd_output_line in ldd_result.stdout.split("\n"):
        resolved_dep_match = RESOLVED_DEP_RE.match(ldd_output_line)
        if resolved_dep_match:
            lib_name = resolved_dep_match.group(1)
            lib_resolved_path = os.path.realpath(resolved_dep_match.group(2))
            dependencies.add(Dependency(lib_name, lib_resolved_path, elf_file_path, self.context))

        tokens = ldd_output_line.split()
        if len(tokens) >= 4 and tokens[1:4] == ['=>', 'not', 'found']:
            missing_lib_name = tokens[0]
            raise RuntimeError("Library not found for '{}': {}".format(
                elf_file_path, missing_lib_name))

        # If we matched neither RESOLVED_DEP_RE nor the "not found" case, that is still fine,
        # e.g. there could be a line of the following form in the ldd output:
        # linux-vdso.so.1 => (0x00007ffc0f9d2000)

    return dependencies
def get_linuxbrew_dir():
    if not sys.platform.startswith('linux'):
        return None
    global linuxbrew_dir
    if not linuxbrew_dir:
        find_script_result = run_program(os.path.join(
            YB_SRC_ROOT, 'build-support', 'find_linuxbrew.sh'))
        linuxbrew_dir = find_script_result.stdout.strip()
        if not os.path.isdir(linuxbrew_dir) and os.path.exists('/etc/centos-release'):
            raise RuntimeError(
                ("Directory returned by the '{}' script does not exist: '{}'. " +
                 "This is only an error on CentOS. Details: {}").format(
                    find_script_result.program_path, linuxbrew_dir, find_script_result))
    return linuxbrew_dir
def get_linuxbrew_dir():
    if not sys.platform.startswith('linux'):
        return None
    global linuxbrew_dir
    if not linuxbrew_dir:
        find_script_result = run_program(os.path.join(
            YB_SRC_ROOT, 'build-support', 'find_linuxbrew.sh'))
        linuxbrew_dir = find_script_result.stdout.strip()
        if not os.path.isdir(linuxbrew_dir):
            raise RuntimeError(
                ("Directory returned by the '{}' script does not exist: '{}'. " +
                 "Details: {}").format(
                    find_script_result.program_path, linuxbrew_dir, find_script_result))
    return linuxbrew_dir
def __init__(self, build_root=None):
    old_build_root = os.environ.get('BUILD_ROOT')
    if build_root is not None:
        os.environ['BUILD_ROOT'] = build_root
    else:
        build_root = os.environ.get('BUILD_ROOT')

    self.linuxbrew_dir = None
    self.linuxbrew_link_target = None
    self.cellar_glibc_dir = None
    self.ldd_path = None
    self.ld_so_path = None
    self.patchelf_path = None

    try:
        find_script_result = run_program(
            os.path.join(YB_SRC_ROOT, 'build-support', 'find_linuxbrew.sh'))
        linuxbrew_dir = find_script_result.stdout.strip()
        if not linuxbrew_dir:
            return
        if not os.path.isdir(linuxbrew_dir) and os.path.exists('/etc/centos-release'):
            raise RuntimeError(
                ("Directory returned by the '{}' script does not exist: '{}'. " +
                 "This is only an error on CentOS. Details: {}").format(
                    find_script_result.program_path, linuxbrew_dir, find_script_result))
        self.linuxbrew_dir = os.path.realpath(linuxbrew_dir)

        # Directories derived from the Linuxbrew top-level one.
        self.linuxbrew_link_target = os.path.realpath(linuxbrew_dir)
        self.cellar_glibc_dir = safe_path_join(self.linuxbrew_dir, 'Cellar', 'glibc')
        self.ldd_path = safe_path_join(self.linuxbrew_dir, 'bin', 'ldd')
        self.ld_so_path = safe_path_join(self.linuxbrew_dir, 'lib', 'ld.so')
        self.patchelf_path = safe_path_join(self.linuxbrew_dir, 'bin', 'patchelf')
    finally:
        if old_build_root is None:
            if 'BUILD_ROOT' in os.environ:
                del os.environ['BUILD_ROOT']
        else:
            os.environ['BUILD_ROOT'] = old_build_root
def run_otool(self, parameter: str, filename: str) -> Optional[str]:
    """
    Run otool to extract information from an object file. Returns the command's output to
    stdout, or None if the given file is not a valid object file. The parameter must include
    the dash.
    """
    result = run_program(['otool', parameter, filename], error_ok=True)
    if result.stdout.endswith('is not an object file') or \
       result.stderr.endswith('The file was not recognized as a valid object file'):
        logging.info(
            "Unable to run 'otool %s %s'. File '%s' is not an object file",
            parameter, filename, filename)
        return None
    if result.returncode != 0:
        raise RuntimeError("Unexpected error running 'otool %s %s': '%s'" % (
            parameter, filename, result.stderr))
    return result.stdout
def collect_cpp_tests(max_tests, cpp_test_program_filter, cpp_test_program_re_str):
    """
    Collect C++ test programs to run.
    @param max_tests: maximum number of tests to run. Used in debugging.
    @param cpp_test_program_filter: a collection of C++ test program names to be used as a filter
    @param cpp_test_program_re_str: a regular expression string to be used as a filter for the
                                    set of C++ test programs.
    """
    global_conf = yb_dist_tests.global_conf
    logging.info("Collecting the list of C++ test programs")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program([
        '/bin/bash', '-c',
        'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(
            global_conf.build_root)
    ])
    test_programs = []
    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            rel_ctest_prog_path = os.path.relpath(re_match.group(1), global_conf.build_root)
            test_programs.append(rel_ctest_prog_path)
    test_programs = sorted(set(test_programs))
    elapsed_time_sec = time.time() - start_time_sec

    logging.info("Collected %d test programs in %.2f sec" %
                 (len(test_programs), elapsed_time_sec))

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [
            test_program for test_program in test_programs
            if cpp_test_program_re.search(test_program)
        ]
        logging.info(
            "Filtered down to %d test programs using regular expression '%s'" %
            (len(test_programs), cpp_test_program_re_str))

    if cpp_test_program_filter:
        cpp_test_program_filter = set(cpp_test_program_filter)
        unfiltered_test_programs = test_programs

        # test_program contains test paths relative to the root directory (including directory
        # names), and cpp_test_program_filter contains basenames only.
        test_programs = sorted(
            set([
                test_program for test_program in test_programs
                if os.path.basename(test_program) in cpp_test_program_filter
            ]))
        logging.info(
            "Filtered down to %d test programs using the list from test conf file" %
            len(test_programs))

        if unfiltered_test_programs and not test_programs:
            # This means we've filtered the list of C++ test programs down to an empty set.
            logging.info((
                "NO MATCHING C++ TEST PROGRAMS FOUND!\n"
                "Test programs from conf file: {}, "
                "collected from ctest before filtering: {}").format(
                    set_to_comma_sep_str(cpp_test_program_filter),
                    set_to_comma_sep_str(unfiltered_test_programs)))

    if max_tests and len(test_programs) > max_tests:
        logging.info(
            "Randomly selecting {} test programs out of {} possible".format(
                max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    fine_granularity_gtest_programs = []
    one_shot_test_programs = []
    for test_program in test_programs:
        if is_one_shot_test(test_program):
            one_shot_test_programs.append(test_program)
        else:
            fine_granularity_gtest_programs.append(test_program)

    logging.info(
        ("Found {} gtest test programs where tests will be run separately, "
         "{} test programs to be run in one shot").format(
            len(fine_granularity_gtest_programs), len(one_shot_test_programs)))

    test_programs = fine_granularity_gtest_programs
    logging.info(
        "Collecting gtest tests for {} test programs where tests will be run separately"
        .format(len(test_programs)))

    start_time_sec = time.time()

    all_test_programs = fine_granularity_gtest_programs + one_shot_test_programs
    if len(all_test_programs) <= 5:
        app_name_details = [
            'test programs: [{}]'.format(', '.join(all_test_programs))
        ]
    else:
        app_name_details = ['{} test programs'.format(len(all_test_programs))]

    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in the hope of getting some
    # batching. Integer division keeps numSlices an int.
    num_slices = (len(test_programs) + 1) // 2

    all_test_descriptor_lists = run_spark_action(
        lambda: spark_context.parallelize(test_programs, numSlices=num_slices).
        map(parallel_list_test_descriptors).collect())
    elapsed_time_sec = time.time() - start_time_sec
    test_descriptor_strs = one_shot_test_programs + [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list
    ]
    logging.info("Collected the list of %d gtest tests in %.2f sec" %
                 (len(test_descriptor_strs), elapsed_time_sec))

    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]
def find_elf_dependencies(elf_file_path):
    """
    Run ldd on the given ELF file and find libraries that it depends on. Also run patchelf and get
    the dynamic linker used by the file.

    @param elf_file_path: ELF file (executable/library) path
    """
    elf_file_path = realpath(elf_file_path)
    if elf_file_path.startswith('/usr/') or elf_file_path.startswith('/lib64/'):
        ldd_path = '/usr/bin/ldd'
    else:
        ldd_path = LINUXBREW_LDD_PATH
    ldd_result = run_program([ldd_path, elf_file_path], error_ok=True)
    dependencies = set()
    if ldd_result.returncode != 0:
        # Interestingly, the below error message is printed to stdout, not stderr.
        if ldd_result.stdout == 'not a dynamic executable':
            logging.debug(
                "Not a dynamic executable: {}, ignoring dependency tracking".format(
                    elf_file_path))
            return dependencies
        raise RuntimeError(ldd_result.error_msg)

    for ldd_output_line in ldd_result.stdout.split("\n"):
        m = RESOLVED_DEP_RE.match(ldd_output_line)
        if m:
            lib_name = m.group(1)
            lib_resolved_path = realpath(m.group(2))
            dependencies.add(Dependency(lib_name, lib_resolved_path))

        tokens = ldd_output_line.split()
        if len(tokens) >= 4 and tokens[1:4] == ['=>', 'not', 'found']:
            missing_lib_name = tokens[0]
            raise RuntimeError("Library not found for '{}': {}".format(
                elf_file_path, missing_lib_name))

        # If we matched neither RESOLVED_DEP_RE nor the "not found" case, that is still fine,
        # e.g. there could be a line of the following form in the ldd output:
        # linux-vdso.so.1 => (0x00007ffc0f9d2000)

    elf_basename = path_basename(elf_file_path)
    elf_dirname = path_dirname(elf_file_path)
    if elf_basename.startswith('libsasl2.'):
        # TODO: don't package Berkeley DB with the product -- it has an AGPL license.
        for libdb_so_name in ['libdb.so', 'libdb-5.so']:
            libdb_so_path = path_join(elf_dirname, libdb_so_name)
            if os.path.exists(libdb_so_path):
                dependencies.add(Dependency(libdb_so_name, libdb_so_path, via_dlopen=True))
        sasl_plugin_dir = '/usr/lib64/sasl2'
        for sasl_lib_name in os.listdir(sasl_plugin_dir):
            if sasl_lib_name.endswith('.so'):
                dependencies.add(
                    Dependency(sasl_lib_name,
                               realpath(path_join(sasl_plugin_dir, sasl_lib_name))))

    if elf_basename.startswith('libc-'):
        # glibc loads a lot of libnss_... libraries using dlopen.
        for libnss_lib in glob.glob(os.path.join(elf_dirname, 'libnss_*')):
            if re.search(r'([.]so|\d+)$', libnss_lib):
                dependencies.add(
                    Dependency(path_basename(libnss_lib), libnss_lib, via_dlopen=True))

    return dependencies
def remove_rpaths(self, filename, rpaths):
    for rpath in rpaths:
        run_program(
            ['install_name_tool', '-delete_rpath', rpath, filename])
        logging.info('Successfully removed rpath %s from %s', rpath, filename)
def remove_rpaths(self, filename, rpaths):
    for rpath in rpaths:
        run_program(
            ["install_name_tool", "-delete_rpath", rpath, filename])
        logging.info("Successfully removed rpath %s from %s" % (rpath, filename))
def collect_cpp_tests(max_tests, cpp_test_program_filter, cpp_test_program_re_str):
    """
    Collect C++ test programs to run.
    @param max_tests: maximum number of tests to run. Used in debugging.
    @param cpp_test_program_filter: a collection of C++ test program names to be used as a filter
    @param cpp_test_program_re_str: a regular expression string to be used as a filter for the
                                    set of C++ test programs.
    """
    global_conf = yb_dist_tests.global_conf
    logging.info("Collecting the list of C++ test programs")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program(
        ['/bin/bash', '-c',
         'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(
             global_conf.build_root)])
    test_programs = []
    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            rel_ctest_prog_path = os.path.relpath(re_match.group(1), global_conf.build_root)
            test_programs.append(rel_ctest_prog_path)
    test_programs = sorted(set(test_programs))
    elapsed_time_sec = time.time() - start_time_sec

    logging.info("Collected %d test programs in %.2f sec" % (
        len(test_programs), elapsed_time_sec))

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [test_program for test_program in test_programs
                         if cpp_test_program_re.search(test_program)]
        logging.info("Filtered down to %d test programs using regular expression '%s'" %
                     (len(test_programs), cpp_test_program_re_str))

    if cpp_test_program_filter:
        cpp_test_program_filter = set(cpp_test_program_filter)
        unfiltered_test_programs = test_programs

        # test_program contains test paths relative to the root directory (including directory
        # names), and cpp_test_program_filter contains basenames only.
        test_programs = sorted(set([
            test_program for test_program in test_programs
            if os.path.basename(test_program) in cpp_test_program_filter
        ]))
        logging.info("Filtered down to %d test programs using the list from test conf file" %
                     len(test_programs))

        if unfiltered_test_programs and not test_programs:
            # This means we've filtered the list of C++ test programs down to an empty set.
            logging.info(
                ("NO MATCHING C++ TEST PROGRAMS FOUND!\n"
                 "Test programs from conf file: {}, "
                 "collected from ctest before filtering: {}").format(
                    set_to_comma_sep_str(cpp_test_program_filter),
                    set_to_comma_sep_str(unfiltered_test_programs)))

    if max_tests and len(test_programs) > max_tests:
        logging.info("Randomly selecting {} test programs out of {} possible".format(
            max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    fine_granularity_gtest_programs = []
    one_shot_test_programs = []
    for test_program in test_programs:
        if is_one_shot_test(test_program):
            one_shot_test_programs.append(test_program)
        else:
            fine_granularity_gtest_programs.append(test_program)

    logging.info(("Found {} gtest test programs where tests will be run separately, "
                  "{} test programs to be run in one shot").format(
        len(fine_granularity_gtest_programs), len(one_shot_test_programs)))

    test_programs = fine_granularity_gtest_programs
    logging.info(
        "Collecting gtest tests for {} test programs where tests will be run separately".format(
            len(test_programs)))

    start_time_sec = time.time()

    all_test_programs = fine_granularity_gtest_programs + one_shot_test_programs
    if len(all_test_programs) <= 5:
        app_name_details = ['test programs: [{}]'.format(', '.join(all_test_programs))]
    else:
        app_name_details = ['{} test programs'.format(len(all_test_programs))]

    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in the hope of getting some
    # batching. Integer division keeps numSlices an int.
    num_slices = (len(test_programs) + 1) // 2

    all_test_descriptor_lists = run_spark_action(
        lambda: spark_context.parallelize(
            test_programs, numSlices=num_slices).map(parallel_list_test_descriptors).collect()
    )
    elapsed_time_sec = time.time() - start_time_sec
    test_descriptor_strs = one_shot_test_programs + [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list]
    logging.info("Collected the list of %d gtest tests in %.2f sec" % (
        len(test_descriptor_strs), elapsed_time_sec))

    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]
def main():
    """
    Main entry point. Returns True on success.
    """

    parser = argparse.ArgumentParser(
        description='Fix rpath for YugaByte and third-party binaries.')
    parser.add_argument('--verbose', dest='verbose', action='store_true',
                        help='Verbose output')
    args = parser.parse_args()

    log_level = logging.INFO
    if args.verbose:
        log_level = logging.DEBUG
    logging.basicConfig(
        level=log_level,
        format="[" + os.path.basename(__file__) + "] %(asctime)s %(levelname)s: %(message)s")

    is_success = True

    thirdparty_dir = os.path.realpath(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'thirdparty'))

    elf_files = []

    if sys.platform == 'darwin':
        logging.info("fix_rpath.py does not do anything on Mac OS X")
        return True

    logging.info("Fixing rpath/runpath for the third-party binaries")

    if os.path.isdir(os.path.join(thirdparty_dir, 'installed', 'common')):
        logging.info("Using new directory hierarchy layout under thirdparty/installed")
        # This is an indication that we are using a new-style directory layout in
        # thirdparty/installed.
        prefix_dirs = [
            os.path.join(thirdparty_dir, 'installed', prefix_rel_dir)
            for prefix_rel_dir in ['common', 'tsan', 'uninstrumented']
        ] + [os.path.join(thirdparty_dir, 'clang-toolchain')] + [
            os.path.join(thirdparty_dir, 'build', instrumentation_type, glob_pattern)
            for instrumentation_type in ['uninstrumented', 'tsan']
            for glob_pattern in ['gflags-*', 'snappy-*']
        ]
    else:
        # Old-style directory structure, to be removed once migration is complete.
        logging.info("Using old directory hierarchy layout under thirdparty/installed*")
        prefix_dirs = [
            os.path.join(thirdparty_dir, prefix_rel_dir)
            for prefix_rel_dir in ['installed', 'installed-deps', 'installed-deps-tsan']
        ]

    global linuxbrew_dir
    linuxbrew_dir = get_linuxbrew_dir()
    assert linuxbrew_dir

    # We need patchelf as it is more flexible than chrpath and can in fact increase the length of
    # rpath if there is enough space. This could happen if rpath used to be longer but was reduced
    # by a previous patchelf / chrpath command. Another reason we need patchelf is to set the
    # interpreter (dynamic linker) path on binaries that used to point to Linuxbrew's dynamic
    # linker installed in a different location.
    global patchelf_path
    patchelf_possible_paths = ['/usr/bin/patchelf',
                               os.path.join(linuxbrew_dir, 'bin', 'patchelf')]
    for patchelf_path_candidate in patchelf_possible_paths:
        if os.path.isfile(patchelf_path_candidate):
            patchelf_path = patchelf_path_candidate
            break
    if not patchelf_path:
        logging.error("Could not find patchelf in any of the paths: {}".format(
            patchelf_possible_paths))
        return False

    num_binaries_no_rpath_change = 0
    num_binaries_updated_rpath = 0

    dirs_to_search = []
    for pattern in prefix_dirs:
        dirs_to_search += glob.glob(pattern)
    logging.info(
        "Fixing rpath/interpreter path for ELF files in the following directories:{}".format(
            ("\n" + " " * 8).join([""] + dirs_to_search)))

    for prefix_dir in dirs_to_search:
        for file_dir, dirs, file_names in os.walk(prefix_dir):
            for file_name in file_names:
                if file_name.endswith('_patchelf_tmp'):
                    continue
                binary_path = os.path.join(file_dir, file_name)
                if ((os.access(binary_path, os.X_OK) or
                     file_name.endswith('.so') or
                     '.so.' in file_name) and
                        not os.path.islink(binary_path) and
                        not file_name.endswith('.sh') and
                        not file_name.endswith('.py') and
                        not file_name.endswith('.la')):

                    # Invoke readelf to read the current rpath.
                    readelf_subprocess = subprocess.Popen(
                        ['/usr/bin/readelf', '-d', binary_path],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    readelf_stdout, readelf_stderr = readelf_subprocess.communicate()

                    if 'Not an ELF file' in readelf_stderr:
                        logging.debug("Not an ELF file: '%s', skipping" % binary_path)
                        continue

                    if not update_interpreter(binary_path):
                        is_success = False

                    if readelf_subprocess.returncode != 0:
                        logging.warn("readelf returned exit code %d for '%s', stderr:\n%s" % (
                            readelf_subprocess.returncode, binary_path, readelf_stderr))
                        return False

                    elf_files.append(binary_path)

                    # Collect and process all rpath entries and resolve $ORIGIN (the directory of
                    # the executable or binary itself). We will put $ORIGIN back later.

                    original_rpath_entries = []  # Entries with $ORIGIN left as is
                    original_real_rpath_entries = []  # Entries with $ORIGIN substituted

                    for readelf_out_line in readelf_stdout.split('\n'):
                        matcher = READELF_RPATH_RE.search(readelf_out_line)
                        if matcher:
                            for rpath_entry in matcher.groups()[0].split(':'):
                                # Remove duplicate entries but preserve order.
                                add_if_absent(original_rpath_entries, rpath_entry)
                                rpath_entry = rpath_entry.replace('$ORIGIN', file_dir)
                                rpath_entry = rpath_entry.replace('${ORIGIN}', file_dir)
                                # Ignore a special kind of rpath entry that we add just to
                                # increase the number of bytes reserved for rpath.
                                if not rpath_entry.startswith(
                                        '/tmp/making_sure_we_have_enough_room_to_set_rpath_later'):
                                    add_if_absent(original_real_rpath_entries,
                                                  os.path.realpath(rpath_entry))

                    new_relative_rpath_entries = []
                    logging.debug("Original rpath from '%s': %s" % (
                        binary_path, ':'.join(original_rpath_entries)))

                    # A special case: the glog build ends up missing the rpath entry for gflags.
                    if file_name.startswith('libglog.'):
                        add_if_absent(original_real_rpath_entries,
                                      os.path.realpath(os.path.dirname(binary_path)))

                    if not original_real_rpath_entries:
                        logging.debug("No rpath entries in '%s', skipping", binary_path)
                        continue

                    new_real_rpath_entries = []
                    for rpath_entry in original_real_rpath_entries:
                        real_rpath_entry = os.path.realpath(rpath_entry)
                        linuxbrew_rpath_match = LINUXBREW_PATH_RE.match(real_rpath_entry)
                        if linuxbrew_rpath_match:
                            # This is a Linuxbrew directory relative to the home directory of the
                            # user that built the third-party package. We need to substitute the
                            # current user's home directory, or the directory containing the
                            # .linuxbrew-yb-build directory, into that instead.
                            new_rpath_entry = os.path.join(linuxbrew_dir,
                                                           linuxbrew_rpath_match.group(1))
                        else:
                            rel_path = os.path.relpath(real_rpath_entry,
                                                       os.path.realpath(file_dir))

                            # Normalize the new rpath entry by making it relative to $ORIGIN.
                            # This is only necessary for third-party libraries that may need to
                            # be moved from place to place.
                            if rel_path == '.':
                                new_rpath_entry = '$ORIGIN'
                            else:
                                new_rpath_entry = '$ORIGIN/' + rel_path
                            if len(new_rpath_entry) > 2 and new_rpath_entry.endswith('/.'):
                                new_rpath_entry = new_rpath_entry[:-2]

                        # Remove duplicate entries but preserve order. There may be further
                        # deduplication at this point, because we may have entries that only
                        # differ in presence/absence of a trailing slash, which only get
                        # normalized here.
                        add_if_absent(new_relative_rpath_entries, new_rpath_entry)
                        add_if_absent(new_real_rpath_entries, real_rpath_entry)

                    # We have to make rpath entries relative for third-party dependencies.
                    if original_rpath_entries == new_relative_rpath_entries:
                        logging.debug("No change in rpath entries for '%s' (already relative)",
                                      binary_path)
                        num_binaries_no_rpath_change += 1
                        continue

                    add_if_absent(new_relative_rpath_entries,
                                  os.path.join(linuxbrew_dir, 'lib'))
                    new_rpath_str = ':'.join(new_relative_rpath_entries)

                    # When using patchelf, this will actually set RUNPATH as a newer replacement
                    # for RPATH, with the difference being that RUNPATH can be overwritten by
                    # LD_LIBRARY_PATH, but we're OK with it.
                    # Note: pipes.quote is a way to escape strings for inclusion in shell
                    # commands.
                    set_rpath_cmd = [patchelf_path, '--set-rpath', new_rpath_str, binary_path]
                    logging.debug("Setting rpath on '%s' to '%s'" % (binary_path, new_rpath_str))

                    for i in range(10):
                        set_rpath_result = run_program(set_rpath_cmd, error_ok=True)
                        if set_rpath_result.returncode == 0 or \
                           'open: Resource temporarily unavailable' not in \
                           set_rpath_result.stderr:
                            break
                        logging.info(
                            "Re-trying to set rpath on '%s' (resource temporarily unavailable)",
                            binary_path)

                    if set_rpath_result.returncode != 0:
                        logging.warn(
                            "Could not set rpath on '%s': exit code %d, command: %s",
                            binary_path, set_rpath_result.returncode, set_rpath_cmd)
                        logging.warn("patchelf stderr: " + set_rpath_result.stderr)
                        is_success = False
                    num_binaries_updated_rpath += 1

    logging.info("Number of binaries with no rpath change: {}, updated rpath: {}".format(
        num_binaries_no_rpath_change, num_binaries_updated_rpath))

    could_resolve_all = True
    logging.info("Checking if all libraries can be resolved")
    for binary_path in elf_files:
        all_libs_found, missing_libs = run_ldd(binary_path, report_missing_libs=True)
        if not all_libs_found:
            could_resolve_all = False

    if could_resolve_all:
        logging.info("All libraries resolved successfully!")
    else:
        logging.error("Some libraries could not be resolved.")

    return is_success and could_resolve_all
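
# Standalone sketch (not part of the original script) of the $ORIGIN-relative rewrite that the
# loop above applies to rpath entries. The example paths in the comment are made up; the point
# is that os.path.relpath produces the '$ORIGIN/...' form used above.
import os

def make_origin_relative_rpath(rpath_entry, binary_dir):
    rel_path = os.path.relpath(os.path.realpath(rpath_entry), os.path.realpath(binary_dir))
    if rel_path == '.':
        return '$ORIGIN'
    return '$ORIGIN/' + rel_path

# e.g. make_origin_relative_rpath('/opt/yb/thirdparty/installed/lib',
#                                 '/opt/yb/thirdparty/installed/bin') -> '$ORIGIN/../lib'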
def change_permissions(mode: str) -> None:
    logging.info(
        "Changing permissions recursively on directory '%s': %s",
        tmp_distribution_dir, mode)
    cmd_line = ['chmod', '-R', mode, tmp_distribution_dir]
    run_program(cmd_line, cwd=tmp_parent_dir, log_command=True)
def remove_rpaths(self, filename: str, rpaths: List[str]) -> None:
    for rpath in rpaths:
        run_program(
            ['install_name_tool', '-delete_rpath', rpath, filename])
        logging.debug('Successfully removed rpath %s from %s', rpath, filename)
def collect_cpp_tests(max_tests, cpp_test_program_re_str):
    global_conf = yb_dist_tests.global_conf
    logging.info("Collecting the list of C++ tests")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program(
        ['/bin/bash', '-c',
         'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(
             global_conf.build_root)])
    test_programs = []
    test_descriptor_strs = []

    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            rel_ctest_prog_path = os.path.relpath(re_match.group(1), global_conf.build_root)
            if is_one_shot_test(rel_ctest_prog_path):
                test_descriptor_strs.append(rel_ctest_prog_path)
            else:
                test_programs.append(rel_ctest_prog_path)

    elapsed_time_sec = time.time() - start_time_sec
    logging.info("Collected %d test programs in %.2f sec" % (
        len(test_programs), elapsed_time_sec))

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [test_program for test_program in test_programs
                         if cpp_test_program_re.search(test_program)]
        logging.info("Filtered down to %d test programs using regular expression '%s'" %
                     (len(test_programs), cpp_test_program_re_str))

    if max_tests and len(test_programs) > max_tests:
        logging.info("Randomly selecting {} test programs out of {} possible".format(
            max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    logging.info("Collecting gtest tests for {} test programs".format(len(test_programs)))
    start_time_sec = time.time()

    if len(test_programs) <= 3:
        app_name_details = ['test programs: [{}]'.format(', '.join(test_programs))]
    else:
        app_name_details = ['{} test programs'.format(len(test_programs))]
    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in the hope of getting some
    # batching. Integer division keeps numSlices an int.
    num_slices = (len(test_programs) + 1) // 2
    all_test_descriptor_lists = spark_context.parallelize(
        test_programs, numSlices=num_slices).map(parallel_list_test_descriptors).collect()
    elapsed_time_sec = time.time() - start_time_sec
    test_descriptor_strs += [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list]
    logging.info("Collected the list of %d gtest tests in %.2f sec" % (
        len(test_descriptor_strs), elapsed_time_sec))
    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]
def remove_rpaths(self, filename, rpaths):
    for rpath in rpaths:
        run_program(['install_name_tool', '-delete_rpath', rpath, filename])
        logging.info('Successfully removed rpath %s from %s', rpath, filename)