示例#1
0
class LinkHelper:
    """
    Helps with creating and running a custom linking command for YugabyteDB outside of the build
    system.

    The new command is accumulated in new_args, which starts out containing only the clang++
    driver path. consume_original_link_cmd has to run before add_final_args, because the latter
    reads final_output_name and only the former assigns it.
    """
    # Dependency graph used to look up nodes; its conf attribute supplies the build root.
    dep_graph: DependencyGraph
    # The node whose link command is being re-created. Its link_cmd must be non-empty.
    initial_node: Node

    # Arguments to the linker in the original command that produces the initial_node target.
    # Does not include the compiler driver executable.
    original_link_args: List[str]

    # Build root directory, and the BuildPaths helper constructed from it. The three paths below
    # are all obtained from that helper.
    build_root: str
    build_paths: BuildPaths
    llvm_path: str
    thirdparty_path: str
    # Path of the clang++ tool; used as the first element of the new link command.
    clang_cpp_path: str

    # Dependency graph nodes corresponding to the object files present in the original linker
    # command. Used for deduplication.
    obj_file_graph_nodes: Set[Node]

    # The link command being built.
    new_args: LinkCommand

    # Build directory of the Postgres backend.
    pg_backend_build_dir: str

    # The command for linking the yb_pgbackend library.
    yb_pgbackend_link_cmd: List[str]

    # Optional suffix appended to the output file name of the new link command.
    lto_output_suffix: Optional[str]

    # Populated by consume_original_link_cmd.
    # NOTE(review): __init__ additionally assigns static_lib_paths (the result of
    # get_static_lib_paths for the third-party directory), which has no declaration here.
    final_output_name: str

    def __init__(self, dep_graph: DependencyGraph, initial_node: Node,
                 lto_output_suffix: Optional[str]) -> None:
        """
        Stores the dependency graph and the node to re-link, resolves the toolchain paths for the
        graph's build root, pre-processes the node's original link command, and starts the new
        link command with the clang++ driver as its only argument.

        initial_node must have a non-empty link_cmd (checked with an assertion).
        """
        self.dep_graph, self.initial_node = dep_graph, initial_node
        self.lto_output_suffix = lto_output_suffix

        build_root = dep_graph.conf.build_root
        build_paths = BuildPaths(build_root)
        self.build_root, self.build_paths = build_root, build_paths

        self.llvm_path = build_paths.get_llvm_path()
        self.thirdparty_path = build_paths.get_thirdparty_path()
        self.clang_cpp_path = build_paths.get_llvm_tool_path('clang++')

        assert initial_node.link_cmd
        self.original_link_args = process_original_link_cmd(initial_node.link_cmd)
        self.static_lib_paths = get_static_lib_paths(self.thirdparty_path)

        # The new command begins with just the compiler driver; no object files seen yet.
        self.new_args = LinkCommand([self.clang_cpp_path])
        self.obj_file_graph_nodes = set()

        pg_backend_info = get_yb_pgbackend_link_cmd(build_root)
        self.pg_backend_build_dir, self.yb_pgbackend_link_cmd = pg_backend_info

    def convert_to_static_lib(self, arg: str) -> Optional[str]:
        """
        Given an argument to the original linker command, try to interpret it as a library, either
        specified as a shared library path, or using -l... syntax, and return the corresponding
        static library path if available.

        A path starting with '/' and ending in '.so' is matched against a file with the same
        prefix and a '.a' or '-s.a' suffix (tried in that order). A '-l<name>' argument is matched
        against the base names 'lib<name>.a' and 'lib<name>-s.a' of the entries in
        self.static_lib_paths; the first matching entry wins.

        Returns None when no static library is found or when the argument is not a library
        reference at all.
        """
        if arg.startswith('/') and arg.endswith('.so'):
            lib_path_prefix = arg[:-len('.so')]
            for suffix in ('.a', '-s.a'):
                candidate = lib_path_prefix + suffix
                if os.path.exists(candidate):
                    logging.info(
                        "Using static library %s instead of shared library %s",
                        candidate, arg)
                    return candidate
            logging.info("Did not find static library corresponding to %s", arg)

        if arg.startswith('-l'):
            logging.info("Looking for static lib for: %s", arg)
            lib_name = arg[2:]
            # Built once up front rather than re-concatenated for every candidate path.
            accepted_basenames = ('lib' + lib_name + '.a', 'lib' + lib_name + '-s.a')
            for static_lib_path in self.static_lib_paths:
                if os.path.basename(static_lib_path) in accepted_basenames:
                    logging.info("Found static lib for %s: %s", lib_name, static_lib_path)
                    return static_lib_path
            logging.info("Did not find a static lib for %s", lib_name)

        if arg.endswith('.so') or '.so.' in arg:
            # Only reported here, not rejected; None is still returned below.
            logging.info("Still using a shared library: %s", arg)

        return None

    def process_arg(self, arg: str) -> None:
        """
        Adds one argument of an existing link command to the new command.

        Arguments listed in SKIPPED_ARGS are dropped. If a static library can be substituted for
        the argument, that library is added unless the new command already contains it; otherwise
        the argument is added unchanged.
        """
        if arg in SKIPPED_ARGS:
            logging.info("Skipping argument: %s", arg)
            return

        static_lib_path = self.convert_to_static_lib(arg)
        if not static_lib_path:
            # No static replacement available: keep the argument as it was.
            self.new_args.add_new_arg(arg)
            return

        if self.new_args.contains(static_lib_path):
            return
        self.new_args.add_new_arg(static_lib_path)

    def consume_original_link_cmd(self) -> None:
        """
        Goes over the original linker command and reuses some of its arguments for the new command.

        The argument following '-o' is remembered as the output name and is not copied. YB
        libraries are skipped. Arguments ending in '.cc.o' are additionally recorded in
        obj_file_graph_nodes, which add_leaf_object_files consults for deduplication. Everything
        else is handed to process_arg.

        At the end, final_output_name is set to the absolute output path with lto_output_suffix
        appended (when one was given), and '-o <final_output_name>' is added to the new command.

        Raises ValueError if the original command names more than one output file, or none.
        """
        with WorkDirContext(self.build_root):
            expect_output_name = False
            output_name: Optional[str] = None
            for arg in self.original_link_args:
                if arg == '-o':
                    expect_output_name = True
                    continue
                if expect_output_name:
                    expect_output_name = False
                    if output_name:
                        raise ValueError(
                            "Found multiple output names in the original link command: "
                            "%s and %s" % (output_name, arg))
                    output_name = arg
                    continue

                if is_yb_library(arg):
                    logging.info("Skipping YB library: %s", arg)
                    continue

                if arg.endswith('.cc.o'):
                    # E.g. tablet_server_main.cc.o.
                    # Remember this node for later deduplication.
                    self.obj_file_graph_nodes.add(
                        self.dep_graph.find_node(os.path.realpath(arg)))

                self.process_arg(arg)

            if not output_name:
                raise ValueError(
                    "Did not find an output name in the original link command")

            self.final_output_name = os.path.abspath(output_name)
            if self.lto_output_suffix is not None:
                self.final_output_name += self.lto_output_suffix
            # Logged only after the suffix is applied, so the message shows the name that is
            # actually passed to the linker.
            logging.info("Final output file name: %s", self.final_output_name)
            self.new_args.extend(['-o', self.final_output_name])

    def add_leaf_object_files(self) -> None:
        """
        Goes over all the object files that the original node transitively depends on, and adds
        them to the link command if they have not already been added.

        Afterwards walks the yb_pgbackend link command: its object files are appended with the
        Postgres backend build directory prepended, and its '-l...' arguments (other than
        '-lyb_...' ones and ones the new command already contains) go through process_arg.
        """
        reachable_nodes = self.initial_node.get_recursive_deps(
            skip_node_types={NodeType.EXECUTABLE})

        with WorkDirContext(self.build_root):
            # Ordering by path keeps the generated command deterministic.
            for dep_node in sorted(reachable_nodes, key=lambda n: n.path):
                if dep_node in self.obj_file_graph_nodes:
                    # This .cc.o file was already on the original command line.
                    continue
                if dep_node.node_type != NodeType.OBJECT:
                    continue
                self.new_args.add_new_arg(dep_node.path)

            for pg_arg in self.yb_pgbackend_link_cmd:
                if pg_arg.endswith('.o'):
                    # TODO: why is main_cpp_wrapper.cc.o even linked into libyb_pgbackend?
                    if os.path.basename(pg_arg) != 'main_cpp_wrapper.cc.o':
                        self.new_args.append(
                            os.path.join(self.pg_backend_build_dir, pg_arg))
                elif (pg_arg.startswith('-l')
                        and not self.new_args.contains(pg_arg)
                        and not pg_arg.startswith('-lyb_')):
                    self.process_arg(pg_arg)

    def add_final_args(self, lto_type: str) -> None:
        """
        Appends the trailing part of the new link command: the Postgres static libraries, the
        whole-program / LTO flags for the given lto_type ('full' or 'thin'), and the static libc++
        and libc++abi from the third-party directory. Then writes the resulting command to
        '<final_output_name>_lto_link_cmd_args.txt'.

        Reads final_output_name, so consume_original_link_cmd has to have run already.
        """
        assert lto_type in ['full', 'thin']

        postgres_lib_dir = os.path.join(self.build_root, 'postgres', 'lib')
        trailing_args = ['-L%s' % postgres_lib_dir]
        trailing_args += ['-l:libpgcommon.a', '-l:libpgport.a', '-l:libpq.a']
        trailing_args += ['-fwhole-program', '-Wl,-v', '-nostdlib++']
        trailing_args.append('-flto=' + lto_type)
        self.new_args.extend(trailing_args)

        libcxx_lib_dir = os.path.join(
            self.thirdparty_path, 'installed', 'uninstrumented', 'libcxx', 'lib')
        for lib_name in ['libc++.a', 'libc++abi.a']:
            self.new_args.append(os.path.join(libcxx_lib_dir, lib_name))

        with WorkDirContext(self.build_root):
            args_file_path = self.final_output_name + '_lto_link_cmd_args.txt'
            self.write_link_cmd_file(args_file_path)

    def run_linker(self) -> None:
        """
        Runs the accumulated link command from the build root and logs how long it took.

        Raises subprocess.CalledProcessError if the linker exits with a non-zero code. The
        exception carries only the compiler driver path as its command, so the extremely long
        full command line does not end up in the error output.
        """
        with WorkDirContext(self.build_root):
            start_time_sec = time.time()
            logging.info("Running linker")
            exit_code = 0
            try:
                subprocess.check_call(self.new_args.args)
            except subprocess.CalledProcessError as ex:
                # Avoid printing the extremely long command line.
                logging.error("Linker returned exit code %d", ex.returncode)
                exit_code = ex.returncode
            elapsed_time_sec = time.time() - start_time_sec
            logging.info("Linking finished in %.1f sec", elapsed_time_sec)
            if exit_code != 0:
                # Report the failure to the caller instead of returning as if linking succeeded.
                raise subprocess.CalledProcessError(exit_code, self.clang_cpp_path)

    def write_link_cmd_file(self, out_path: str) -> None:
        """
        Writes the current link command to out_path, one argument per line, and logs the
        absolute form of that path.
        """
        abs_out_path = os.path.abspath(out_path)
        logging.info(
            "Writing the linker command line (one argument per line) to %s",
            abs_out_path)
        file_contents = '\n'.join(self.new_args.args)
        write_file(file_contents, out_path)
示例#2
0
class LinkHelper:
    """
    Helps with creating and running a custom linking command for YugabyteDB outside of the build
    system.

    The new command is accumulated in new_args, which starts out containing only the clang++
    driver path. consume_original_link_cmd has to run before add_final_args, because the latter
    reads final_output_name and only the former assigns it.
    """
    # Dependency graph used to look up nodes; its conf attribute supplies the build root.
    dep_graph: DependencyGraph
    # The node whose link command is being re-created. Its link_cmd must be non-empty.
    initial_node: Node

    # Arguments to the linker in the original command that produces the initial_node target.
    # Does not include the compiler driver executable.
    original_link_args: List[str]

    # Build root directory, and the BuildPaths helper constructed from it. The three paths below
    # are all obtained from that helper.
    build_root: str
    build_paths: BuildPaths
    llvm_path: str
    thirdparty_path: str
    # Path of the clang++ tool; used as the first element of the new link command.
    clang_cpp_path: str

    # Dependency graph nodes corresponding to the object files present in the original linker
    # command. Used for deduplication.
    obj_file_graph_nodes: Set[Node]

    # The link command being built.
    new_args: LinkCommand

    # Build directory of the Postgres backend.
    pg_backend_build_dir: str

    # The command for linking the yb_pgbackend library.
    yb_pgbackend_link_cmd: List[str]

    # Optional suffix appended to the output file name of the new link command.
    lto_output_suffix: Optional[str]

    # Populated by consume_original_link_cmd.
    final_output_name: str

    # We look at shared library dependencies (detected using ldd) of the libraries we add, and for
    # those dependencies that fall within the third-party directory, we determine the corresponding
    # static libraries and add them to the list below so we can link with them. This is necessary
    # because in some cases, e.g. for libgssapi_krb5, the static libraries we need to add cannot be
    # determined in any other way. The dictionary below maps the static library to the list of
    # shared libraries that necessitated its addition.
    static_libs_from_ldd: Dict[str, Set[str]]

    # Set of shared library file paths for which we have already examined dependencies using ldd
    # as described above.
    # NOTE(review): __init__ additionally assigns static_lib_paths (the result of
    # get_static_lib_paths for the third-party directory), which has no declaration here.
    processed_shared_lib_deps_for: Set[str]

    def __init__(self, dep_graph: DependencyGraph, initial_node: Node,
                 lto_output_suffix: Optional[str]) -> None:
        """
        Stores the dependency graph and the node to re-link, resolves the toolchain paths for the
        graph's build root, pre-processes the node's original link command, and starts the new
        link command with the clang++ driver as its only argument. The ldd-related bookkeeping
        containers start out empty.

        initial_node must have a non-empty link_cmd (checked with an assertion).
        """
        self.dep_graph, self.initial_node = dep_graph, initial_node
        self.lto_output_suffix = lto_output_suffix

        build_root = dep_graph.conf.build_root
        build_paths = BuildPaths(build_root)
        self.build_root, self.build_paths = build_root, build_paths

        self.llvm_path = build_paths.get_llvm_path()
        self.thirdparty_path = build_paths.get_thirdparty_path()
        self.clang_cpp_path = build_paths.get_llvm_tool_path('clang++')

        assert initial_node.link_cmd
        self.original_link_args = process_original_link_cmd(initial_node.link_cmd)
        self.static_lib_paths = get_static_lib_paths(self.thirdparty_path)

        # The new command begins with just the compiler driver; no object files seen yet.
        self.new_args = LinkCommand([self.clang_cpp_path])
        self.obj_file_graph_nodes = set()

        pg_backend_info = get_yb_pgbackend_link_cmd(build_root)
        self.pg_backend_build_dir, self.yb_pgbackend_link_cmd = pg_backend_info

        # Nothing has been examined with ldd yet.
        self.processed_shared_lib_deps_for = set()
        self.static_libs_from_ldd = {}

    def convert_to_static_lib(self, arg: str) -> Optional[str]:
        """
        Given an argument to the original linker command, try to interpret it as a library, either
        specified as a shared library path, or using -l... syntax, and return the corresponding
        static library path if available.

        For an absolute shared library path, the candidates are the path prefix (and the prefix
        with its dash-numeric suffix removed, when there is one) combined with each of
        STATIC_LIBRARY_SUFFIXES; the first candidate that exists on disk is returned. For a
        '-l<name>' argument, the first entry of self.static_lib_paths whose base name is
        'lib<name>' plus one of those suffixes is returned, after the shared library dependencies
        of its shared counterpart have been recorded.

        Returns None when nothing matched and the argument is not a shared library. Raises
        ValueError when an absolute shared library path has no existing static counterpart, or
        when the argument is still a shared library after both lookups. The argument must not be
        a system library (checked with an assertion).
        """
        assert not is_system_lib(arg)

        if os.path.isabs(arg):
            # Only the prefix is needed; the shared library extension itself is not used.
            lib_path_prefix, _ = split_shared_lib_ext(arg)
            if lib_path_prefix is not None:
                prefix_candidates: List[str] = [lib_path_prefix]
                prefix_without_numeric_suffix = remove_dash_numeric_suffix(lib_path_prefix)
                if prefix_without_numeric_suffix is not None:
                    prefix_candidates.append(prefix_without_numeric_suffix)

                for prefix in prefix_candidates:
                    for suffix in STATIC_LIBRARY_SUFFIXES:
                        candidate = prefix + suffix
                        if os.path.exists(candidate):
                            logging.info(
                                "Using static library %s instead of shared library %s",
                                candidate, arg)
                            return candidate
                raise ValueError(
                    "Did not find static library corresponding to %s" % arg)

        if arg.startswith('-l'):
            logging.info("Looking for static lib for: %s", arg)
            lib_name = arg[2:]
            # Built once up front rather than re-concatenated for every candidate path.
            accepted_basenames = [
                'lib' + lib_name + suffix for suffix in STATIC_LIBRARY_SUFFIXES
            ]
            for static_lib_path in self.static_lib_paths:
                if os.path.basename(static_lib_path) in accepted_basenames:
                    logging.info("Found static lib for %s: %s", lib_name, static_lib_path)
                    self.add_shared_library_dependencies(
                        find_shared_lib_from_static(static_lib_path))
                    return static_lib_path
            logging.info("Did not find a static lib for %s", lib_name)

        if is_shared_lib(arg):
            raise ValueError("Still using a shared library: %s" % arg)

        return None

    def add_shared_library_dependencies(self,
                                        shared_library_path: str) -> None:
        """
        Runs ldd on the given shared library and, for every dependency located under the
        third-party directory, records the corresponding static library in
        self.static_libs_from_ldd together with the real path of the shared library that needed
        it.

        Each path is examined at most once; repeated calls with the same path return immediately.
        A path that does not exist on disk is only logged. Static libraries named in
        LIBCXX_STATIC_LIB_NAMES are not recorded.

        Errors from running ldd (subprocess.CalledProcessError) and from convert_to_static_lib
        (ValueError) are not handled here and propagate to the caller.
        """
        if shared_library_path in self.processed_shared_lib_deps_for:
            return
        self.processed_shared_lib_deps_for.add(shared_library_path)

        if not os.path.exists(shared_library_path):
            logging.info("File does not exist, not running ldd: %s", shared_library_path)
            return

        thirdparty_prefix = self.thirdparty_path + '/'
        ldd_output = subprocess.check_output(['ldd', shared_library_path]).decode('utf-8')
        for line in ldd_output.splitlines():
            ldd_output_line_match = LDD_OUTPUT_LINE_RE.match(line.strip())
            if not ldd_output_line_match:
                continue

            # Group 2 of the match is treated as the path of the dependency.
            so_path = ldd_output_line_match.group(2)
            if not so_path.startswith(thirdparty_prefix):
                continue

            static_lib_path = self.convert_to_static_lib(so_path)
            if not static_lib_path:
                continue
            if os.path.basename(static_lib_path) in LIBCXX_STATIC_LIB_NAMES:
                # Skip libc++ and libc++abi, we will add them explicitly later.
                # All third-party libraries written in C++ will depend on these and it
                # is not very useful to include that in the output.
                continue

            self.static_libs_from_ldd.setdefault(static_lib_path, set()).add(
                os.path.realpath(shared_library_path))

    def process_arg(self, arg: str) -> None:
        """
        Adds one argument of an existing link command to the new command.

        Arguments listed in SKIPPED_ARGS are dropped. A system static library is rejected, and so
        is any system shared library other than librt.so, which is replaced with '-lrt'. For
        everything else, shared library dependencies are recorded first (when the argument is a
        shared library), and the argument is replaced with its static library when one is found.

        Raises ValueError for the rejected system libraries.
        """
        assert arg is not None
        if arg in SKIPPED_ARGS:
            logging.info("Skipping argument: %s", arg)
            return

        if not is_system_lib(arg):
            if is_shared_lib(arg):
                self.add_shared_library_dependencies(arg)
            static_lib_path = self.convert_to_static_lib(arg)
            self.new_args.add_new_arg(static_lib_path or arg)
            return

        if is_static_lib(arg):
            raise ValueError(
                "Linking with a system static library is not allowed: %s" %
                arg)

        if is_shared_lib(arg):
            if os.path.basename(arg) != 'librt.so':
                raise ValueError("System shared library: %s" % arg)
            arg = '-lrt'

        self.new_args.add_new_arg(arg)

    def consume_original_link_cmd(self) -> None:
        """
        Goes over the original linker command and reuses some of its arguments for the new command.

        The argument following '-o' is remembered as the output name and is not copied. YB
        libraries are skipped. Arguments ending in '.cc.o' are additionally recorded in
        obj_file_graph_nodes, which add_leaf_object_files consults for deduplication. Everything
        else is handed to process_arg.

        At the end, final_output_name is set to the absolute output path with lto_output_suffix
        appended (when one was given), and '-o <final_output_name>' is added to the new command.

        Raises ValueError if the original command names more than one output file, or none.
        """
        with WorkDirContext(self.build_root):
            expect_output_name = False
            output_name: Optional[str] = None
            for arg in self.original_link_args:
                if arg == '-o':
                    expect_output_name = True
                    continue
                if expect_output_name:
                    expect_output_name = False
                    if output_name:
                        raise ValueError(
                            "Found multiple output names in the original link command: "
                            "%s and %s" % (output_name, arg))
                    output_name = arg
                    continue

                if is_yb_library(arg):
                    logging.info("Skipping YB library: %s", arg)
                    continue

                if arg.endswith('.cc.o'):
                    # E.g. tablet_server_main.cc.o.
                    # Remember this node for later deduplication.
                    self.obj_file_graph_nodes.add(
                        self.dep_graph.find_node(os.path.realpath(arg)))

                self.process_arg(arg)

            if not output_name:
                raise ValueError(
                    "Did not find an output name in the original link command")

            self.final_output_name = os.path.abspath(output_name)
            if self.lto_output_suffix is not None:
                self.final_output_name += self.lto_output_suffix
            # Logged only after the suffix is applied, so the message shows the name that is
            # actually passed to the linker.
            logging.info("Final output file name: %s", self.final_output_name)
            self.new_args.extend(['-o', self.final_output_name])

    def add_leaf_object_files(self) -> None:
        """
        Goes over all the object files that the original node transitively depends on, and adds
        them to the link command if they have not already been added.

        Afterwards walks the yb_pgbackend link command: its object files are appended with the
        Postgres backend build directory prepended, and its '-l...' arguments (other than
        '-lyb_...' ones and ones the new command already contains) go through process_arg.
        """
        reachable_nodes = self.initial_node.get_recursive_deps(
            skip_node_types={NodeType.EXECUTABLE})

        with WorkDirContext(self.build_root):
            # Ordering by path keeps the generated command deterministic.
            for dep_node in sorted(reachable_nodes, key=lambda n: n.path):
                if dep_node in self.obj_file_graph_nodes:
                    # This .cc.o file was already on the original command line.
                    continue
                if dep_node.node_type != NodeType.OBJECT:
                    continue
                self.new_args.add_new_arg(dep_node.path)

            for pg_arg in self.yb_pgbackend_link_cmd:
                if pg_arg.endswith('.o'):
                    # TODO: why is main_cpp_wrapper.cc.o even linked into libyb_pgbackend?
                    if os.path.basename(pg_arg) != 'main_cpp_wrapper.cc.o':
                        self.new_args.append(
                            os.path.join(self.pg_backend_build_dir, pg_arg))
                elif (pg_arg.startswith('-l')
                        and not self.new_args.contains(pg_arg)
                        and not pg_arg.startswith('-lyb_')):
                    self.process_arg(pg_arg)

    def add_final_args(self, lto_type: str) -> None:
        """
        Appends the trailing part of the new link command: the static libraries discovered
        through ldd that are not on the command yet, the Postgres static libraries, the
        whole-program / LTO flags for the given lto_type ('full' or 'thin'), and the static
        libc++ libraries from the third-party directory. Then writes the resulting command to
        '<final_output_name>_lto_link_cmd_args.txt'.

        Reads final_output_name, so consume_original_link_cmd has to have run already.
        """
        assert lto_type in ['full', 'thin']

        for static_lib_path, needed_by in sorted(self.static_libs_from_ldd.items()):
            if self.new_args.contains(static_lib_path):
                continue
            # The dictionary value is the set of shared library paths that caused this static
            # library to be recorded.
            logging.info(
                "Adding a static library determined using shared library dependencies: %s "
                "(needed by: %s)",
                static_lib_path,
                ', '.join(sorted(needed_by)))
            self.new_args.append(static_lib_path)

        postgres_lib_dir = os.path.join(self.build_root, 'postgres', 'lib')
        trailing_args = ['-L%s' % postgres_lib_dir]
        trailing_args += ['-l:libpgcommon.a', '-l:libpgport.a', '-l:libpq.a']
        trailing_args += ['-fwhole-program', '-Wl,-v', '-nostdlib++']
        # For __res_nsearch, ns_initparse, ns_parserr, ns_name_uncompress.
        # See https://github.com/yugabyte/yugabyte-db/issues/12738 for details.
        trailing_args.append('-lresolv')
        trailing_args.append('-flto=' + lto_type)
        self.new_args.extend(trailing_args)

        libcxx_lib_dir = os.path.join(
            self.thirdparty_path, 'installed', 'uninstrumented', 'libcxx', 'lib')
        for lib_name in LIBCXX_STATIC_LIB_NAMES:
            self.new_args.append(os.path.join(libcxx_lib_dir, lib_name))

        with WorkDirContext(self.build_root):
            args_file_path = self.final_output_name + '_lto_link_cmd_args.txt'
            self.write_link_cmd_file(args_file_path)

    def run_linker(self) -> None:
        """
        Runs the accumulated link command from the build root and logs how long it took.

        Raises subprocess.CalledProcessError if the linker exits with a non-zero code. The
        exception carries only the compiler driver path as its command, so the extremely long
        full command line does not end up in the error output.
        """
        with WorkDirContext(self.build_root):
            start_time_sec = time.time()
            logging.info("Running linker")
            exit_code = 0
            try:
                subprocess.check_call(self.new_args.as_list())
            except subprocess.CalledProcessError as ex:
                # Avoid printing the extremely long command line.
                logging.error("Linker returned exit code %d", ex.returncode)
                exit_code = ex.returncode
            elapsed_time_sec = time.time() - start_time_sec
            logging.info("Linking finished in %.1f sec", elapsed_time_sec)
            if exit_code != 0:
                # Report the failure to the caller instead of returning as if linking succeeded.
                raise subprocess.CalledProcessError(exit_code, self.clang_cpp_path)

    def write_link_cmd_file(self, out_path: str) -> None:
        """
        Writes the current link command to out_path, one argument per line, and logs the
        absolute form of that path.
        """
        abs_out_path = os.path.abspath(out_path)
        logging.info(
            "Writing the linker command line (one argument per line) to %s",
            abs_out_path)
        file_contents = '\n'.join(self.new_args.as_list())
        write_file(file_contents, out_path)