    def parse_ninja_metadata(self) -> None:
        with WorkDirContext(self.conf.build_root):
            ninja_path = os.environ.get('YB_NINJA_PATH', 'ninja')
            logging.info("Ninja executable path: %s", ninja_path)
            logging.info("Running 'ninja -t commands'")
            subprocess.check_call('{} -t commands >ninja_commands.txt'.format(
                pipes.quote(ninja_path)),
                                  shell=True)
            logging.info(
                "Parsing the output of 'ninja -t commands' for linker commands"
            )
            start_time_sec = time.time()
            self.parse_link_txt_file('ninja_commands.txt')
            logging.info("Parsing linker commands took %.1f seconds",
                         time.time() - start_time_sec)

            logging.info("Running 'ninja -t deps'")
            subprocess.check_call('{} -t deps >ninja_deps.txt'.format(
                pipes.quote(ninja_path)),
                                  shell=True)
            start_time_sec = time.time()
            logging.info(
                "Parsing the output of 'ninja -t deps' to infer dependencies")
            self.parse_depend_file('ninja_deps.txt')
            logging.info("Parsing dependencies took %.1f seconds",
                         time.time() - start_time_sec)
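A side note on the quoting helper used above: pipes.quote is effectively the same function as shlex.quote, and the pipes module was deprecated in Python 3.11 and removed in 3.13. A minimal sketch of building the same command with shlex on newer Pythons (only the quoting helper is swapped; everything else mirrors the example above):

import os
import shlex
import subprocess

ninja_path = os.environ.get('YB_NINJA_PATH', 'ninja')
cmd = '{} -t commands >ninja_commands.txt'.format(shlex.quote(ninja_path))
subprocess.check_call(cmd, shell=True)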
Example #2
    def build_postgres(self):
        if self.args.clean:
            self.clean_postgres()

        mkdir_p(self.pg_build_root)

        self.set_env_vars('configure')
        saved_build_stamp = self.get_saved_build_stamp()
        initial_build_stamp = self.get_build_stamp(include_env_vars=True)
        initial_build_stamp_no_env = self.get_build_stamp(include_env_vars=False)
        logging.info("PostgreSQL build stamp:\n%s", initial_build_stamp)
        if initial_build_stamp == saved_build_stamp:
            logging.info(
                "PostgreSQL is already up-to-date in directory %s, not rebuilding.",
                self.pg_build_root)
            return
        with WorkDirContext(self.pg_build_root):
            self.sync_postgres_source()
            if os.environ.get('YB_PG_SKIP_CONFIGURE', '0') != '1':
                self.configure_postgres()
            self.make_postgres()

        final_build_stamp_no_env = self.get_build_stamp(include_env_vars=False)
        if final_build_stamp_no_env == initial_build_stamp_no_env:
            logging.info("Updating build stamp file at %s", self.build_stamp_path)
            with open(self.build_stamp_path, 'w') as build_stamp_file:
                build_stamp_file.write(initial_build_stamp)
        else:
            logging.warning("PostgreSQL build stamp changed during the build! Not updating.")
Example #3
def link_whole_program(dep_graph: DependencyGraph,
                       initial_nodes: Iterable[Node],
                       link_cmd_out_file: Optional[str], run_linker: bool,
                       lto_output_suffix: Optional[str],
                       lto_type: str) -> None:
    initial_node_list = list(initial_nodes)
    assert len(initial_node_list) == 1
    initial_node = initial_node_list[0]

    link_helper = LinkHelper(dep_graph=dep_graph,
                             initial_node=initial_node,
                             lto_output_suffix=lto_output_suffix)

    # We stop recursive traversal at executables because those are just code generators
    # (protoc-gen-insertions, protoc-gen-yrpc, bfql_codegen, bfpg_codegen).

    conf = dep_graph.conf

    link_helper.consume_original_link_cmd()
    link_helper.add_leaf_object_files()
    link_helper.add_final_args(lto_type=lto_type)

    with WorkDirContext(conf.build_root):
        if link_cmd_out_file:
            link_helper.write_link_cmd_file(link_cmd_out_file)
        if not run_linker:
            return
        link_helper.run_linker()
Example #4
    def add_leaf_object_files(self) -> None:
        """
        Goes over all the object files that the original node transitively depends on, and adds
        them to the link command if they have not already been added.
        """

        transitive_deps = self.initial_node.get_recursive_deps(
            skip_node_types=set([NodeType.EXECUTABLE]))
        with WorkDirContext(self.build_root):
            # Sort nodes by path for determinism.
            for node in sorted(list(transitive_deps),
                               key=lambda dep: dep.path):
                if node in self.obj_file_graph_nodes:
                    # Dedup .cc.o files already existing on the command line.
                    continue

                if node.node_type == NodeType.OBJECT:
                    self.new_args.add_new_arg(node.path)

            for arg in self.yb_pgbackend_link_cmd:
                if arg.endswith('.o'):
                    if os.path.basename(arg) == 'main_cpp_wrapper.cc.o':
                        # TODO: why is this file even linked into libyb_pgbackend?
                        continue
                    self.new_args.append(
                        os.path.join(self.pg_backend_build_dir, arg))
                    continue
                if (arg.startswith('-l') and not self.new_args.contains(arg)
                        and not arg.startswith('-lyb_')):
                    self.process_arg(arg)
Example #5
    def get_build_stamp(self, include_env_vars):
        """
        Creates a "build stamp" that tries to capture all inputs that might affect the PostgreSQL
        code. This is needed to avoid needlessly rebuilding PostgreSQL, as it takes ~10 seconds
        even if there are no code changes.
        """

        with WorkDirContext(YB_SRC_ROOT):
            code_subset = [
                'src/postgres', 'src/yb/yql/pggate',
                'python/yb/build_postgres.py', 'build-support/build_postgres',
                'CMakeLists.txt'
            ]
            git_hash = subprocess.check_output(
                ['git', '--no-pager', 'log', '-n', '1', '--pretty=%H'] +
                code_subset).strip()
            git_diff = subprocess.check_output(['git', 'diff'] + code_subset)
            git_diff_cached = subprocess.check_output(
                ['git', 'diff', '--cached'] + code_subset)

        env_vars_str = self.get_env_vars_str(self.env_vars_for_build_stamp)
        build_stamp = "\n".join([
            "git_commit_sha1=%s" % git_hash,
            "git_diff_sha256=%s" % sha256(git_diff),
            "git_diff_cached_sha256=%s" % sha256(git_diff_cached)
        ])

        if include_env_vars:
            build_stamp += "\nenv_vars_sha256=%s" % hashlib.sha256(
                env_vars_str).hexdigest()

        return build_stamp.strip()
Example #6
    def test_work_dir_context(self):
        old_work_dir = os.getcwd()
        for d in ['/tmp', os.path.expanduser('~')]:
            with WorkDirContext(d):
                self.assertEqual(d, os.getcwd())
                self.assertEqual(d, run_program('pwd').stdout.strip())

        self.assertEqual(old_work_dir, os.getcwd())
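The test above pins down the behavior that every example on this page relies on: WorkDirContext changes the working directory on entry and restores the previous one on exit. The class itself is not shown on this page, so the following is only a minimal sketch of a context manager with that behavior (the attribute names are assumptions, not the actual YugabyteDB implementation):

import os


class WorkDirContext:
    """Temporarily switch to a directory, restoring the old one on exit."""

    def __init__(self, work_dir: str) -> None:
        self.work_dir = work_dir
        self.prev_dir = ''

    def __enter__(self) -> 'WorkDirContext':
        self.prev_dir = os.getcwd()
        os.chdir(self.work_dir)
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Restore the original directory even if the body raised.
        os.chdir(self.prev_dir)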
Example #7
    def build_postgres(self):
        start_time_sec = time.time()
        if self.args.clean:
            self.clean_postgres()

        mkdir_p(self.pg_build_root)

        self.set_env_vars('configure')
        saved_build_stamp = self.get_saved_build_stamp()
        initial_build_stamp = self.get_build_stamp(include_env_vars=True)
        initial_build_stamp_no_env = self.get_build_stamp(
            include_env_vars=False)
        logging.info("PostgreSQL build stamp:\n%s", initial_build_stamp)

        if initial_build_stamp == saved_build_stamp:
            if self.export_compile_commands:
                logging.info(
                    "Even though PostgreSQL is already up-to-date in directory %s, we still need "
                    "to create compile_commands.json, so proceeding with %s",
                    self.pg_build_root, self.steps_description())
            else:
                logging.info(
                    "PostgreSQL is already up-to-date in directory %s, skipping %s.",
                    self.pg_build_root, self.steps_description())
                return

        with WorkDirContext(self.pg_build_root):
            if self.should_configure:
                self.sync_postgres_source()
                configure_start_time_sec = time.time()
                self.configure_postgres()
                logging.info(
                    "The configure step of building PostgreSQL took %.1f sec",
                    time.time() - configure_start_time_sec)
            if self.should_make:
                make_start_time_sec = time.time()
                self.make_postgres()
                logging.info(
                    "The make step of building PostgreSQL took %.1f sec",
                    time.time() - make_start_time_sec)

        if self.should_make:
            # Guard against the code having changed while we were building it.
            final_build_stamp_no_env = self.get_build_stamp(
                include_env_vars=False)
            if final_build_stamp_no_env == initial_build_stamp_no_env:
                logging.info("Updating build stamp file at %s",
                             self.build_stamp_path)
                with open(self.build_stamp_path, 'w') as build_stamp_file:
                    build_stamp_file.write(initial_build_stamp)
            else:
                logging.warning(
                    "PostgreSQL build stamp changed during the build! Not updating."
                )

        logging.info("PostgreSQL build (%s) took %.1f sec",
                     self.steps_description(),
                     time.time() - start_time_sec)
Example #8
    def build_postgres(self):
        if self.args.clean:
            self.clean_postgres()

        mkdir_p(self.pg_build_root)

        with WorkDirContext(self.pg_build_root):
            self.sync_postgres_source()
            self.configure_postgres()
            self.make_postgres()
Example #9
    def run_linker(self) -> None:
        with WorkDirContext(self.build_root):
            start_time_sec = time.time()
            logging.info("Running linker")
            try:
                subprocess.check_call(self.new_args.args)
            except subprocess.CalledProcessError as ex:
                # Avoid printing the extremely long command line, but do not
                # silently swallow the failure either.
                logging.error("Linker returned exit code %d", ex.returncode)
                raise RuntimeError(
                    "Linker failed with exit code %d" % ex.returncode) from None
            elapsed_time_sec = time.time() - start_time_sec
            logging.info("Linking finished in %.1f sec", elapsed_time_sec)
Example #10
    def get_build_stamp(self, include_env_vars: bool) -> str:
        """
        Creates a "build stamp" that tries to capture all inputs that might affect the PostgreSQL
        code. This is needed to avoid needlessly rebuilding PostgreSQL, as it takes ~10 seconds
        even if there are no code changes.
        """

        with WorkDirContext(YB_SRC_ROOT):
            # Postgres files.
            pathspec = [
                'src/postgres',
                'src/yb/yql/pggate',
                'python/yb/build_postgres.py',
                'build-support/build_postgres',
                'CMakeLists.txt',
            ]
            git_version = self.get_git_version()
            if git_version and git_version >= semantic_version.Version(
                    '1.9.0'):
                # Git version 1.8.5 allows specifying glob pathspec, and Git version 1.9.0 allows
                # specifying negative pathspec.  Use them to exclude changes to regress test files
                # not needed for build.
                pathspec.extend([
                    ':(glob,exclude)src/postgres/**/*_schedule',
                    ':(glob,exclude)src/postgres/**/data/*.csv',
                    ':(glob,exclude)src/postgres/**/data/*.data',
                    ':(glob,exclude)src/postgres/**/expected/*.out',
                    ':(glob,exclude)src/postgres/**/input/*.source',
                    ':(glob,exclude)src/postgres/**/output/*.source',
                    ':(glob,exclude)src/postgres/**/specs/*.spec',
                    ':(glob,exclude)src/postgres/**/sql/*.sql',
                    ':(glob,exclude)src/postgres/.clang-format',
                    ':(glob,exclude)src/postgres/src/test/regress/README',
                    ':(glob,exclude)src/postgres/src/test/regress/yb_lint_regress_schedule.sh',
                ])
            # Get the most recent commit that touched postgres files.
            git_hash = subprocess.check_output(
                ['git', '--no-pager', 'log', '-n', '1', '--format=%H', '--'] +
                pathspec).decode('utf-8').strip()
            # Get uncommitted changes to tracked postgres files.
            git_diff = subprocess.check_output(['git', 'diff', 'HEAD', '--'] +
                                               pathspec)

        env_vars_str = self.get_env_vars_str(self.env_vars_for_build_stamp)
        build_stamp = "\n".join([
            "git_commit_sha1=%s" % git_hash,
            "git_diff_sha256=%s" % hashlib.sha256(git_diff).hexdigest(),
        ])

        if include_env_vars:
            build_stamp += "\nenv_vars_sha256=%s" % hashlib.sha256(
                env_vars_str.encode('utf-8')).hexdigest()

        return build_stamp.strip()
Example #11
    def make_postgres(self):
        self.set_env_vars('make')
        make_cmd = ['make']

        make_parallelism = os.environ.get('YB_MAKE_PARALLELISM')
        if make_parallelism:
            make_parallelism = int(make_parallelism)
        if self.build_uses_remote_compilation and not self.remote_compilation_allowed:
            # Since we're building everything locally in this case, and YB_MAKE_PARALLELISM is
            # likely specified for distributed compilation, cap it at some factor times the number
            # of CPU cores.
            parallelism_cap = multiprocessing.cpu_count() * 2
            if make_parallelism:
                make_parallelism = min(parallelism_cap, make_parallelism)
            else:
                make_parallelism = parallelism_cap

        if make_parallelism:
            make_cmd += ['-j', str(int(make_parallelism))]

        os.environ['YB_COMPILER_TYPE'] = self.compiler_type

        # Create a script allowing to easily run "make" from the build directory with the right
        # environment.
        make_script_content = "#!/usr/bin/env bash\n"
        for env_var_name in [
                'YB_SRC_ROOT', 'YB_BUILD_ROOT', 'YB_BUILD_TYPE', 'CFLAGS', 'CXXFLAGS', 'LDFLAGS',
                'PATH']:
            env_var_value = os.environ[env_var_name]
            if env_var_value is None:
                raise RuntimeError("Expected env var %s to be set" % env_var_name)
            make_script_content += "export %s=%s\n" % (env_var_name, quote_for_bash(env_var_value))
        make_script_content += 'make "$@"\n'

        for work_dir in [self.pg_build_root, os.path.join(self.pg_build_root, 'contrib')]:
            with WorkDirContext(work_dir):
                # Create a script to run Make easily with the right environment.
                make_script_path = 'make.sh'
                with open(make_script_path, 'w') as out_f:
                    out_f.write(make_script_content)
                run_program(['chmod', 'u+x', make_script_path])

                # Actually run Make.
                if is_verbose_mode():
                    logging.info("Running make in the %s directory", work_dir)
                make_result = run_program(make_cmd)
                write_program_output_to_file('make', make_result, work_dir)
                make_install_result = run_program(['make', 'install'])
                write_program_output_to_file('make_install', make_install_result, work_dir)
                logging.info("Successfully ran make in the %s directory", work_dir)
Example #12
def main():
    if os.environ.get('YB_SKIP_INITIAL_SYS_CATALOG_SNAPSHOT', '0') == '1':
        logging.info(
            'YB_SKIP_INITIAL_SYS_CATALOG_SNAPSHOT is set, skipping initdb')
        return

    build_root = os.environ['YB_BUILD_ROOT']
    tool_name = 'create_initial_sys_catalog_snapshot'
    tool_path = os.path.join(build_root, 'tests-pgwrapper', tool_name)
    snapshot_dest_path = os.path.join(build_root, 'share',
                                      'initial_sys_catalog_snapshot')

    file_to_check = os.path.join(snapshot_dest_path,
                                 'exported_tablet_metadata_changes')
    if (os.path.exists(file_to_check) and os.environ.get(
            'YB_RECREATE_INITIAL_SYS_CATALOG_SNAPSHOT', '') != '1'):
        logging.info(
            "Initial sys catalog snapshot already exists, not re-creating: %s",
            snapshot_dest_path)
        return
    if os.path.exists(snapshot_dest_path):
        logging.info("Removing initial sys catalog snapshot data at: %s",
                     snapshot_dest_path)
        shutil.rmtree(snapshot_dest_path)

    mkdir_p(os.path.dirname(snapshot_dest_path))

    start_time_sec = time.time()
    logging.info("Starting creating initial system catalog snapshot data")
    logging.info("Logging to: %s", os.path.join(build_root,
                                                tool_name + '.err'))
    os.environ['YB_EXTRA_GTEST_FLAGS'] = (
        '--initial_sys_catalog_snapshot_dest_path=' + snapshot_dest_path)
    os.environ['YB_CTEST_VERBOSE'] = '1'
    with WorkDirContext(build_root):
        initdb_result = run_program([
            os.path.join(YB_SRC_ROOT, 'build-support', 'run-test.sh'),
            tool_path,
        ],
                                    stdout_stderr_prefix=tool_name,
                                    shell=True,
                                    error_ok=True)
        elapsed_time_sec = time.time() - start_time_sec
        if initdb_result.failure():
            initdb_result.print_output_to_stdout()
            raise RuntimeError("initdb failed in %.1f sec" % elapsed_time_sec)

    logging.info(
        "Initial system catalog snapshot data creation took %1.f sec. Wrote data to: %s",
        elapsed_time_sec, snapshot_dest_path)
Example #13
    def make_postgres(self):
        self.set_env_vars('make')
        make_cmd = ['make']

        make_parallelism = os.environ.get('YB_MAKE_PARALLELISM')
        if make_parallelism:
            make_cmd += ['-j', str(int(make_parallelism))]
        os.environ['YB_COMPILER_TYPE'] = self.compiler_type

        # Create a script allowing to easily run "make" from the build directory with the right
        # environment.
        make_script_content = "#!/usr/bin/env bash\n"
        for env_var_name in [
                'YB_SRC_ROOT', 'YB_BUILD_ROOT', 'YB_BUILD_TYPE', 'CFLAGS',
                'CXXFLAGS', 'LDFLAGS', 'PATH'
        ]:
            env_var_value = os.environ[env_var_name]
            if env_var_value is None:
                raise RuntimeError("Expected env var %s to be set" %
                                   env_var_name)
            make_script_content += "export %s=%s\n" % (
                env_var_name, quote_for_bash(env_var_value))
        make_script_content += 'make "$@"\n'

        for work_dir in [
                self.pg_build_root,
                os.path.join(self.pg_build_root, 'contrib')
        ]:
            with WorkDirContext(work_dir):
                # Create a script to run Make easily with the right environment.
                make_script_path = 'make.sh'
                with open(make_script_path, 'w') as out_f:
                    out_f.write(make_script_content)
                run_program(['chmod', 'u+x', make_script_path])

                # Actually run Make.
                if is_verbose_mode():
                    logging.info("Running make in the %s directory", work_dir)
                make_result = run_program(make_cmd)
                write_program_output_to_file('make', make_result, work_dir)
                make_install_result = run_program(['make', 'install'])
                write_program_output_to_file('make_install',
                                             make_install_result, work_dir)
                logging.info("Successfully ran make in the %s directory",
                             work_dir)
Example #14
    def build_postgres(self):
        start_time_sec = time.time()
        if self.args.clean:
            self.clean_postgres()

        mkdir_p(self.pg_build_root)

        self.set_env_vars('configure')
        saved_build_stamp = self.get_saved_build_stamp()
        initial_build_stamp = self.get_build_stamp(include_env_vars=True)
        initial_build_stamp_no_env = self.get_build_stamp(include_env_vars=False)
        logging.info("PostgreSQL build stamp:\n%s", initial_build_stamp)
        if initial_build_stamp == saved_build_stamp:
            logging.info(
                "PostgreSQL is already up-to-date in directory %s, not rebuilding.",
                self.pg_build_root)
            if self.export_compile_commands:
                self.should_build = False
                logging.info("Still need to create compile_commands.json, proceeding.")
            else:
                return

        with WorkDirContext(self.pg_build_root):
            if self.should_build:
                self.sync_postgres_source()
                if os.environ.get('YB_PG_SKIP_CONFIGURE', '0') != '1':
                    configure_start_time_sec = time.time()
                    self.configure_postgres()
                    logging.info("The configure step of building PostgreSQL took %.1f sec",
                                 time.time() - configure_start_time_sec)
            make_start_time_sec = time.time()
            self.make_postgres()
            logging.info("The make step of building PostgreSQL took %.1f sec",
                         time.time() - make_start_time_sec)
        final_build_stamp_no_env = self.get_build_stamp(include_env_vars=False)
        if final_build_stamp_no_env == initial_build_stamp_no_env:
            logging.info("Updating build stamp file at %s", self.build_stamp_path)
            with open(self.build_stamp_path, 'w') as build_stamp_file:
                build_stamp_file.write(initial_build_stamp)
        else:
            logging.warning("PostgreSQL build stamp changed during the build! Not updating.")
        logging.info("PostgreSQL build took %.1f sec", time.time() - start_time_sec)
Example #15
    def add_final_args(self) -> None:
        self.new_args.extend([
            '-L%s' % os.path.join(self.build_root, 'postgres', 'lib'),
            '-l:libpgcommon.a',
            '-l:libpgport.a',
            '-l:libpq.a',
            '-fwhole-program',
            '-Wl,-v',
            '-nostdlib++',
            '-flto=full',
        ])

        for lib_name in ['libc++.a', 'libc++abi.a']:
            self.new_args.append(
                os.path.join(self.thirdparty_path, 'installed',
                             'uninstrumented', 'libcxx', 'lib', lib_name))

        with WorkDirContext(self.build_root):
            self.write_link_cmd_file(self.final_output_name +
                                     '_lto_link_cmd_args.txt')
Example #16
    def consume_original_link_cmd(self) -> None:
        """
        Goes over the original linker command and reuses some of its arguments for the new command.
        """
        with WorkDirContext(self.build_root):
            expect_output_name = False
            output_name: Optional[str] = None
            for arg in self.original_link_args:
                if arg == '-o':
                    expect_output_name = True
                    continue
                if expect_output_name:
                    if output_name:
                        raise ValueError(
                            "Found multiple output names in the original link command: "
                            "%s and %s" % (output_name, arg))
                    output_name = arg
                    expect_output_name = False
                    continue
                expect_output_name = False

                if is_yb_library(arg):
                    logging.info("Skipping YB library: %s", arg)
                    continue

                if arg.endswith('.cc.o'):
                    # E.g. tablet_server_main.cc.o.
                    # Remember this node for later deduplication.
                    self.obj_file_graph_nodes.add(
                        self.dep_graph.find_node(os.path.realpath(arg)))

                self.process_arg(arg)

            if not output_name:
                raise ValueError(
                    "Did not find an output name in the original link command")
            self.final_output_name = os.path.abspath(output_name)
            logging.info("Final output file name: %s", self.final_output_name)
            if self.lto_output_suffix is not None:
                self.final_output_name += self.lto_output_suffix
            self.new_args.extend(['-o', self.final_output_name])
Example #17
    def add_final_args(self, lto_type: str) -> None:
        assert lto_type in ['full', 'thin']
        for static_lib_path in sorted(self.static_libs_from_ldd):
            if not self.new_args.contains(static_lib_path):
                logging.info(
                    "Adding a static library determined using shared library dependencies: %s "
                    "(needed by: %s)",
                    static_lib_path,
                    # The static_libs_from_ldd dictionary stores the set of shared library paths
                    # that caused us to add a particular static library dependency as the value
                    # corresponding to that static library's path in the key.
                    ', '.join(
                        sorted(self.static_libs_from_ldd[static_lib_path])))
                self.new_args.append(static_lib_path)

        self.new_args.extend([
            '-L%s' % os.path.join(self.build_root, 'postgres', 'lib'),
            '-l:libpgcommon.a',
            '-l:libpgport.a',
            '-l:libpq.a',
            '-fwhole-program',
            '-Wl,-v',
            '-nostdlib++',
            # For __res_nsearch, ns_initparse, ns_parserr, ns_name_uncompress.
            # See https://github.com/yugabyte/yugabyte-db/issues/12738 for details.
            '-lresolv',
            '-flto=' + lto_type,
        ])

        for lib_name in LIBCXX_STATIC_LIB_NAMES:
            self.new_args.append(
                os.path.join(self.thirdparty_path, 'installed',
                             'uninstrumented', 'libcxx', 'lib', lib_name))

        with WorkDirContext(self.build_root):
            self.write_link_cmd_file(self.final_output_name +
                                     '_lto_link_cmd_args.txt')
Example #18
def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument('--program_name', type=str, help="Program name")
    parser.add_argument('--output_file_path',
                        type=str,
                        help="Path to output file")
    args = parser.parse_args()

    start_time_sec = time.time()
    build_root = os.environ['YB_BUILD_ROOT']

    with WorkDirContext(build_root):
        content = run_program([
            os.path.join(build_root, 'bin', args.program_name),
            "--dump_flags_xml",
        ],
                              shell=True)

        with open(args.output_file_path, 'w+', encoding='utf-8') as f:
            f.write(content.stdout)

    elapsed_time_sec = time.time() - start_time_sec
    logging.info("Generated flags_metadata for %s in %.1f sec",
                 args.program_name, elapsed_time_sec)
Example #19
    def make_postgres(self):
        self.set_env_vars('make')
        # Postgresql requires MAKELEVEL to be 0 or non-set when calling its make.
        # But in case YB project is built with make, MAKELEVEL is not 0 at this point.
        make_cmd = ['make', 'MAKELEVEL=0']

        make_parallelism = os.environ.get('YB_MAKE_PARALLELISM')
        if make_parallelism:
            make_parallelism = int(make_parallelism)
        if self.build_uses_remote_compilation and not self.remote_compilation_allowed:
            # Since we're building everything locally in this case, and YB_MAKE_PARALLELISM is
            # likely specified for distributed compilation, cap it at some factor times the number
            # of CPU cores.
            parallelism_cap = multiprocessing.cpu_count() * 2
            if make_parallelism:
                make_parallelism = min(parallelism_cap, make_parallelism)
            else:
                make_parallelism = parallelism_cap

        if make_parallelism:
            make_cmd += ['-j', str(int(make_parallelism))]

        self.set_env_var('YB_COMPILER_TYPE', self.compiler_type)

        # Create a script allowing to easily run "make" from the build directory with the right
        # environment.
        env_script_content = ''
        for env_var_name in CONFIG_ENV_VARS:
            env_var_value = os.environ.get(env_var_name)
            if env_var_value is None:
                raise RuntimeError("Expected env var %s to be set" %
                                   env_var_name)
            env_script_content += "export %s=%s\n" % (
                env_var_name, quote_for_bash(env_var_value))

        compile_commands_files = []

        work_dirs = [self.pg_build_root]
        if self.build_type != 'compilecmds':
            work_dirs.append(os.path.join(self.pg_build_root, 'contrib'))

        for work_dir in work_dirs:
            with WorkDirContext(work_dir):
                # Create a script to run Make easily with the right environment.
                if self.should_build:
                    make_script_path = 'make.sh'
                    with open(make_script_path, 'w') as out_f:
                        out_f.write('#!/usr/bin/env bash\n'
                                    '. "${BASH_SOURCE%/*}"/env.sh\n'
                                    'make "$@"\n')
                    with open('env.sh', 'w') as out_f:
                        out_f.write(env_script_content)

                    run_program(['chmod', 'u+x', make_script_path])

                    # Actually run Make.
                    if is_verbose_mode():
                        logging.info("Running make in the %s directory",
                                     work_dir)
                    run_program(make_cmd,
                                stdout_stderr_prefix='make',
                                cwd=work_dir,
                                shell=True,
                                error_ok=True
                                ).print_output_and_raise_error_if_failed()
                if self.build_type == 'compilecmds':
                    logging.info(
                        "Not running make install in the %s directory since we are only "
                        "generating the compilation database", work_dir)
                else:
                    run_program('make install',
                                stdout_stderr_prefix='make_install',
                                cwd=work_dir,
                                shell=True,
                                error_ok=True
                                ).print_output_and_raise_error_if_failed()
                    logging.info("Successfully ran make in the %s directory",
                                 work_dir)

                if self.export_compile_commands:
                    logging.info(
                        "Generating the compilation database in directory '%s'",
                        work_dir)

                    compile_commands_path = os.path.join(
                        work_dir, 'compile_commands.json')
                    self.set_env_var('YB_PG_SKIP_CONFIG_STATUS', '1')
                    if (not os.path.exists(compile_commands_path)
                            or not self.export_compile_commands_lazily):
                        run_program(['compiledb', 'make', '-n'],
                                    capture_output=False)
                    del os.environ['YB_PG_SKIP_CONFIG_STATUS']

                    if not os.path.exists(compile_commands_path):
                        raise RuntimeError(
                            "Failed to generate compilation database at: %s" %
                            compile_commands_path)
                    compile_commands_files.append(compile_commands_path)

        if self.export_compile_commands:
            self.combine_compile_commands(compile_commands_files)
Example #20
    def make_postgres(self):
        self.set_env_vars('make')
        make_cmd = ['make']

        make_parallelism = os.environ.get('YB_MAKE_PARALLELISM')
        if make_parallelism:
            make_parallelism = int(make_parallelism)
        if self.build_uses_remote_compilation and not self.remote_compilation_allowed:
            # Since we're building everything locally in this case, and YB_MAKE_PARALLELISM is
            # likely specified for distributed compilation, cap it at some factor times the number
            # of CPU cores.
            parallelism_cap = multiprocessing.cpu_count() * 2
            if make_parallelism:
                make_parallelism = min(parallelism_cap, make_parallelism)
            else:
                make_parallelism = parallelism_cap

        if make_parallelism:
            make_cmd += ['-j', str(int(make_parallelism))]

        os.environ['YB_COMPILER_TYPE'] = self.compiler_type

        # Create a script allowing to easily run "make" from the build directory with the right
        # environment.
        env_script_content = ''
        for env_var_name in [
                'YB_SRC_ROOT', 'YB_BUILD_ROOT', 'YB_BUILD_TYPE', 'CFLAGS',
                'CXXFLAGS', 'LDFLAGS', 'PATH'
        ]:
            env_var_value = os.environ[env_var_name]
            if env_var_value is None:
                raise RuntimeError("Expected env var %s to be set" %
                                   env_var_name)
            env_script_content += "export %s=%s\n" % (
                env_var_name, quote_for_bash(env_var_value))

        compile_commands_files = []

        for work_dir in [
                self.pg_build_root,
                os.path.join(self.pg_build_root, 'contrib')
        ]:
            with WorkDirContext(work_dir):
                # Create a script to run Make easily with the right environment.
                make_script_path = 'make.sh'
                with open(make_script_path, 'w') as out_f:
                    out_f.write('#!/usr/bin/env bash\n'
                                '. "${BASH_SOURCE%/*}"/env.sh\n'
                                'make "$@"\n')
                with open('env.sh', 'w') as out_f:
                    out_f.write(env_script_content)

                run_program(['chmod', 'u+x', make_script_path])

                # Actually run Make.
                if is_verbose_mode():
                    logging.info("Running make in the %s directory", work_dir)
                make_result = run_program(make_cmd)
                write_program_output_to_file('make', make_result, work_dir)
                make_install_result = run_program(['make', 'install'])
                write_program_output_to_file('make_install',
                                             make_install_result, work_dir)
                logging.info("Successfully ran make in the %s directory",
                             work_dir)

                if self.export_compile_commands:
                    logging.info(
                        "Generating the compilation database in directory '%s'",
                        work_dir)

                    compile_commands_path = os.path.join(
                        work_dir, 'compile_commands.json')
                    os.environ['YB_PG_SKIP_CONFIG_STATUS'] = '1'
                    if (not os.path.exists(compile_commands_path)
                            or not self.export_compile_commands_lazily):
                        run_program(['compiledb', 'make', '-n'],
                                    capture_output=False)
                    del os.environ['YB_PG_SKIP_CONFIG_STATUS']

                    if not os.path.exists(compile_commands_path):
                        raise RuntimeError(
                            "Failed to generate compilation database at: %s" %
                            compile_commands_path)
                    compile_commands_files.append(compile_commands_path)

        if self.export_compile_commands:
            self.combine_compile_commands(compile_commands_files)
Example #21
def main() -> None:
    parser = argparse.ArgumentParser(
        description='A tool for working with the dependency graph')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Enable debug output')
    parser.add_argument(
        '-r',
        '--rebuild-graph',
        action='store_true',
        help='Rebuild the dependency graph and save it to a file')
    parser.add_argument('--node-type',
                        help='Node type to look for',
                        type=NodeType,
                        choices=list(NodeType),
                        default=NodeType.ANY)
    parser.add_argument(
        '--file-regex',
        help='Regular expression for file names to select as initial nodes for '
        'querying the dependency graph.')
    parser.add_argument(
        '--file-name-glob',
        help='Like file-regex, but applies only to file name and uses the glob '
        'syntax instead of regex.')
    parser.add_argument(
        '--git-diff',
        help='Figure out the list of files to use as starting points in the '
        'dependency graph traversal by diffing the current state of the code '
        'against this commit. This could also be anything that could be '
        'passed to "git diff" as a single argument.')
    parser.add_argument(
        '--git-commit',
        help='Similar to --git-diff, but takes a git commit ref (e.g. sha1 or '
        'branch) and uses the set of files from that commit.')
    parser.add_argument(
        '--build-root',
        required=True,
        help='E.g. <some_root>/build/debug-gcc-dynamic-community')
    parser.add_argument('command',
                        type=Command,
                        choices=list(Command),
                        help='Command to perform')
    parser.add_argument(
        '--output-test-config',
        help=
        'Output a "test configuration file" to this path: a JSON file '
        'containing the resulting list of C++ tests to run, a flag '
        'indicating whether to run Java tests or not, etc.')
    parser.add_argument(
        '--incomplete-build',
        action='store_true',
        help='Skip checking for file existence. Allows using the tool after '
        'build artifacts have been deleted.')
    parser.add_argument(
        '--build-args',
        help='Extra arguments to pass to yb_build.sh. The build is invoked e.g. '
        'if the compilation database file is missing.')
    parser.add_argument(
        '--link-cmd-out-file',
        help='For the %s command, write the linker arguments (one per line) '
        'to the given file.' % Command.LINK_WHOLE_PROGRAM.value)
    parser.add_argument(
        '--lto-output-suffix',
        default="-lto",
        help='The suffix to append to LTO-enabled binaries produced by '
        'the %s command' % Command.LINK_WHOLE_PROGRAM.value)
    parser.add_argument(
        '--run-linker',
        help=
        'Whether to actually run the linker. Setting this to false might be useful when '
        'debugging, combined with --link-cmd-out-file.',
        type=arg_str_to_bool,
        default=True)

    args = parser.parse_args()

    if args.file_regex and args.file_name_glob:
        raise RuntimeError(
            '--file-regex and --file-name-glob are incompatible')

    cmd = args.command
    if (not args.file_regex and not args.file_name_glob
            and not args.rebuild_graph and not args.git_diff
            and not args.git_commit
            and cmd not in COMMANDS_NOT_NEEDING_TARGET_SET):
        raise RuntimeError(
            "Neither of --file-regex, --file-name-glob, --git-{diff,commit}, or "
            "--rebuild-graph are specified, and the command is not one of: " +
            ", ".join([cmd.value for cmd in COMMANDS_NOT_NEEDING_TARGET_SET]))

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format=
        "[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    conf = DepGraphConf(verbose=args.verbose,
                        build_root=args.build_root,
                        incomplete_build=args.incomplete_build,
                        file_regex=args.file_regex,
                        file_name_glob=args.file_name_glob,
                        build_args=args.build_args)
    if conf.file_regex and args.git_diff:
        raise RuntimeError(
            "--git-diff is incompatible with --file-{regex,name-glob}")

    if args.git_diff and args.git_commit:
        raise RuntimeError('--git-diff and --git-commit are incompatible')

    if args.git_commit:
        args.git_diff = "{}^..{}".format(args.git_commit, args.git_commit)

    graph_cache_path = os.path.join(args.build_root, 'dependency_graph.json')
    if args.rebuild_graph or not os.path.isfile(graph_cache_path):
        logging.info(
            "Generating a dependency graph at '{}'".format(graph_cache_path))
        dep_graph_builder = DependencyGraphBuilder(conf)
        dep_graph = dep_graph_builder.build()
        dep_graph.save_as_json(graph_cache_path)
    else:
        start_time = datetime.now()
        with open(graph_cache_path) as graph_input_file:
            dep_graph = DependencyGraph(conf,
                                        json_data=json.load(graph_input_file))
        logging.info("Loaded dependency graph from '%s' in %.2f sec" %
                     (graph_cache_path,
                      (datetime.now() - start_time).total_seconds()))
        dep_graph.validate_node_existence()

    # ---------------------------------------------------------------------------------------------
    # Commands that do not require an "initial set of targets"
    # ---------------------------------------------------------------------------------------------

    if cmd == Command.SELF_TEST:
        run_self_test(dep_graph)
        return
    if cmd == Command.DEBUG_DUMP:
        dep_graph.dump_debug_info()
        return

    # ---------------------------------------------------------------------------------------------
    # Figure out the initial set of targets based on a git commit, a regex, etc.
    # ---------------------------------------------------------------------------------------------

    updated_categories: Set[SourceFileCategory] = set()
    file_changes = []
    initial_nodes: Iterable[Node]
    if args.git_diff:
        old_working_dir = os.getcwd()
        with WorkDirContext(conf.yb_src_root):
            git_diff_output = subprocess.check_output(
                ['git', 'diff', args.git_diff, '--name-only']).decode('utf-8')

            initial_nodes = set()
            file_paths = set()
            for file_path in git_diff_output.split("\n"):
                file_path = file_path.strip()
                if not file_path:
                    continue
                file_changes.append(file_path)
                # It is important that we invoke os.path.realpath with the current directory set to
                # the git repository root.
                file_path = os.path.realpath(file_path)
                file_paths.add(file_path)
                node = dep_graph.node_by_path.get(file_path)
                if node:
                    initial_nodes.add(node)

        if not initial_nodes:
            logging.warning(
                "Did not find any graph nodes for this set of files: %s",
                file_paths)
            for basename in set(
                [os.path.basename(file_path) for file_path in file_paths]):
                logging.warning("Nodes for basename '{}': {}".format(
                    basename, dep_graph.find_nodes_by_basename(basename)))

    elif conf.file_regex:
        logging.info("Using file name regex: {}".format(conf.file_regex))
        initial_nodes = dep_graph.find_nodes_by_regex(conf.file_regex)
        if not initial_nodes:
            logging.warning(
                "Did not find any graph nodes for this pattern: %s",
                conf.file_regex)
        for node in initial_nodes:
            file_changes.append(node.path)
    else:
        raise RuntimeError(
            "Could not figure out how to generate the initial set of files")

    file_changes = [(os.path.relpath(file_path, conf.yb_src_root)
                     if os.path.isabs(file_path) else file_path)
                    for file_path in file_changes]

    if cmd == Command.LINK_WHOLE_PROGRAM:
        link_whole_program(dep_graph=dep_graph,
                           initial_nodes=initial_nodes,
                           link_cmd_out_file=args.link_cmd_out_file,
                           run_linker=args.run_linker,
                           lto_output_suffix=args.lto_output_suffix)
        return

    file_changes_by_category: Dict[SourceFileCategory, List[str]] = group_by(
        file_changes, get_file_category)

    # Same as file_changes_by_category, but with string values of categories instead of enum
    # elements.
    file_changes_by_category_str: Dict[str, List[str]] = {}
    for category, changes in file_changes_by_category.items():
        logging.info("File changes in category '%s':", category)
        for change in sorted(changes):
            logging.info("    %s", change)
        file_changes_by_category_str[category.value] = changes

    updated_categories = set(file_changes_by_category.keys())

    results: Set[Node] = set()
    if cmd == Command.AFFECTED:
        results = dep_graph.find_affected_nodes(set(initial_nodes),
                                                args.node_type)

    elif cmd == Command.DEPS:
        for node in initial_nodes:
            results.update(node.deps)
    elif cmd == Command.REVERSE_DEPS:
        for node in initial_nodes:
            results.update(node.reverse_deps)
    else:
        raise ValueError("Unimplemented command '{}'".format(cmd))

    if args.output_test_config:
        test_basename_list = sorted([
            os.path.basename(node.path) for node in results
            if node.node_type == NodeType.TEST
        ])
        affected_basenames = set(
            [os.path.basename(node.path) for node in results])

        # These are ALL tests, not just tests affected by the changes in question, used mostly
        # for logging.
        all_test_programs = [
            node for node in dep_graph.get_nodes()
            if node.node_type == NodeType.TEST
        ]
        all_test_basenames = set(
            [os.path.basename(node.path) for node in all_test_programs])

        # A very conservative way to decide whether to run all tests. If there are changes in any
        # categories (meaning the changeset is non-empty), and there are changes in categories other
        # than C++ / Java / files known not to affect unit tests, we force re-running all tests.
        unsafe_categories = updated_categories - CATEGORIES_NOT_CAUSING_RERUN_OF_ALL_TESTS
        user_said_all_tests = get_bool_env_var('YB_RUN_ALL_TESTS')

        test_filter_re = os.getenv('YB_TEST_EXECUTION_FILTER_RE')
        manual_test_filtering_with_regex = bool(test_filter_re)

        select_all_tests_for_now = (bool(unsafe_categories)
                                    or user_said_all_tests
                                    or manual_test_filtering_with_regex)

        user_said_all_cpp_tests = get_bool_env_var('YB_RUN_ALL_CPP_TESTS')
        user_said_all_java_tests = get_bool_env_var('YB_RUN_ALL_JAVA_TESTS')
        cpp_files_changed = SourceFileCategory.CPP in updated_categories
        java_files_changed = SourceFileCategory.JAVA in updated_categories
        yb_master_or_tserver_changed = bool(affected_basenames
                                            & set(['yb-master', 'yb-tserver']))

        run_cpp_tests = select_all_tests_for_now or cpp_files_changed or user_said_all_cpp_tests

        run_java_tests = (select_all_tests_for_now or java_files_changed
                          or yb_master_or_tserver_changed
                          or user_said_all_java_tests)

        if select_all_tests_for_now:
            if user_said_all_tests:
                logging.info(
                    "User explicitly specified that all tests should be run")
            elif manual_test_filtering_with_regex:
                logging.info(
                    "YB_TEST_EXECUTION_FILTER_RE specified: %s, will filter tests at a later step",
                    test_filter_re)
            else:
                logging.info(
                    "All tests should be run based on file changes in these categories: {}"
                    .format(', '.join(
                        sorted([
                            category.value for category in unsafe_categories
                        ]))))
        else:
            if run_cpp_tests:
                if user_said_all_cpp_tests:
                    logging.info(
                        "User explicitly specified that all C++ tests should be run"
                    )
                else:
                    logging.info(
                        'Will run some C++ tests, some C++ files changed')
            if run_java_tests:
                if user_said_all_java_tests:
                    logging.info(
                        "User explicitly specified that all Java tests should be run"
                    )
                else:
                    logging.info('Will run all Java tests, ' + ' and '.join(
                        (['some Java files changed'] if java_files_changed else
                         []) + (['yb-{master,tserver} binaries changed']
                                if yb_master_or_tserver_changed else [])))

        if run_cpp_tests and not test_basename_list and not select_all_tests_for_now:
            logging.info(
                'There are no C++ test programs affected by the changes, '
                'will skip running C++ tests.')
            run_cpp_tests = False

        test_conf = dict(run_cpp_tests=run_cpp_tests,
                         run_java_tests=run_java_tests,
                         file_changes_by_category=file_changes_by_category_str)
        if test_filter_re:
            test_conf.update(test_filter_re=test_filter_re)

        if not select_all_tests_for_now:
            # We only have this kind of fine-grained filtering for C++ test programs, and for Java
            # tests we either run all of them or none.
            test_conf['cpp_test_programs'] = test_basename_list
            if len(all_test_basenames) > 0:
                logging.info(
                    "{} C++ test programs should be run (out of {} possible, {}%)"
                    .format(
                        len(test_basename_list), len(all_test_basenames),
                        "%.1f" % (100.0 * len(test_basename_list) /
                                  len(all_test_basenames))))
            if len(test_basename_list) != len(all_test_basenames):
                logging.info(
                    "The following C++ test programs will be run: {}".format(
                        ", ".join(sorted(test_basename_list))))

        with open(args.output_test_config, 'w') as output_file:
            output_file.write(json.dumps(test_conf, indent=2) + "\n")
        logging.info("Wrote a test configuration to {}".format(
            args.output_test_config))
    else:
        # For ad-hoc command-line use, mostly for testing and sanity-checking.
        for node in sorted(results,
                           key=lambda node: [node.node_type.value, node.path]):
            print(node)
        logging.info("Found {} results".format(len(results)))
Example #22
    def make_postgres(self) -> None:
        self.set_env_vars('make')
        # Postgresql requires MAKELEVEL to be 0 or non-set when calling its make.
        # But in case YB project is built with make, MAKELEVEL is not 0 at this point.
        make_cmd = ['make', 'MAKELEVEL=0']
        if is_macos_arm64():
            make_cmd = ['arch', '-arm64'] + make_cmd

        make_parallelism_str: Optional[str] = os.environ.get(
            'YB_MAKE_PARALLELISM')
        make_parallelism: Optional[int] = None
        if make_parallelism_str is not None:
            make_parallelism = int(make_parallelism_str)
        if self.build_uses_remote_compilation and not self.remote_compilation_allowed:
            # Since we're building everything locally in this case, and YB_MAKE_PARALLELISM is
            # likely specified for distributed compilation, cap it at some factor times the number
            # of CPU cores.
            parallelism_cap = multiprocessing.cpu_count() * 2
            if make_parallelism:
                make_parallelism = min(parallelism_cap, make_parallelism)
            else:
                make_parallelism = parallelism_cap

        if make_parallelism:
            make_cmd += ['-j', str(int(make_parallelism))]

        self.set_env_var('YB_COMPILER_TYPE', self.compiler_type)

        # Create a script allowing to easily run "make" from the build directory with the right
        # environment.
        env_script_content = ''
        for env_var_name in CONFIG_ENV_VARS:
            env_var_value = os.environ.get(env_var_name)
            if env_var_value is None:
                raise RuntimeError("Expected env var %s to be set" %
                                   env_var_name)
            env_script_content += "export %s=%s\n" % (
                env_var_name, quote_for_bash(env_var_value))

        pg_compile_commands_paths = []

        third_party_extensions_dir = os.path.join(self.pg_build_root,
                                                  'third-party-extensions')
        work_dirs = [
            self.pg_build_root,
            os.path.join(self.pg_build_root, 'contrib'),
            third_party_extensions_dir
        ]

        for work_dir in work_dirs:
            with WorkDirContext(work_dir):
                # Create a script to run Make easily with the right environment.
                make_script_path = 'make.sh'
                with open(make_script_path, 'w') as out_f:
                    out_f.write('#!/usr/bin/env bash\n'
                                '. "${BASH_SOURCE%/*}"/env.sh\n'
                                'make "$@"\n')
                with open('env.sh', 'w') as out_f:
                    out_f.write(env_script_content)

                run_program(['chmod', 'u+x', make_script_path])

                make_cmd_suffix = []
                if work_dir == third_party_extensions_dir:
                    make_cmd_suffix = ['PG_CONFIG=' + self.pg_config_path]

                # Actually run Make.
                if is_verbose_mode():
                    logging.info("Running make in the %s directory", work_dir)

                complete_make_cmd = make_cmd + make_cmd_suffix
                complete_make_cmd_str = shlex_join(complete_make_cmd)
                complete_make_install_cmd = (
                    make_cmd + ['install'] + make_cmd_suffix)
                attempt = 0
                while attempt <= TRANSIENT_BUILD_RETRIES:
                    attempt += 1
                    make_result = run_program(
                        complete_make_cmd_str,
                        stdout_stderr_prefix='make',
                        cwd=work_dir,
                        error_ok=True,
                        shell=True  # TODO: get rid of shell=True.
                    )
                    if make_result.failure():
                        transient_err = False
                        stderr_lines = make_result.get_stderr().split('\n')
                        for line in stderr_lines:
                            if any(transient_error_pattern in line
                                   for transient_error_pattern in
                                   TRANSIENT_BUILD_ERRORS):
                                transient_err = True
                                logging.info(f'Transient error: {line}')
                                break
                        if transient_err:
                            logging.info(
                                f"Transient error during build attempt {attempt}. "
                                f"Re-trying make command: {complete_make_cmd_str}."
                            )
                        else:
                            make_result.print_output_to_stdout()
                            raise RuntimeError("PostgreSQL compilation failed")
                    else:
                        logging.info(
                            "Successfully ran 'make' in the %s directory",
                            work_dir)
                        break  # No error, break out of retry loop
                else:
                    raise RuntimeError(
                        f"Maximum build attempts reached ({TRANSIENT_BUILD_RETRIES} attempts)."
                    )

                if self.build_type != 'compilecmds' or work_dir == self.pg_build_root:
                    run_program(
                        ' '.join(
                            shlex.quote(arg)
                            for arg in complete_make_install_cmd),
                        stdout_stderr_prefix='make_install',
                        cwd=work_dir,
                        error_ok=True,
                        shell=True  # TODO: get rid of shell=True.
                    ).print_output_and_raise_error_if_failed()
                    logging.info(
                        "Successfully ran 'make install' in the %s directory",
                        work_dir)
                else:
                    logging.info(
                        "Not running 'make install' in the %s directory since we are only "
                        "generating the compilation database", work_dir)

                if self.export_compile_commands:
                    logging.info(
                        "Generating the compilation database in directory '%s'",
                        work_dir)

                    compile_commands_path = os.path.join(
                        work_dir, 'compile_commands.json')
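                    # 'compiledb make -n' performs a dry run of make and parses its output to
                    # produce compile_commands.json without recompiling anything.
                    # YB_PG_SKIP_CONFIG_STATUS is set only for the duration of that run
                    # (presumably to avoid re-running config.status).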
                    self.set_env_var('YB_PG_SKIP_CONFIG_STATUS', '1')
                    if not os.path.exists(compile_commands_path):
                        run_program(['compiledb', 'make', '-n'] +
                                    make_cmd_suffix,
                                    capture_output=False)
                    del os.environ['YB_PG_SKIP_CONFIG_STATUS']

                    if not os.path.exists(compile_commands_path):
                        raise RuntimeError(
                            "Failed to generate compilation database at: %s" %
                            compile_commands_path)
                    pg_compile_commands_paths.append(compile_commands_path)

        if self.export_compile_commands:
            self.write_compile_commands_files(pg_compile_commands_paths)
Example #23
def main():
    parser = argparse.ArgumentParser(
        description='A tool for working with the dependency graph')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Enable debug output')
    parser.add_argument(
        '-r',
        '--rebuild-graph',
        action='store_true',
        help='Rebuild the dependency graph and save it to a file')
    parser.add_argument('--node-type',
                        help='Node type to look for',
                        default='any',
                        choices=['test', 'object', 'library', 'source', 'any'])
    parser.add_argument(
        '--file-regex',
        help='Regular expression for file names to select as initial nodes for '
        'querying the dependency graph.')
    parser.add_argument(
        '--file-name-glob',
        help='Like --file-regex, but applies only to the file name and uses glob '
        'syntax instead of regex syntax.')
    parser.add_argument(
        '--git-diff',
        help='Figure out the list of files to use as starting points in the '
        'dependency graph traversal by diffing the current state of the code '
        'against this commit. This could also be anything that could be '
        'passed to "git diff" as a single argument.')
    parser.add_argument(
        '--git-commit',
        help='Similar to --git-diff, but takes a git commit ref (e.g. sha1 or '
        'branch) and uses the set of files from that commit.')
    parser.add_argument(
        '--build-root',
        required=True,
        help='E.g. <some_root>/build/debug-gcc-dynamic-community')
    parser.add_argument('command', choices=COMMANDS, help='Command to perform')
    parser.add_argument(
        '--output-test-config',
        help=
        'Output a "test configuration file" to this path: a JSON file containing '
        'the resulting list of C++ tests to run, a flag indicating whether to '
        'run Java tests, etc.')
    parser.add_argument(
        '--incomplete-build',
        action='store_true',
        help='Skip checking for file existence. Allows using the tool after '
        'build artifacts have been deleted.')
    args = parser.parse_args()

    if args.file_regex and args.file_name_glob:
        raise RuntimeError(
            '--file-regex and --file-name-glob are incompatible')

    cmd = args.command
    if (not args.file_regex and not args.file_name_glob
            and not args.rebuild_graph and not args.git_diff
            and not args.git_commit and cmd != SELF_TEST_CMD):
        raise RuntimeError(
            "Neither of --file-regex, --file-name-glob, --git-{diff,commit}, or "
            "--rebuild-graph are specified, and the command is not " +
            SELF_TEST_CMD)

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format=
        "[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    conf = Configuration(args)
    if conf.file_regex and args.git_diff:
        raise RuntimeError(
            "--git-diff is incompatible with --file-{regex,name-glob}")

    if args.git_diff and args.git_commit:
        raise RuntimeError('--git-diff and --git-commit are incompatible')

    if args.git_commit:
        args.git_diff = "{}^..{}".format(args.git_commit, args.git_commit)

    graph_cache_path = os.path.join(args.build_root, 'dependency_graph.json')
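    # The dependency graph is cached as JSON in the build root; rebuild it only when explicitly
    # requested or when the cache file does not exist yet, and otherwise load it from the cache.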
    if args.rebuild_graph or not os.path.isfile(graph_cache_path):
        logging.info(
            "Generating a dependency graph at '{}'".format(graph_cache_path))
        dep_graph_builder = DependencyGraphBuilder(conf)
        dep_graph = dep_graph_builder.build()
        dep_graph.save_as_json(graph_cache_path)
    else:
        start_time = datetime.now()
        with open(graph_cache_path) as graph_input_file:
            dep_graph = DependencyGraph(conf,
                                        json_data=json.load(graph_input_file))
        logging.info("Loaded dependency graph from '%s' in %.2f sec" %
                     (graph_cache_path,
                      (datetime.now() - start_time).total_seconds()))
        dep_graph.validate_node_existence()

    if cmd == SELF_TEST_CMD:
        run_self_test(dep_graph)
        return

    updated_categories = None
    file_changes = []
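    # Determine the initial set of graph nodes, either from the files changed according to
    # 'git diff' or from a file name regex.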
    if args.git_diff:
        old_working_dir = os.getcwd()
        with WorkDirContext(conf.yb_src_root):
            git_diff_output = subprocess.check_output(
                ['git', 'diff', args.git_diff, '--name-only']).decode('utf-8')

            initial_nodes = set()
            file_paths = set()
            for file_path in git_diff_output.split("\n"):
                file_path = file_path.strip()
                if not file_path:
                    continue
                file_changes.append(file_path)
                # It is important that we invoke os.path.realpath with the current directory set to
                # the git repository root.
                file_path = os.path.realpath(file_path)
                file_paths.add(file_path)
                node = dep_graph.node_by_path.get(file_path)
                if node:
                    initial_nodes.add(node)

        if not initial_nodes:
            logging.warning(
                "Did not find any graph nodes for this set of files: {}".
                format(file_paths))
            for basename in set(
                [os.path.basename(file_path) for file_path in file_paths]):
                logging.warning("Nodes for basename '{}': {}".format(
                    basename, dep_graph.find_nodes_by_basename(basename)))

        file_changes_by_category = group_by(file_changes, get_file_category)
        for category, changes in file_changes_by_category.items():
            logging.info("File changes in category '{}':".format(category))
            for change in sorted(changes):
                logging.info("    {}".format(change))
        updated_categories = set(file_changes_by_category.keys())

    elif conf.file_regex:
        logging.info("Using file name regex: {}".format(conf.file_regex))
        initial_nodes = dep_graph.find_nodes_by_regex(conf.file_regex)
    else:
        raise RuntimeError(
            "Could not figure out how to generate the initial set of files")

    results = set()
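    # Dispatch on the requested command to compute the resulting set of nodes.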
    if cmd == LIST_AFFECTED_CMD:
        results = dep_graph.find_affected_nodes(initial_nodes, args.node_type)
    elif cmd == LIST_DEPS_CMD:
        for node in initial_nodes:
            results.update(node.deps)
    elif cmd == LIST_REVERSE_DEPS_CMD:
        for node in initial_nodes:
            results.update(node.reverse_deps)
    else:
        raise RuntimeError("Unimplemented command '{}'".format(command))

    if args.output_test_config:
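        # Write out a JSON "test configuration" describing which C++ and Java tests should be
        # run for this change set.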
        test_basename_list = sorted([
            os.path.basename(node.path) for node in results
            if node.node_type == 'test'
        ])
        affected_basenames = set(
            [os.path.basename(node.path) for node in results])

        # These are ALL tests, not just tests affected by the changes in question, used mostly
        # for logging.
        all_test_programs = [
            node for node in dep_graph.get_nodes() if node.node_type == 'test'
        ]
        all_test_basenames = set(
            [os.path.basename(node.path) for node in all_test_programs])

        # A very conservative way to decide whether to run all tests. If there are changes in any
        # categories (meaning the changeset is non-empty), and there are changes in categories other
        # than C++ / Java / files known not to affect unit tests, we force re-running all tests.
        unsafe_categories = updated_categories - CATEGORIES_NOT_CAUSING_RERUN_OF_ALL_TESTS
        user_said_all_tests = get_bool_env_var('YB_RUN_ALL_TESTS')
        run_all_tests = bool(unsafe_categories) or user_said_all_tests

        user_said_all_cpp_tests = get_bool_env_var('YB_RUN_ALL_CPP_TESTS')
        user_said_all_java_tests = get_bool_env_var('YB_RUN_ALL_JAVA_TESTS')
        cpp_files_changed = 'c++' in updated_categories
        java_files_changed = 'java' in updated_categories
        yb_master_or_tserver_changed = bool(affected_basenames
                                            & set(['yb-master', 'yb-tserver']))

        run_cpp_tests = run_all_tests or cpp_files_changed or user_said_all_cpp_tests
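        # Changes to the yb-master or yb-tserver binaries also force the Java tests to run, as
        # reflected in the log message below.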
        run_java_tests = (run_all_tests or java_files_changed
                          or yb_master_or_tserver_changed
                          or user_said_all_java_tests)

        if run_all_tests:
            if user_said_all_tests:
                logging.info(
                    "User explicitly specified that all tests should be run")
            else:
                logging.info(
                    "All tests should be run based on file changes in these categories: {}"
                    .format(', '.join(sorted(unsafe_categories))))
        else:
            if run_cpp_tests:
                if user_said_all_cpp_tests:
                    logging.info(
                        "User explicitly specified that all C++ tests should be run"
                    )
                else:
                    logging.info(
                        'Will run some C++ tests, some C++ files changed')
            if run_java_tests:
                if user_said_all_java_tests:
                    logging.info(
                        "User explicitly specified that all Java tests should be run"
                    )
                else:
                    logging.info('Will run all Java tests, ' + ' and '.join(
                        (['some Java files changed'] if java_files_changed else
                         []) + (['yb-{master,tserver} binaries changed']
                                if yb_master_or_tserver_changed else [])))

        if run_cpp_tests and not test_basename_list and not run_all_tests:
            logging.info(
                'There are no C++ test programs affected by the changes, '
                'will skip running C++ tests.')
            run_cpp_tests = False

        test_conf = dict(run_cpp_tests=run_cpp_tests,
                         run_java_tests=run_java_tests,
                         file_changes_by_category=file_changes_by_category)
        if not run_all_tests:
            test_conf['cpp_test_programs'] = test_basename_list
            logging.info(
                "{} C++ test programs should be run (out of {} possible, {}%)".
                format(
                    len(test_basename_list), len(all_test_basenames),
                    "%.1f" % (100.0 * len(test_basename_list) /
                              len(all_test_basenames))))
            if len(test_basename_list) != len(all_test_basenames):
                logging.info(
                    "The following C++ test programs will be run: {}".format(
                        ", ".join(sorted(test_basename_list))))

        with open(args.output_test_config, 'w') as output_file:
            output_file.write(json.dumps(test_conf, indent=2) + "\n")
        logging.info("Wrote a test configuration to {}".format(
            args.output_test_config))
    else:
        # For ad-hoc command-line use, mostly for testing and sanity-checking.
        for node in sorted(results,
                           key=lambda node: [node.node_type, node.path]):
            print(node)
        logging.info("Found {} results".format(len(results)))