Example #1
  def __init__(self, *args, **kwargs):
    super(RunTracker, self).__init__(*args, **kwargs)
    run_timestamp = time.time()
    cmd_line = ' '.join(['pants'] + sys.argv[1:])

    # run_id is safe for use in paths.
    millis = int((run_timestamp * 1000) % 1000)
    run_id = 'pants_run_{}_{}_{}'.format(
               time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(run_timestamp)), millis,
               uuid.uuid4().hex)

    info_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, run_timestamp)
    self.run_info.add_info('cmd_line', cmd_line)

    # Create a 'latest' symlink after the add_info calls, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')

    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(os.path.join(self.run_info_dir,
                                                             'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = ArtifactCacheStats(
      os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Number of threads for foreground work.
    self._num_foreground_workers = self.get_options().num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = self.get_options().num_background_workers

    # We report to this Report.
    self.report = None

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For main thread work. Created on start().
    self._main_root_workunit = None

    # For background work.  Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
    SubprocPool.set_num_processes(self._num_foreground_workers)
    SubprocPool.foreground()

    self._aborted = False
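
Every example here calls relative_symlink to repoint the 'latest' link, but the helper itself isn't shown. Below is a minimal sketch of what such a helper might look like, assuming the target should be stored relative to the link's directory and that concurrent runs may race on the link (the signature is illustrative, not the real one; the race handling mirrors Example #8):

import errno
import os


def relative_symlink(source_path, link_path):
    """Hypothetical sketch: (re)point link_path at source_path.

    The target is stored relative to the link's directory, so links stay
    valid if the whole workdir is relocated. Tolerates races with other
    runs that create or delete the link concurrently.
    """
    link_dir = os.path.dirname(link_path)
    rel_source = os.path.relpath(source_path, link_dir)
    try:
        if os.path.lexists(link_path):
            os.unlink(link_path)
        os.symlink(rel_source, link_path)
    except OSError as e:
        # Another run may beat us to deletion or creation (cf. Example #8).
        if e.errno not in (errno.EEXIST, errno.ENOENT):
            raise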
Example #2
    def start(self, all_options, run_start_time=None):
        """Start tracking this pants run."""
        if self.run_info:
            raise AssertionError(
                "RunTracker.start must not be called multiple times.")

        # Initialize the run.

        info_dir = os.path.join(self.options.pants_workdir, self.options_scope)
        self.run_info_dir = os.path.join(info_dir, self.run_id)
        self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))
        self.run_info.add_basic_info(self.run_id, self._run_timestamp)
        self.run_info.add_info("cmd_line", self._cmd_line)
        if self.options.parent_build_id:
            self.run_info.add_info("parent_build_id",
                                   self.options.parent_build_id)

        # Create a 'latest' symlink after the add_info calls, so we're guaranteed that the file exists.
        link_to_latest = os.path.join(os.path.dirname(self.run_info_dir),
                                      "latest")

        relative_symlink(self.run_info_dir, link_to_latest)

        # Time spent in a workunit, including its children.
        self.cumulative_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, "cumulative_timings"))

        # Time spent in a workunit, not including its children.
        self.self_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, "self_timings"))
        # Daemon stats.
        self.pantsd_stats = PantsDaemonStats()

        self._all_options = all_options

        self.report = Report()

        # Set up the JsonReporter for V2 stats.
        if self._stats_version == 2:
            json_reporter_settings = JsonReporter.Settings(
                log_level=Report.INFO)
            self.json_reporter = JsonReporter(self, json_reporter_settings)
            self.report.add_reporter("json", self.json_reporter)

        self.report.open()

        # And create the workunit.
        self._main_root_workunit = WorkUnit(run_info_dir=self.run_info_dir,
                                            parent=None,
                                            name=RunTracker.DEFAULT_ROOT_NAME,
                                            cmd=None)
        self.register_thread(self._main_root_workunit)
        # Set the true start time in the case of e.g. the daemon.
        self._main_root_workunit.start(run_start_time)
        self.report.start_workunit(self._main_root_workunit)
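
register_thread is not shown above, but Example #1's comment explains the mechanism: self._threadlocal.current_workunit holds the calling thread's current workunit. A minimal sketch of that per-thread bookkeeping pattern, with hypothetical class and method names:

import threading


class WorkUnitRegistry:
    """Hypothetical sketch of per-thread current-workunit tracking."""

    def __init__(self):
        # Each thread sees its own `current_workunit` attribute.
        self._threadlocal = threading.local()

    def register_thread(self, parent_workunit):
        # Called from a worker thread before it starts reporting work.
        self._threadlocal.current_workunit = parent_workunit

    def current_workunit(self):
        # Threads that never registered fall back to None.
        return getattr(self._threadlocal, 'current_workunit', None)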
Example #3
    def initialize(self, all_options):
        """Create run_info and relevant directories, and return the run id.

    Must be called before `start`.
    """
        if self.run_info:
            raise AssertionError(
                'RunTracker.initialize must not be called multiple times.')

        # Initialize the run.

        # Select a globally unique ID for the run, that sorts by time.
        millis = int((self._run_timestamp * 1000) % 1000)
        # run_uuid is used as a part of run_id and also as a trace_id for Zipkin tracing
        run_uuid = uuid.uuid4().hex
        run_id = 'pants_run_{}_{}_{}'.format(
            time.strftime('%Y_%m_%d_%H_%M_%S',
                          time.localtime(self._run_timestamp)), millis,
            run_uuid)

        info_dir = os.path.join(self.get_options().pants_workdir,
                                self.options_scope)
        self.run_info_dir = os.path.join(info_dir, run_id)
        self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
        self.run_info.add_basic_info(run_id, self._run_timestamp)
        self.run_info.add_info('cmd_line', self._cmd_line)

        # Create a 'latest' symlink after the add_info calls, so we're guaranteed that the file exists.
        link_to_latest = os.path.join(os.path.dirname(self.run_info_dir),
                                      'latest')

        relative_symlink(self.run_info_dir, link_to_latest)

        # Time spent in a workunit, including its children.
        self.cumulative_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, 'cumulative_timings'))

        # Time spent in a workunit, not including its children.
        self.self_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, 'self_timings'))

        # Hit/miss stats for the artifact cache.
        self.artifact_cache_stats = ArtifactCacheStats(
            os.path.join(self.run_info_dir, 'artifact_cache_stats'))

        # Daemon stats.
        self.pantsd_stats = PantsDaemonStats()

        self._all_options = all_options

        return (run_id, run_uuid)
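
The id construction above runs standalone; the snippet below demonstrates the format (the printed value is illustrative):

import time
import uuid

run_timestamp = time.time()
millis = int((run_timestamp * 1000) % 1000)
run_id = 'pants_run_{}_{}_{}'.format(
    time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(run_timestamp)),
    millis, uuid.uuid4().hex)
# e.g. 'pants_run_2024_01_05_13_07_42_318_9f1c...': the zero-padded
# timestamp sorts chronologically to the second, and the uuid (also used
# as the Zipkin trace_id per the comment above) makes the id globally
# unique and safe to use as a directory name.
print(run_id)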
Example #4
    def start(self, all_options: Options, run_start_time: float) -> None:
        """Start tracking this pants run."""
        if self.run_info:
            raise AssertionError(
                "RunTracker.start must not be called multiple times.")

        # Initialize the run.

        info_dir = os.path.join(self.options.pants_workdir, self.options_scope)
        self.run_info_dir = os.path.join(info_dir, self.run_id)
        self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))
        self.run_info.add_basic_info(self.run_id, self._run_timestamp)
        self.run_info.add_info("cmd_line", self._cmd_line)

        # Create a 'latest' symlink after the add_info calls, so we're guaranteed that the file exists.
        link_to_latest = os.path.join(os.path.dirname(self.run_info_dir),
                                      "latest")

        relative_symlink(self.run_info_dir, link_to_latest)

        # Time spent in a workunit, including its children.
        self.cumulative_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, "cumulative_timings"))

        # Time spent in a workunit, not including its children.
        self.self_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, "self_timings"))
        # pantsd stats.
        self._pantsd_metrics: Dict[str, int] = dict()

        self._all_options = all_options

        self.report = Report()
        self.report.open()

        # And create the workunit.
        self._main_root_workunit = WorkUnit(run_info_dir=self.run_info_dir,
                                            parent=None,
                                            name=RunTracker.DEFAULT_ROOT_NAME,
                                            cmd=None)
        self.register_thread(self._main_root_workunit)
        # Set the true start time in the case of e.g. the daemon.
        self._main_root_workunit.start(run_start_time)
        self.report.start_workunit(self._main_root_workunit)

        goal_names: Tuple[str, ...] = tuple(all_options.goals)
        self._v2_goal_rule_names = goal_names

        self.run_logs_file = Path(self.run_info_dir, "logs")
        self.native.set_per_run_log_path(str(self.run_logs_file))
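
Example #4 replaces the PantsDaemonStats object used in earlier versions with a plain Dict[str, int]. The accessors are not shown; what follows is a purely hypothetical sketch of how such a metrics store might be written and read (the method names are assumptions, not confirmed API):

from typing import Dict


class PantsdMetricsSketch:
    """Hypothetical sketch of the Dict[str, int] daemon-metrics store."""

    def __init__(self) -> None:
        self._pantsd_metrics: Dict[str, int] = dict()

    def set_metrics(self, metrics: Dict[str, int]) -> None:
        # Overwrite wholesale, assuming the daemon reports a full
        # snapshot for each run.
        self._pantsd_metrics = dict(metrics)

    @property
    def metrics(self) -> Dict[str, int]:
        # Return a defensive copy so callers can't mutate our state.
        return dict(self._pantsd_metrics)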
Example #5
    def get_critical_path_timings(self):
        """Get the cumulative timings of each goal and all of the goals it (transitively) depended
        on."""
        setup_workunit = WorkUnitLabel.SETUP.lower()
        transitive_dependencies = dict()
        for goal_info in self._sorted_goal_infos:
            deps = transitive_dependencies.setdefault(goal_info.goal.name, set())
            for dep in goal_info.goal_dependencies:
                deps.add(dep.name)
                # Default to an empty set: set.update(None) would raise if
                # dep.name hadn't been visited yet.
                deps.update(transitive_dependencies.get(dep.name, set()))
            # Add the setup workunit as a dep manually, as it's otherwise unaccounted for.
            deps.add(setup_workunit)
        raw_timings = dict()
        for entry in self.cumulative_timings.get_all():
            raw_timings[entry["label"]] = entry["timing"]

        critical_path_timings = AggregatedTimings()

        def add_to_timings(goal, dep):
            tracking_label = get_label(goal)
            timing_label = get_label(dep)
            critical_path_timings.add_timing(tracking_label, raw_timings.get(timing_label, 0.0))

        def get_label(dep):
            return f"{RunTracker.DEFAULT_ROOT_NAME}:{dep}"

        # Add setup workunit to critical_path_timings manually, as it's otherwise unaccounted for.
        add_to_timings(setup_workunit, setup_workunit)

        for goal, deps in transitive_dependencies.items():
            add_to_timings(goal, goal)
            for dep in deps:
                add_to_timings(goal, dep)

        return critical_path_timings
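
To see what the transitive-dependency accumulation produces, here is a small worked example with a hypothetical goal graph (compile depends on gen, test depends on compile), using plain dicts in place of AggregatedTimings and 'main' as an illustrative stand-in for RunTracker.DEFAULT_ROOT_NAME:

# Hypothetical goal graph, topologically sorted (deps before dependents),
# mirroring the accumulation loop in get_critical_path_timings above.
sorted_goals = [('gen', []), ('compile', ['gen']), ('test', ['compile'])]

transitive_dependencies = {}
for goal, goal_deps in sorted_goals:
    deps = transitive_dependencies.setdefault(goal, set())
    for dep in goal_deps:
        deps.add(dep)
        deps.update(transitive_dependencies.get(dep, set()))

# {'gen': set(), 'compile': {'gen'}, 'test': {'gen', 'compile'}}
print(transitive_dependencies)

raw = {'main:gen': 2.0, 'main:compile': 5.0, 'main:test': 3.0}
critical = {}
for goal, deps in transitive_dependencies.items():
    label = 'main:{}'.format(goal)
    critical[label] = raw.get(label, 0.0)
    for dep in deps:
        # Accumulate, as AggregatedTimings.add_timing does per label.
        critical[label] += raw.get('main:{}'.format(dep), 0.0)

# test's critical-path time includes compile and gen: 3 + 5 + 2 = 10.0
print(critical)  # {'main:gen': 2.0, 'main:compile': 7.0, 'main:test': 10.0}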
Example #6
    def initialize(self, all_options):
        """Create run_info and relevant directories, and return the run id.

    Must be called before `start`.
    """
        if self.run_info:
            raise AssertionError(
                'RunTracker.initialize must not be called multiple times.')

        # Initialize the run.

        info_dir = os.path.join(self.get_options().pants_workdir,
                                self.options_scope)
        self.run_info_dir = os.path.join(info_dir, self.run_id)
        self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
        self.run_info.add_basic_info(self.run_id, self._run_timestamp)
        self.run_info.add_info('cmd_line', self._cmd_line)
        if self.get_options().parent_build_id:
            self.run_info.add_info('parent_build_id',
                                   self.get_options().parent_build_id)

        # Create a 'latest' symlink after the add_info calls, so we're guaranteed that the file exists.
        link_to_latest = os.path.join(os.path.dirname(self.run_info_dir),
                                      'latest')

        relative_symlink(self.run_info_dir, link_to_latest)

        # Time spent in a workunit, including its children.
        self.cumulative_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, 'cumulative_timings'))

        # Time spent in a workunit, not including its children.
        self.self_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, 'self_timings'))

        # Hit/miss stats for the artifact cache.
        self.artifact_cache_stats = ArtifactCacheStats(
            os.path.join(self.run_info_dir, 'artifact_cache_stats'))

        # Daemon stats.
        self.pantsd_stats = PantsDaemonStats()

        self._all_options = all_options

        return (self.run_id, self.run_uuid)
Example #7
  def get_critical_path_timings(self):
    """
    Get the cumulative timings of each goal and all of the goals it (transitively) depended on.
    """
    transitive_dependencies = dict()
    for goal_info in self._sorted_goal_infos:
      deps = transitive_dependencies.setdefault(goal_info.goal.name, set())
      for dep in goal_info.goal_dependencies:
        deps.add(dep.name)
        # Default to an empty set: set.update(None) would raise if dep.name
        # hadn't been visited yet.
        deps.update(transitive_dependencies.get(dep.name, set()))

    raw_timings = dict()
    for entry in self.cumulative_timings.get_all():
      raw_timings[entry["label"]] = entry["timing"]

    timings = AggregatedTimings()
    for goal, deps in transitive_dependencies.items():
      label = "{}:{}".format(RunTracker.DEFAULT_ROOT_NAME, goal)
      timings.add_timing(label, raw_timings.get(label, 0.0))
      for dep in deps:
        dep_label = "{}:{}".format(RunTracker.DEFAULT_ROOT_NAME, dep)
        timings.add_timing(label, raw_timings.get(dep_label, 0.0))
    return timings
Example #8
    def __init__(self,
                 info_dir,
                 stats_upload_url=None,
                 stats_upload_timeout=2,
                 num_foreground_workers=8,
                 num_background_workers=8):
        self.run_timestamp = time.time()  # A double, so we get subsecond precision for ids.
        cmd_line = ' '.join(['./pants'] + sys.argv[1:])

        # run_id is safe for use in paths.
        millis = int((self.run_timestamp * 1000) % 1000)
        run_id = 'pants_run_%s_%d' % \
                 (time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self.run_timestamp)), millis)

        self.run_info_dir = os.path.join(info_dir, run_id)
        self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
        self.run_info.add_basic_info(run_id, self.run_timestamp)
        self.run_info.add_info('cmd_line', cmd_line)
        self.stats_url = stats_upload_url
        self.stats_timeout = stats_upload_timeout

        # Create a 'latest' symlink after the add_info calls, so we're guaranteed that the file exists.
        link_to_latest = os.path.join(os.path.dirname(self.run_info_dir),
                                      'latest')

        try:
            if os.path.lexists(link_to_latest):
                os.unlink(link_to_latest)
            os.symlink(self.run_info_dir, link_to_latest)
        except OSError as e:
            # Another run may beat us to deletion or creation.
            if not (e.errno == errno.EEXIST or e.errno == errno.ENOENT):
                raise

        # Time spent in a workunit, including its children.
        self.cumulative_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, 'cumulative_timings'))

        # Time spent in a workunit, not including its children.
        self.self_timings = AggregatedTimings(
            os.path.join(self.run_info_dir, 'self_timings'))

        # Hit/miss stats for the artifact cache.
        self.artifact_cache_stats = ArtifactCacheStats(
            os.path.join(self.run_info_dir, 'artifact_cache_stats'))

        # Number of threads for foreground work.
        self._num_foreground_workers = num_foreground_workers

        # Number of threads for background work.
        self._num_background_workers = num_background_workers

        # We report to this Report.
        self.report = None

        # self._threadlocal.current_workunit contains the current workunit for the calling thread.
        # Note that multiple threads may share a name (e.g., all the threads in a pool).
        self._threadlocal = threading.local()

        # For main thread work. Created on start().
        self._main_root_workunit = None

        # For concurrent foreground work.  Created lazily if needed.
        # Associated with the main thread's root workunit.
        self._foreground_worker_pool = None

        # For background work.  Created lazily if needed.
        self._background_worker_pool = None
        self._background_root_workunit = None

        # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
        SubprocPool.foreground()

        self._aborted = False
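
The final lines touch SubprocPool, whose docstring (referenced in the comment above) is not reproduced here. The point of calling foreground() this early is to fork the worker processes before the parent accumulates state. A minimal sketch of that fork-early, lazily-created singleton pattern, with hypothetical names built on the standard multiprocessing module:

import multiprocessing
import threading


class SubprocPoolSketch:
    """Hypothetical sketch: a process pool forked eagerly at startup.

    Forking while the parent's memory image is still small keeps the
    children cheap; forking later would copy whatever state the parent
    had built up by then.
    """

    _pool = None
    _lock = threading.Lock()
    _num_processes = multiprocessing.cpu_count()

    @classmethod
    def set_num_processes(cls, num_processes):
        cls._num_processes = num_processes

    @classmethod
    def foreground(cls):
        # Create the pool exactly once, under a lock; subsequent calls
        # return the already-forked pool.
        with cls._lock:
            if cls._pool is None:
                cls._pool = multiprocessing.Pool(processes=cls._num_processes)
            return cls._pool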