Exemplo n.º 1
0
    def output_profile_line(
        self,
        fname: Filename,
        line_no: LineNumber,
        line: SyntaxLine,
        console: Console,
        tbl: Table,
        stats: ScaleneStatistics,
        profile_this_code: Callable[[Filename, LineNumber], bool],
        force_print: bool = False,
        suppress_lineno_print: bool = False,
        is_function_summary: bool = False,
        profile_memory: bool = False,
        reduced_profile: bool = False,
    ) -> bool:
        """Print at most one line of the profile (true == printed one)."""
        if not force_print and not profile_this_code(fname, line_no):
            return False
        current_max = stats.max_footprint
        # Prepare output values.
        n_cpu_samples_c = stats.cpu_samples_c[fname][line_no]
        # Correct for negative CPU sample counts. This can happen
        # because of floating point inaccuracies, since we perform
        # subtraction to compute it.
        if n_cpu_samples_c < 0:
            n_cpu_samples_c = 0
        n_cpu_samples_python = stats.cpu_samples_python[fname][line_no]
        n_gpu_samples = stats.gpu_samples[fname][line_no]

        # Compute percentages of CPU time.
        if stats.total_cpu_samples != 0:
            n_cpu_percent_c = n_cpu_samples_c * 100 / stats.total_cpu_samples
            n_cpu_percent_python = (n_cpu_samples_python * 100 /
                                    stats.total_cpu_samples)
        else:
            n_cpu_percent_c = 0
            n_cpu_percent_python = 0

        if stats.total_gpu_samples != 0:
            n_gpu_percent = n_gpu_samples * 100 / stats.total_gpu_samples
        else:
            n_gpu_percent = 0

        # Now, memory stats.
        # Accumulate each one from every byte index.
        n_malloc_mb = 0.0
        n_python_malloc_mb = 0.0
        n_free_mb = 0.0
        for index in stats.bytei_map[fname][line_no]:
            mallocs = stats.memory_malloc_samples[fname][line_no][index]
            n_malloc_mb += mallocs
            n_python_malloc_mb += stats.memory_python_samples[fname][line_no][
                index]
            frees = stats.memory_free_samples[fname][line_no][index]
            n_free_mb += frees

        n_usage_fraction = (0 if not stats.total_memory_malloc_samples else
                            n_malloc_mb / stats.total_memory_malloc_samples)
        n_python_fraction = (
            0 if not n_malloc_mb else n_python_malloc_mb /
            stats.total_memory_malloc_samples  # was / n_malloc_mb
        )

        if False:
            # Currently disabled; possibly use in another column?
            # Correct for number of samples
            for bytei in stats.memory_malloc_count[fname][
                    line_no]:  # type : ignore
                n_malloc_mb /= stats.memory_malloc_count[fname][line_no][bytei]
                n_python_malloc_mb /= stats.memory_malloc_count[fname][
                    line_no][bytei]
            for bytei in stats.memory_free_count[fname][line_no]:
                n_free_mb /= stats.memory_free_count[fname][line_no][bytei]

        n_growth_mb = n_malloc_mb - n_free_mb
        if -1 < n_growth_mb < 0:
            # Don't print out "-0".
            n_growth_mb = 0

        # Finally, print results.
        n_cpu_percent_c_str: str = ("" if n_cpu_percent_c < 1 else "%5.0f%%" %
                                    n_cpu_percent_c)

        n_gpu_percent_str: str = ("" if n_gpu_percent < 1 else "%3.0f%%" %
                                  n_gpu_percent)

        n_cpu_percent_python_str: str = ("" if n_cpu_percent_python < 1 else
                                         "%5.0f%%" % n_cpu_percent_python)
        n_growth_mb_str: str = ("" if
                                (not n_growth_mb and not n_usage_fraction) else
                                "%5.0f" % n_growth_mb)
        n_usage_fraction_str: str = ("" if n_usage_fraction < 0.01 else
                                     "%3.0f%%" % (100 * n_usage_fraction))
        n_python_fraction_str: str = ("" if n_python_fraction < 0.01 else
                                      "%5.0f%%" % (100 * n_python_fraction))
        n_copy_b = stats.memcpy_samples[fname][line_no]
        n_copy_mb_s = n_copy_b / (1024 * 1024 * stats.elapsed_time)
        n_copy_mb_s_str: str = ("" if n_copy_mb_s < 0.5 else "%6.0f" %
                                n_copy_mb_s)

        n_cpu_percent = n_cpu_percent_c + n_cpu_percent_python
        # Only report utilization where there is more than 1% CPU total usage,
        # and the standard error of the mean is low (meaning it's an accurate estimate).
        n_sys_percent = n_cpu_percent * (
            1.0 - (stats.cpu_utilization[fname][line_no].mean()))
        sys_str: str = ("" if n_sys_percent < 1
                        or stats.cpu_utilization[fname][line_no].size() <= 1
                        or stats.cpu_utilization[fname][line_no].sem() > 0.025
                        or stats.cpu_utilization[fname][line_no].mean() > 0.99
                        else "%3.0f%%" % (n_sys_percent))
        if not is_function_summary:
            print_line_no = "" if suppress_lineno_print else str(line_no)
        else:
            print_line_no = ("" if fname not in stats.firstline_map else str(
                stats.firstline_map[fname]))
        if profile_memory:
            spark_str: str = ""
            # Scale the sparkline by the usage fraction.
            samples = stats.per_line_footprint_samples[fname][line_no]
            for i in range(0, len(samples.get())):
                samples.get()[i] *= n_usage_fraction
            if samples.get():
                _, _, spark_str = sparkline.generate(
                    samples.get()[0:samples.len()], 0, current_max)

            # Red highlight
            ncpps: Any = ""
            ncpcs: Any = ""
            nufs: Any = ""
            ngpus: Any = ""

            if (n_usage_fraction >= self.highlight_percentage
                    or (n_cpu_percent_c + n_cpu_percent_python + n_gpu_percent)
                    >= self.highlight_percentage):
                ncpps = Text.assemble((n_cpu_percent_python_str, "bold red"))
                ncpcs = Text.assemble((n_cpu_percent_c_str, "bold red"))
                nufs = Text.assemble(
                    (spark_str + n_usage_fraction_str, "bold red"))
                ngpus = Text.assemble((n_gpu_percent_str, "bold red"))
            else:
                ncpps = n_cpu_percent_python_str
                ncpcs = n_cpu_percent_c_str
                ngpus = n_gpu_percent_str
                nufs = spark_str + n_usage_fraction_str

            if not reduced_profile or ncpps + ncpcs + nufs:
                tbl.add_row(
                    print_line_no,
                    ncpps,  # n_cpu_percent_python_str,
                    ncpcs,  # n_cpu_percent_c_str,
                    sys_str,
                    ngpus,
                    n_python_fraction_str,
                    n_growth_mb_str,
                    nufs,  # spark_str + n_usage_fraction_str,
                    n_copy_mb_s_str,
                    line,
                )
                return True
            else:
                return False

        else:

            # Red highlight
            if (n_cpu_percent_c + n_cpu_percent_python +
                    n_gpu_percent) >= self.highlight_percentage:
                ncpps = Text.assemble((n_cpu_percent_python_str, "bold red"))
                ncpcs = Text.assemble((n_cpu_percent_c_str, "bold red"))
                ngpus = Text.assemble((n_gpu_percent_str, "bold red"))
            else:
                ncpps = n_cpu_percent_python_str
                ncpcs = n_cpu_percent_c_str
                ngpus = n_gpu_percent_str

            if not reduced_profile or ncpps + ncpcs:
                tbl.add_row(
                    print_line_no,
                    ncpps,  # n_cpu_percent_python_str,
                    ncpcs,  # n_cpu_percent_c_str,
                    sys_str,
                    ngpus,  # n_gpu_percent_str
                    line,
                )
                return True
            else:
                return False
Exemplo n.º 2
0
    def output_profiles(
        self,
        stats: ScaleneStatistics,
        pid: int,
        profile_this_code: Callable[[Filename, LineNumber], bool],
        python_alias_dir_name: Filename,
        python_alias_dir: Filename,
        profile_memory: bool = True,
        reduced_profile: bool = False,
    ) -> bool:
        """Write the profile out."""
        # Get the children's stats, if any.
        if not pid:
            stats.merge_stats(python_alias_dir_name)
            try:
                shutil.rmtree(python_alias_dir)
            except BaseException:
                pass
        current_max: float = stats.max_footprint
        # If we've collected any samples, dump them.
        if (not stats.total_cpu_samples
                and not stats.total_memory_malloc_samples
                and not stats.total_memory_free_samples):
            # Nothing to output.
            return False
        # Collect all instrumented filenames.
        all_instrumented_files: List[Filename] = list(
            set(
                list(stats.cpu_samples_python.keys()) +
                list(stats.cpu_samples_c.keys()) +
                list(stats.memory_free_samples.keys()) +
                list(stats.memory_malloc_samples.keys())))
        if not all_instrumented_files:
            # We didn't collect samples in source files.
            return False
        title = Text()
        mem_usage_line: Union[Text, str] = ""
        growth_rate = 0.0
        if profile_memory:
            samples = stats.memory_footprint_samples
            if len(samples.get()) > 0:
                # Output a sparkline as a summary of memory usage over time.
                _, _, spark_str = sparkline.generate(
                    samples.get()[0:samples.len()], 0, current_max)
                # Compute growth rate (slope), between 0 and 1.
                if stats.allocation_velocity[1] > 0:
                    growth_rate = (100.0 * stats.allocation_velocity[0] /
                                   stats.allocation_velocity[1])
                # If memory used is > 1GB, use GB as the unit.
                if current_max > 1024:
                    mem_usage_line = Text.assemble(
                        "Memory usage: ",
                        ((spark_str, "blue")),
                        (" (max: %6.2fGB, growth rate: %3.0f%%)\n" %
                         ((current_max / 1024), growth_rate)),
                    )
                else:
                    # Otherwise, use MB.
                    mem_usage_line = Text.assemble(
                        "Memory usage: ",
                        ((spark_str, "blue")),
                        (" (max: %6.2fMB, growth rate: %3.0f%%)\n" %
                         (current_max, growth_rate)),
                    )

        null = open("/dev/null", "w")
        # Get column width of the terminal and adjust to fit.
        # Note that Scalene works best with at least 132 columns.
        if self.html:
            column_width = 132
        else:
            column_width = shutil.get_terminal_size().columns
        console = Console(
            width=column_width,
            record=True,
            force_terminal=True,
            file=null,
        )
        # Build a list of files we will actually report on.
        report_files: List[Filename] = []
        # Sort in descending order of CPU cycles, and then ascending order by filename
        for fname in sorted(
                all_instrumented_files,
                key=lambda f: (-(stats.cpu_samples[f]), f),
        ):
            fname = Filename(fname)
            try:
                percent_cpu_time = (100 * stats.cpu_samples[fname] /
                                    stats.total_cpu_samples)
            except ZeroDivisionError:
                percent_cpu_time = 0

            # Ignore files responsible for less than some percent of execution time and fewer than a threshold # of mallocs.
            if (stats.malloc_samples[fname] < self.malloc_threshold
                    and percent_cpu_time < self.cpu_percent_threshold):
                continue
            report_files.append(fname)

        # Don't actually output the profile if we are a child process.
        # Instead, write info to disk for the main process to collect.
        if pid:
            stats.output_stats(pid, python_alias_dir_name)
            return True

        for fname in report_files:
            # Print header.
            percent_cpu_time = (100 * stats.cpu_samples[fname] /
                                stats.total_cpu_samples)
            new_title = mem_usage_line + (
                "%s: %% of time = %6.2f%% out of %6.2fs." %
                (fname, percent_cpu_time, stats.elapsed_time))
            # Only display total memory usage once.
            mem_usage_line = ""

            tbl = Table(
                box=box.MINIMAL_HEAVY_HEAD,
                title=new_title,
                collapse_padding=True,
                width=column_width - 1,
            )

            tbl.add_column("Line", justify="right", no_wrap=True)
            tbl.add_column("Time %\nPython", no_wrap=True)
            tbl.add_column("Time %\nnative", no_wrap=True)
            tbl.add_column("Sys\n%", no_wrap=True)
            tbl.add_column("GPU\n%", no_wrap=True)

            other_columns_width = 0  # Size taken up by all columns BUT code

            if profile_memory:
                tbl.add_column("Mem %\nPython", no_wrap=True)
                tbl.add_column("Net\n(MB)", no_wrap=True)
                tbl.add_column("Memory usage\nover time / %", no_wrap=True)
                tbl.add_column("Copy\n(MB/s)", no_wrap=True)
                other_columns_width = 72 + 5  # GPU
                tbl.add_column(
                    "\n" + fname,
                    width=column_width - other_columns_width,
                    no_wrap=True,
                )
            else:
                other_columns_width = 36 + 5  # GPU
                tbl.add_column(
                    "\n" + fname,
                    width=column_width - other_columns_width,
                    no_wrap=True,
                )

            # Print out the the profile for the source, line by line.
            with open(fname, "r") as source_file:
                # We track whether we should put in ellipsis (for reduced profiles)
                # or not.
                did_print = True  # did we print a profile line last time?
                code_lines = source_file.read()
                # Generate syntax highlighted version for the whole file,
                # which we will consume a line at a time.
                # See https://github.com/willmcgugan/rich/discussions/965#discussioncomment-314233
                syntax_highlighted = None
                if self.html:
                    syntax_highlighted = Syntax(
                        code_lines,
                        "python",
                        theme="default",
                        line_numbers=False,
                        code_width=None,
                    )
                else:
                    syntax_highlighted = Syntax(
                        code_lines,
                        "python",
                        theme="vim",
                        line_numbers=False,
                        code_width=None,
                    )
                capture_console = Console(
                    width=column_width - other_columns_width,
                    force_terminal=True,
                )
                formatted_lines = [
                    SyntaxLine(segments) for segments in
                    capture_console.render_lines(syntax_highlighted)
                ]
                for line_no, line in enumerate(formatted_lines, start=1):
                    old_did_print = did_print
                    did_print = self.output_profile_line(
                        fname,
                        LineNumber(line_no),
                        line,
                        console,
                        tbl,
                        stats,
                        profile_this_code,
                        profile_memory=profile_memory,
                        force_print=True,
                        suppress_lineno_print=False,
                        is_function_summary=False,
                        reduced_profile=reduced_profile,
                    )
                    if old_did_print and not did_print:
                        # We are skipping lines, so add an ellipsis.
                        tbl.add_row("...")
                    old_did_print = did_print

            # Potentially print a function summary.
            fn_stats = stats.build_function_stats(fname)
            print_fn_summary = False
            for fn_name in fn_stats.cpu_samples_python:
                if fn_name == fname:
                    continue
                print_fn_summary = True
                break

            if print_fn_summary:
                tbl.add_row(None, end_section=True)
                txt = Text.assemble("function summary", style="bold italic")
                if profile_memory:
                    tbl.add_row("", "", "", "", "", "", "", "", "", txt)
                else:
                    tbl.add_row("", "", "", "", "", txt)

                for fn_name in sorted(
                        fn_stats.cpu_samples_python,
                        key=lambda k: stats.firstline_map[k],
                ):
                    if fn_name == fname:
                        continue
                    if self.html:
                        syntax_highlighted = Syntax(
                            fn_name,
                            "python",
                            theme="default",
                            line_numbers=False,
                            code_width=None,
                        )
                    else:
                        syntax_highlighted = Syntax(
                            fn_name,
                            "python",
                            theme="vim",
                            line_numbers=False,
                            code_width=None,
                        )
                    # force print, suppress line numbers
                    self.output_profile_line(
                        fn_name,
                        LineNumber(1),
                        syntax_highlighted,  # type: ignore
                        console,
                        tbl,
                        fn_stats,
                        profile_this_code,
                        profile_memory=profile_memory,
                        force_print=True,
                        suppress_lineno_print=True,
                        is_function_summary=True,
                        reduced_profile=reduced_profile,
                    )

            console.print(tbl)

            # Report top K lines (currently 5) in terms of net memory consumption.
            net_mallocs: Dict[LineNumber, float] = defaultdict(float)
            for line_no in stats.bytei_map[fname]:
                for bytecode_index in stats.bytei_map[fname][line_no]:
                    net_mallocs[line_no] += (stats.memory_malloc_samples[fname]
                                             [line_no][bytecode_index] -
                                             stats.memory_free_samples[fname]
                                             [line_no][bytecode_index])
            net_mallocs = OrderedDict(
                sorted(net_mallocs.items(), key=itemgetter(1), reverse=True))
            if len(net_mallocs) > 0:
                console.print("Top net memory consumption, by line:")
                number = 1
                for net_malloc_lineno in net_mallocs:
                    if net_mallocs[net_malloc_lineno] <= 1:
                        break
                    if number > 5:
                        break
                    output_str = ("(" + str(number) + ") " +
                                  ("%5.0f" % (net_malloc_lineno)) + ": " +
                                  ("%5.0f" %
                                   (net_mallocs[net_malloc_lineno])) + " MB")
                    console.print(output_str)
                    number += 1

            # Only report potential leaks if the allocation velocity (growth rate) is above some threshold
            # FIXME: fixed at 1% for now.
            # We only report potential leaks where the confidence interval is quite tight and includes 1.
            growth_rate_threshold = 0.01
            leak_reporting_threshold = 0.05
            leaks = []
            if growth_rate / 100 > growth_rate_threshold:
                vec = list(stats.leak_score[fname].values())
                keys = list(stats.leak_score[fname].keys())
                for index, item in enumerate(stats.leak_score[fname].values()):
                    # See https://en.wikipedia.org/wiki/Rule_of_succession
                    frees = item[1]
                    allocs = item[0]
                    expected_leak = (frees + 1) / (frees + allocs + 2)
                    if expected_leak <= leak_reporting_threshold:
                        leaks.append((
                            keys[index],
                            1 - expected_leak,
                            net_mallocs[keys[index]],
                        ))
                if len(leaks) > 0:
                    # Report in descending order by least likelihood
                    for leak in sorted(leaks, key=itemgetter(1), reverse=True):
                        output_str = (
                            "Possible memory leak identified at line " +
                            str(leak[0]) + " (estimated likelihood: " +
                            ("%3.0f" %
                             (leak[1] * 100)) + "%" + ", velocity: " +
                            ("%3.0f MB/s" %
                             (leak[2] / stats.elapsed_time)) + ")")
                        console.print(output_str)

        if self.html:
            # Write HTML file.
            md = Markdown(
                "generated by the [scalene](https://github.com/plasma-umass/scalene) profiler"
            )
            console.print(md)
            if not self.output_file:
                self.output_file = "/dev/stdout"
            console.save_html(self.output_file, clear=False)
        else:
            if not self.output_file:
                # No output file specified: write to stdout.
                sys.stdout.write(console.export_text(styles=True))
            else:
                # Don't output styles to text file.
                console.save_text(self.output_file, styles=False, clear=False)
        return True