Exemplo n.º 1
0
    def _emit_build_profiling(self):
        """
        Log, at PERF level, how long it took to generate this Operator.

        Nothing is emitted unless the 'PERF' logging level is enabled. When
        it is, the overall generation time (the 'op-compile' timer) is logged
        first, followed by a recursive breakdown of the remaining timers: at
        each nesting level only the ``max_hotspots`` entries with the largest
        'total' are considered, and an entry is reported (and its sub-entries
        visited) only if it accounts for more than ``threshold`` percent of
        the overall generation time.

        The profiler's timers are only read, never modified.
        """
        if not is_log_enabled_for('PERF'):
            return

        def fround(i, n=100):
            # Round `i` up to the nearest 1/n (two decimal places by default)
            return ceil(i * n) / n

        # NOTE: this is a *shallow* copy -- the nested per-entry dicts are
        # still shared with the profiler, so they must not be mutated below
        timings = self._profiler.py_timers.copy()

        tot = timings.pop('op-compile')
        perf("Operator `%s` generated in %.2f s" % (self.name, fround(tot)))

        max_hotspots = 3
        threshold = 20.

        def _emit_timings(timings, indent=''):
            # 'total' is the aggregate of the level itself, not a sub-entry.
            # Filter it out instead of popping it: popping would strip the
            # key from the profiler's own (shared) nested dicts, so a later
            # emission would fail with a KeyError in the sort key
            candidates = [i for i in timings if i != 'total']
            entries = sorted(candidates,
                             key=lambda i: timings[i]['total'],
                             reverse=True)
            for i in entries[:max_hotspots]:
                v = fround(timings[i]['total'])
                # Percentage of the overall generation time, one decimal place
                perc = fround(v / tot * 100, n=10)
                if perc > threshold:
                    perf("%s%s: %.2f s (%.1f %%)" %
                         (indent, i.lstrip('_'), v, perc))
                    # Sub-entries are emitted one nesting level deeper
                    _emit_timings(timings[i], ' ' * len(indent) + ' * ')

        _emit_timings(timings, '  * ')

        if self._profiler._ops:
            # Each item is formatted as a "before --> after" pair of integers
            ops = ['%d --> %d' % i for i in self._profiler._ops]
            perf("Flops reduction after symbolic optimization: [%s]" %
                 ' ; '.join(ops))
Exemplo n.º 2
0
    def _emit_build_profiling(self):
        """
        Log, at PERF level, the Operator generation time and its hotspots.

        The 'op-compile' timer is reported as the overall generation time.
        Of the remaining timers, the three largest are inspected and each
        one exceeding 20% of the overall time is reported as a hotspot.
        """
        # Nothing to do unless PERF-level logging is active
        if not is_log_enabled_for('PERF'):
            return

        def round_up(value, scale=100):
            # Round `value` upwards to the nearest 1/scale
            return ceil(value * scale) / scale

        py_timers = self._profiler.py_timers.copy()

        overall = py_timers.pop('op-compile')
        perf("Operator `%s` generated in %.2f s" %
             (self.name, round_up(overall)))

        # Rank the remaining timers from slowest to fastest
        max_hotspots = 3
        ranked = sorted(py_timers,
                        key=lambda name: py_timers[name],
                        reverse=True)
        for name in ranked[:max_hotspots]:
            elapsed = round_up(py_timers[name])
            share = round_up(elapsed / overall * 100, scale=10)
            if share > 20.:
                perf("- [Hotspot] %s: %.2f s (%.1f %%)" %
                     (name.lstrip('_'), elapsed, share))
Exemplo n.º 3
0
    def _emit_apply_profiling(self, args):
        """
        Log a performance summary of the profiled sections and return it.

        The overall run time is always logged at info level. The detailed
        report (global metrics, per-section metrics, subsections and the
        values of the performance arguments) is only produced when the
        'PERF' logging level is enabled. The summary built by the profiler
        is returned in both cases.
        """
        def round_up(value):
            # Round upwards to 2 decimal places
            return ceil(value * 100) / 100

        elapsed = round_up(self._profiler.py_timers['apply'])
        info("Operator `%s` ran in %.2f s" % (self.name, elapsed))

        summary = self._profiler.summary(args,
                                         self._dtype,
                                         reduce_over='apply')

        # Everything below is PERF-level output; do not waste time otherwise
        if not is_log_enabled_for('PERF'):
            return summary

        indent = ""
        if summary.globals:
            # Note that with MPI enabled, the global performance indicators
            # represent "cross-rank" performance data
            global_metrics = []

            vanilla = summary.globals.get('vanilla')
            if vanilla is not None:
                global_metrics += [
                    "OI=%.2f" % round_up(vanilla.oi),
                    "%.2f GFlops/s" % round_up(vanilla.gflopss),
                ]

            fdlike = summary.globals.get('fdlike')
            if fdlike is not None:
                global_metrics.append(
                    "%.2f GPts/s" % round_up(fdlike.gpointss))

            if global_metrics:
                perf("Global performance: [%s]" % ', '.join(global_metrics))

            # Local entries are nested under their own header
            perf("Local performance:")
            indent = " " * 2

        # Emit local, i.e. "per-rank" performance. Without MPI, this is the
        # only thing that will be emitted
        for key, entry in summary.items():
            rank = "" if key.rank is None else "[rank%d]" % key.rank

            # The GPts/s figure is only reported when it is non-zero/available
            parts = [
                "OI=%.2f" % round_up(entry.oi),
                "%.2f GFlops/s" % round_up(entry.gflopss),
            ]
            if entry.gpointss:
                parts.append("%.2f GPts/s" % round_up(entry.gpointss))
            metrics = ", ".join(parts)

            # One comma-separated string per iteration shape; several shapes
            # are each wrapped in their own angle brackets
            shapes = [
                ",".join(str(extent) for extent in shape)
                for shape in entry.itershapes
            ]
            if not shapes:
                shape_str = ""
            elif len(shapes) == 1:
                shape_str = shapes[0]
            else:
                shape_str = ",".join("<%s>" % s for s in shapes)
            name = "%s%s<%s>" % (key.name, rank, shape_str)

            perf("%s* %s ran in %.2f s [%s]" %
                 (indent, name, round_up(entry.time), metrics))

            # Subsections are reported with their share of the section time
            subsections = summary.subsections.get(key.name, {})
            for sub_name, sub_time in subsections.items():
                share = round_up(sub_time / entry.time * 100)
                perf("%s+ %s ran in %.2f s [%.2f%%]" %
                     (indent * 2, sub_name, sub_time, share))

        # Emit performance mode and arguments
        perf_args = {}
        knobs = (i for i in self.input + self.dimensions if i.is_PerfKnob)
        for knob in knobs:
            try:
                perf_args[knob.name] = args[knob.name]
            except KeyError:
                # Not passed under its own name: record the first alias
                # found among the arguments, if any
                for alias in knob._arg_names:
                    if alias in args:
                        perf_args[alias] = args[alias]
                        break
        perf("Performance[mode=%s] arguments: %s" %
             (self._state['optimizations'], perf_args))

        return summary
Exemplo n.º 4
0
    def _emit_apply_profiling(self, args):
        """
        Log a performance summary of the profiled sections and return it.

        The overall run time is always logged at info level. The global and
        local performance indicators, the configuration and the values of
        the performance arguments are only logged when the 'PERF' logging
        level is enabled. The summary built by the profiler is returned in
        both cases.
        """
        def round_up(value):
            # Round upwards to 2 decimal places
            return ceil(value * 100) / 100

        def format_rates(entry):
            # "<x> GFlops/s[, <y> GPts/s]"; the GPts/s part is dropped when
            # the entry has no (or a zero) gpointss value
            rates = ["%.2f GFlops/s" % round_up(entry.gflopss)]
            if entry.gpointss:
                rates.append("%.2f GPts/s" % round_up(entry.gpointss))
            return ", ".join(rates)

        elapsed = round_up(self._profiler.py_timers['apply'])
        info("Operator `%s` run in %.2f s" % (self.name, elapsed))

        summary = self._profiler.summary(args,
                                         self._dtype,
                                         reduce_over='apply')

        # Everything below is PERF-level output; do not waste time otherwise
        if not is_log_enabled_for('PERF'):
            return summary

        indent = ""
        if summary.globals:
            indent = " " * 2

            perf("Global performance indicators")

            # With MPI enabled, the 'vanilla' entry contains "cross-rank"
            # performance data
            vanilla = summary.globals.get('vanilla')
            if vanilla is not None:
                rates = format_rates(vanilla)
                perf(
                    "%s* Operator `%s` with OI=%.2f computed in %.2f s [%s]" %
                    (indent, self.name, round_up(vanilla.oi),
                     round_up(vanilla.time), rates))

            fdlike = summary.globals.get('fdlike')
            if fdlike is not None:
                perf("%s* Achieved %.2f FD-GPts/s" % (indent, fdlike.gpointss))

            perf("Local performance indicators")

        # Emit local, i.e. "per-rank" performance. Without MPI, this is the
        # only thing that will be emitted
        for key, entry in summary.items():
            rank = "" if key.rank is None else "[rank%d]" % key.rank
            rates = format_rates(entry)
            shapes = [
                ",".join(str(extent) for extent in shape)
                for shape in entry.itershapes
            ]

            if not shapes:
                # No iteration shapes: only the name and the time are shown
                perf("%s* %s%s computed in %.2f s" %
                     (indent, key.name, rank, round_up(entry.time)))
                continue

            # Several shapes are each wrapped in their own angle brackets
            if len(shapes) == 1:
                shape_str = shapes[0]
            else:
                shape_str = ",".join("<%s>" % s for s in shapes)
            name = "%s%s<%s>" % (key.name, rank, shape_str)
            perf("%s* %s with OI=%.2f computed in %.2f s [%s]" %
                 (indent, name, round_up(entry.oi), round_up(entry.time),
                  rates))

        # Emit relevant configuration values
        perf("Configuration:  %s" % self._state['optimizations'])

        # Emit relevant performance arguments
        perf_args = {}
        knobs = (i for i in self.input + self.dimensions if i.is_PerfKnob)
        for knob in knobs:
            try:
                perf_args[knob.name] = args[knob.name]
            except KeyError:
                # Not passed under its own name: record the first alias
                # found among the arguments, if any
                for alias in knob._arg_names:
                    if alias in args:
                        perf_args[alias] = args[alias]
                        break
        perf("Performance arguments:  %s" % perf_args)

        return summary