示例#1
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Run the Cluster-level optimization pipeline and return the result.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries, which are forwarded to the passes that need them.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        # The passes, in execution order. Each one is wrapped in a lambda so
        # that nothing is looked up or run before its turn comes
        pipeline = (
            # Toposort+Fusion; the toposort exposes more fusion opportunities
            lambda c: fuse(c, toposort=True),
            # Hoist and optimize Dimension-invariant sub-expressions
            lambda c: cire(c, 'invariants', sregistry, options, platform),
            lambda c: Lift().process(c),
            # Flop reduction that may alter the arithmetic
            lambda c: extract_increments(c, sregistry),
            lambda c: cire(c, 'sops', sregistry, options, platform),
            lambda c: factorize(c),
            lambda c: optimize_pows(c),
            # The passes above may have created fusion opportunities, which
            # in turn may enable further optimizations
            lambda c: fuse(c),
            lambda c: eliminate_arrays(c),
            # Flop reduction that leaves the arithmetic untouched
            lambda c: cse(c, sregistry),
            # Blocking to improve data locality
            lambda c: Blocking(options).process(c),
        )
        for run in pipeline:
            clusters = run(clusters)

        return clusters
示例#2
0
文件: cpu.py 项目: italoaug/devito
    def _make_clusters_passes_mapper(cls, **kwargs):
        """
        Build the mapper from pass names to the callables implementing them.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries. Passes that need any of them are exposed as single-argument
        lambdas with those values already bound; the others are exposed as
        they are.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        def cire_pass(mode):
            # A `cire` invocation with everything but the input bound
            return lambda c: cire(c, mode, sregistry, options, platform)

        hoist = cire_pass('invariants')

        mapper = {}
        mapper['blocking'] = Blocking(options).process
        mapper['factorize'] = factorize
        mapper['fuse'] = fuse
        # 'lift' feeds the output of the 'invariants' `cire` run to `Lift`
        mapper['lift'] = lambda c: Lift().process(hoist(c))
        mapper['cire-sops'] = cire_pass('sops')
        mapper['cire-divs'] = cire_pass('divs')
        mapper['cse'] = lambda c: cse(c, sregistry)
        mapper['opt-pows'] = optimize_pows
        mapper['topofuse'] = lambda c: fuse(c, toposort=True)

        return mapper
示例#3
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Run the Cluster-level optimization pipeline and return the result.

        ``kwargs`` must carry the ``options`` and ``platform`` entries.
        """
        options, platform = kwargs['options'], kwargs['platform']

        # Names for the temporaries: "r" followed by the next value drawn
        # from `counter`
        counter = generator()

        def template():
            return "r%d" % counter()

        def hoist(c):
            # Hoist and optimize Dimension-invariant sub-expressions
            c = cire(c, template, 'invariants', options, platform)
            return Lift().process(c)

        def reduce_flops(c):
            # These passes may alter the arithmetic ...
            c = extract_increments(c, template)
            c = cire(c, template, 'sops', options, platform)
            c = factorize(c)
            c = optimize_pows(c)
            # ... while this one does not
            return cse(c, template)

        def fuse_again(c):
            # Lifting may create fusion opportunities, which in turn may
            # enable further optimizations
            c = fuse(c)
            return eliminate_arrays(c, template)

        # Toposort+Fusion (the former to expose more fusion opportunities)
        clusters = fuse(clusters, toposort=True)

        return fuse_again(reduce_flops(hoist(clusters)))
示例#4
0
    def _pipeline(self, clusters, *args):
        """
        Apply the rewriting passes to ``clusters`` and return the result.

        ``extract_time_invariants``, ``cire`` and ``cse`` run in this order,
        each receiving ``*args`` in addition to the clusters; ``factorize``
        runs last and only receives the clusters.
        """
        for rewrite in (extract_time_invariants, cire, cse):
            clusters = rewrite(clusters, *args)

        return factorize(clusters)
示例#5
0
    def _make_clusters_passes_mapper(cls, **kwargs):
        """
        Build the mapper from pass names to the callables implementing them.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries. The two callbacks produced by ``make_callbacks(options)``
        are handed to ``Tasker`` and ``Streaming`` respectively.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        host_runner, host_reader = make_callbacks(options)

        return {
            'blocking': Blocking(options).process,
            'tasking': Tasker(host_runner).process,
            'streaming': Streaming(host_reader).process,
            'factorize': factorize,
            'fuse': fuse,
            # 'lift' feeds the output of the 'invariants' `cire` run to `Lift`
            'lift': lambda c: Lift().process(
                cire(c, 'invariants', sregistry, options, platform)
            ),
            'cire-sops': lambda c: cire(c, 'sops', sregistry, options, platform),
            'cse': lambda c: cse(c, sregistry),
            'opt-pows': optimize_pows,
            'topofuse': lambda c: fuse(c, toposort=True),
        }
示例#6
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Run the Cluster-level optimization pipeline and return the result.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries, which are forwarded to the passes that need them.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        # Passes in execution order; the lambdas keep each pass from being
        # looked up or run before the previous one has completed
        passes = [
            # Toposort+Fusion; the toposort exposes more fusion opportunities
            lambda c: fuse(c, toposort=True),
            # Hoist and optimize Dimension-invariant sub-expressions
            lambda c: cire(c, 'invariants', sregistry, options, platform),
            lambda c: Lift().process(c),
            # Reduce flops
            lambda c: extract_increments(c, sregistry),
            lambda c: cire(c, 'sops', sregistry, options, platform),
            lambda c: factorize(c),
            lambda c: optimize_pows(c),
            # The passes above may have created fusion opportunities
            lambda c: fuse(c),
            # Reduce flops
            lambda c: cse(c, sregistry),
        ]
        for apply_pass in passes:
            clusters = apply_pass(clusters)

        return clusters
示例#7
0
文件: cpu.py 项目: kenhester/devito
    def _make_clusters_passes_mapper(cls, **kwargs):
        """
        Build the mapper from pass names to the callables implementing them.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries, which are bound into the passes that need them.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        # Handed to `buffering`. It plays the role that `is_on_device` plays
        # on device backends: for a TimeFunction whose `save` is not None it
        # returns a one-item list holding its time dimension, otherwise None
        def callback(f):
            saved = f.is_TimeFunction and f.save is not None
            return [f.time_dim] if saved else None

        return {
            'buffering': lambda c: buffering(c, callback, sregistry, options),
            'blocking': lambda c: blocking(c, options),
            'factorize': factorize,
            'fuse': fuse,
            # 'lift' feeds the output of the 'invariants' `cire` run to `Lift`
            'lift': lambda c: Lift().process(
                cire(c, 'invariants', sregistry, options, platform)
            ),
            'cire-sops': lambda c: cire(c, 'sops', sregistry, options, platform),
            'cse': lambda c: cse(c, sregistry),
            'opt-pows': optimize_pows,
            'topofuse': lambda c: fuse(c, toposort=True),
        }
示例#8
0
    def _make_clusters_passes_mapper(cls, **kwargs):
        """
        Build the mapper from pass names to the callables implementing them.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries, which are bound into the passes that need them.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        # The callbacks consumed by `Tasker` and `Streaming`, respectively
        host_runner, host_reader = make_callbacks(options)

        # Handed to `buffering`: None when `is_on_device` accepts `f` under
        # the 'gpu-fit' option, otherwise a one-item list holding the time
        # dimension of `f`
        def callback(f):
            if is_on_device(f, options['gpu-fit']):
                return None
            return [f.time_dim]

        mapper = {}
        mapper['buffering'] = lambda c: buffering(c, callback, sregistry, options)
        mapper['blocking'] = lambda c: blocking(c, sregistry, options)
        mapper['tasking'] = Tasker(host_runner).process
        mapper['streaming'] = Streaming(host_reader).process
        mapper['factorize'] = factorize
        mapper['fission'] = fission
        mapper['fuse'] = lambda c: fuse(c, options=options)
        # 'lift' feeds the output of the 'invariants' `cire` run to `Lift`
        mapper['lift'] = lambda c: Lift().process(
            cire(c, 'invariants', sregistry, options, platform)
        )
        mapper['cire-sops'] = lambda c: cire(c, 'sops', sregistry, options, platform)
        mapper['cse'] = lambda c: cse(c, sregistry)
        mapper['opt-pows'] = optimize_pows
        mapper['topofuse'] = lambda c: fuse(c, toposort=True, options=options)

        return mapper
示例#9
0
文件: cpu.py 项目: rhodrin/devito
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Optimize Clusters for better runtime performance.

        ``kwargs`` must carry the ``options`` and ``platform`` entries. The
        passes run strictly in the order in which they are listed below.
        """
        options, platform = kwargs['options'], kwargs['platform']

        # Names for the temporaries: "r" followed by the next value drawn
        # from `counter`
        counter = generator()

        def template():
            return "r%d" % counter()

        # The lambdas keep each pass from being looked up or run before the
        # previous one has completed
        passes = [
            # Toposort+Fusion; the toposort exposes more fusion opportunities
            lambda c: Toposort().process(c),
            lambda c: fuse(c),
            # Hoist and optimize Dimension-invariant sub-expressions
            lambda c: cire(c, template, 'invariants', options, platform),
            lambda c: Lift().process(c),
            # Blocking to improve data locality
            lambda c: Blocking(options).process(c),
            # Flop reduction that may alter the arithmetic
            lambda c: extract_increments(c, template),
            lambda c: cire(c, template, 'sops', options, platform),
            lambda c: factorize(c),
            lambda c: optimize_pows(c),
            lambda c: freeze(c),
            # Flop reduction that leaves the arithmetic untouched
            lambda c: cse(c, template),
            # The passes above may have created fusion opportunities, which
            # in turn may enable further optimizations
            lambda c: fuse(c),
            lambda c: eliminate_arrays(c, template),
            lambda c: scalarize(c, template),
        ]
        for apply_pass in passes:
            clusters = apply_pass(clusters)

        return clusters
示例#10
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Run the Cluster-level optimization pipeline and return the result.

        ``kwargs`` must carry the ``options``, ``platform`` and ``sregistry``
        entries. Blocking may run at two points of the pipeline, governed by
        the ``'blockeager'`` and ``'blocklazy'`` options respectively.
        """
        options, platform, sregistry = (
            kwargs[key] for key in ('options', 'platform', 'sregistry')
        )

        def block_if(flag, c):
            # Blocking to define thread blocks, only if `options[flag]` is set;
            # the option is read when this stage of the pipeline is reached
            if not options[flag]:
                return c
            return blocking(c, sregistry, options)

        # Passes in execution order; the lambdas keep each pass from being
        # looked up or run before the previous one has completed
        passes = [
            # Optimize MultiSubDomains
            lambda c: optimize_msds(c),
            # Toposort+Fusion; the toposort exposes more fusion opportunities
            lambda c: fuse(c, toposort=True, options=options),
            # Fission to increase parallelism
            lambda c: fission(c),
            # Hoist and optimize Dimension-invariant sub-expressions
            lambda c: cire(c, 'invariants', sregistry, options, platform),
            lambda c: Lift().process(c),
            # First blocking opportunity
            lambda c: block_if('blockeager', c),
            # Reduce flops
            lambda c: extract_increments(c, sregistry),
            lambda c: cire(c, 'sops', sregistry, options, platform),
            lambda c: factorize(c),
            lambda c: optimize_pows(c),
            # The passes above may have created fusion opportunities
            lambda c: fuse(c),
            # Reduce flops
            lambda c: cse(c, sregistry),
            # Second blocking opportunity
            lambda c: block_if('blocklazy', c),
        ]
        for apply_pass in passes:
            clusters = apply_pass(clusters)

        return clusters