def vertical_advection_bandwidth(output, executions, option):
    """Benchmark the vertical advection variants and save the result table.

    One ``Configuration`` is built per ``vadv`` variant, each with its own
    block size and unroll factor. The configurations are handed to
    ``run_scaling_benchmark`` and the returned table is written with
    ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        option: Forwarded to ``common_kwargs`` to obtain the keyword
            arguments shared by every configuration.
    """
    shared = common_kwargs(option)

    # Each row: (stencil variant, block size, unroll factor).
    variants = (
        (vadv.Classic, (512, 1), 8),
        (vadv.LocalMem, (128, 1), 28),
        (vadv.SharedMem, (64, 1), 0),
        (vadv.LocalMemMerged, (512, 1), 2),
    )
    configurations = [
        Configuration(stencil,
                      block_size=block,
                      unroll_factor=unroll,
                      **shared)
        for stencil, block, unroll in variants
    ]

    result = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain)
    result.to_csv(output)
def horizontal_diffusion_bandwidth(output, executions, option):
    """Benchmark the horizontal diffusion variants and save the result table.

    One ``Configuration`` is built per ``hdiff`` variant with a
    variant-specific block size (and ``loop='3D'`` for ``OnTheFly``). The
    configurations are handed to ``run_scaling_benchmark`` and the returned
    table is written with ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        option: Forwarded to ``common_kwargs`` to obtain the keyword
            arguments shared by every configuration.
    """
    shared = common_kwargs(option)

    # Each row: (stencil variant, block size, extra keyword arguments).
    variants = (
        (hdiff.Classic, (64, 8, 1), {}),
        (hdiff.OnTheFly, (128, 4, 1), {'loop': '3D'}),
        (hdiff.OnTheFlyIncache, (32, 8, 4), {}),
        (hdiff.JScanSharedMem, (512, 16, 1), {}),
        (hdiff.JScanOtfIncache, (128, 4, 1), {}),
        (hdiff.JScanOtf, (256, 4, 1), {}),
        (hdiff.JScanShuffleIncache, (60, 4, 1), {}),
        (hdiff.JScanShuffle, (60, 3, 1), {}),
        (hdiff.JScanShuffleSystolic, (60, 4, 1), {}),
    )
    configurations = [
        Configuration(stencil, block_size=block, **extras, **shared)
        for stencil, block, extras in variants
    ]

    def truncate_block_size_to_domain_if_possible(**params):
        """Truncate the block size unless its first entry is 60."""
        # The JScanShuffle* configurations above use a first block
        # dimension of 60 and are passed through untouched.
        # NOTE(review): presumably those variants need that exact width;
        # confirm against the kernels.
        if params['block_size'][0] == 60:
            return params
        return truncate_block_size_to_domain(**params)

    result = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain_if_possible)
    result.to_csv(output)
def horizontal_diffusion_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorised horizontal diffusion variants.

    One ``Configuration`` is built per ``hdiff`` variant with a
    variant-specific block size. They are run through
    ``run_scaling_benchmark`` with ``scale_domain`` as the argument
    preprocessor, and the returned table is written with ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        dtype: Anything accepted by ``np.dtype``; used to derive the vector
            size and forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Elements per vector: 64 divided by the element size in bytes.
    # NOTE(review): presumably targets 64-byte vectors; confirm.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 64 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           alignment=64,
                           vector_size=vector_size)

    # Each row: (stencil variant, block size).
    variants = (
        (hdiff.ClassicVec, (1024, 16, 1)),
        (hdiff.OnTheFlyVec, (1024, 8, 1)),
        (hdiff.MinimumMem, (1024, 64, 1)),
    )
    configurations = [
        Configuration(stencil, **shared, block_size=block)
        for stencil, block in variants
    ]

    result = run_scaling_benchmark(configurations,
                                   executions,
                                   preprocess_args=scale_domain)
    result.to_csv(output)
# Example #4
# 0
def horizontal_diffusion_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorised horizontal diffusion variants.

    All variants share the same keyword arguments, including the block size
    and ``streaming_stores=True``. They are run through
    ``run_scaling_benchmark`` with ``truncate_block_size_to_domain`` as the
    argument preprocessor, and the returned table is written with
    ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        dtype: Anything accepted by ``np.dtype``; used to derive the vector
            size and forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Elements per vector: 32 divided by the element size in bytes.
    # NOTE(review): presumably targets 32-byte vectors; confirm.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 32 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           vector_size=vector_size,
                           streaming_stores=True,
                           block_size=(1024, 16, 1))

    stencils = (hdiff.ClassicVec, hdiff.OnTheFlyVec, hdiff.MinimumMem)
    configurations = [
        Configuration(stencil, **shared) for stencil in stencils
    ]

    result = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain)
    result.to_csv(output)
def basic_bandwidth(output, executions, option):
    """Benchmark the basic stencils and save the result table.

    The ``stream`` configuration uses a 1D loop with no halo; every other
    configuration uses the 3D loop settings. The configurations are handed
    to ``run_scaling_benchmark`` and the returned table is written with
    ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        option: Forwarded to ``common_kwargs``.
    """
    shared = common_kwargs(
        option,
        loop='3D',
        block_size=(32, 8, 1),
        halo=1,
    )

    # Same settings as ``shared`` but overridden for the 1D stream copy.
    stream_shared = shared.copy()
    stream_shared.update(loop='1D', block_size=(1024, 1, 1), halo=0)

    onesided = (('avg-i', 0), ('avg-j', 1), ('avg-k', 2))
    symmetric = (('sym-avg-i', 0), ('sym-avg-j', 1), ('sym-avg-k', 2))

    configurations = [
        Configuration(basic.Copy, name='stream', **stream_shared),
        Configuration(basic.Empty, name='empty', **shared),
        Configuration(basic.Copy, name='copy', **shared),
    ]
    configurations.extend(
        Configuration(basic.OnesidedAverage, name=label, axis=axis, **shared)
        for label, axis in onesided)
    configurations.extend(
        Configuration(basic.SymmetricAverage, name=label, axis=axis, **shared)
        for label, axis in symmetric)
    configurations.append(
        Configuration(basic.Laplacian,
                      name='lap-ij',
                      along_x=True,
                      along_y=True,
                      along_z=False,
                      **shared))

    result = run_scaling_benchmark(configurations, executions)
    result.to_csv(output)
# Example #6
# 0
def vertical_advection_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorised vertical advection variants.

    One ``Configuration`` is built per ``vadv`` variant with
    variant-specific extra keyword arguments. They are run through
    ``run_scaling_benchmark`` with ``truncate_block_size_to_domain`` as the
    argument preprocessor, and the returned table is written with
    ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        dtype: Anything accepted by ``np.dtype``; used to derive the vector
            size and forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Elements per vector: 32 divided by the element size in bytes.
    # NOTE(review): presumably targets 32-byte vectors; confirm.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 32 // element_bytes
    shared = common_kwargs(option, dtype=dtype, vector_size=vector_size)

    # Each row: (stencil variant, variant-specific keyword arguments).
    variants = (
        (vadv.KMiddleVec,
         dict(block_size=(128, 1), streaming_stores=True)),
        (vadv.KInnermostVec,
         dict(block_size=(64, 1))),
        (vadv.KInnermostBlockVec,
         dict(block_size=(16, 1),
              prefetch_distance=4,
              streaming_stores=True)),
    )
    configurations = [
        Configuration(stencil, **shared, **extras)
        for stencil, extras in variants
    ]

    result = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain)
    result.to_csv(output)
# Example #7
# 0
def basic_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorised basic stencils and save the result table.

    The ``stream`` configuration uses the ``'1D-vec'`` loop with no halo;
    every other configuration uses the ``'3D-blocked-vec'`` settings. They
    are run through ``run_scaling_benchmark`` with
    ``truncate_block_size_to_domain`` as the argument preprocessor, and the
    returned table is written with ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        dtype: Anything accepted by ``np.dtype``; used to derive the vector
            size and forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Elements per vector: 32 divided by the element size in bytes.
    # NOTE(review): presumably targets 32-byte vectors; confirm.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 32 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           vector_size=vector_size,
                           loop='3D-blocked-vec',
                           halo=1,
                           block_size=(1024, 16, 1),
                           streaming_stores=True)

    # Same settings as ``shared`` but overridden for the 1D stream copy.
    stream_shared = shared.copy()
    stream_shared.update(loop='1D-vec', halo=0)

    onesided = (('avg-i', 0), ('avg-j', 1), ('avg-k', 2))
    symmetric = (('sym-avg-i', 0), ('sym-avg-j', 1), ('sym-avg-k', 2))

    configurations = [
        Configuration(basic.Copy, name='stream', **stream_shared),
        Configuration(basic.Copy, name='copy', **shared),
    ]
    configurations.extend(
        Configuration(basic.OnesidedAverage, name=label, axis=axis, **shared)
        for label, axis in onesided)
    configurations.extend(
        Configuration(basic.SymmetricAverage, name=label, axis=axis, **shared)
        for label, axis in symmetric)
    configurations.append(
        Configuration(basic.Laplacian,
                      name='lap-ij',
                      along_x=True,
                      along_y=True,
                      along_z=False,
                      **shared))

    result = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain)
    result.to_csv(output)
def vertical_advection_bandwidth(output, executions, dtype, option):
    """Benchmark the vectorised vertical advection variants.

    One ``Configuration`` is built per ``vadv`` variant with
    variant-specific extra keyword arguments. They are run through
    ``run_scaling_benchmark`` with ``scale_domain`` as the argument
    preprocessor, and the returned table is written with ``to_csv``.

    Args:
        output: Destination passed to ``table.to_csv``.
        executions: Forwarded unchanged to ``run_scaling_benchmark``.
        dtype: Anything accepted by ``np.dtype``; used to derive the vector
            size and forwarded to ``common_kwargs``.
        option: Forwarded to ``common_kwargs``.
    """
    # Elements per vector: 64 divided by the element size in bytes.
    # NOTE(review): presumably targets 64-byte vectors; confirm.
    element_bytes = np.dtype(dtype).itemsize
    vector_size = 64 // element_bytes

    shared = common_kwargs(option,
                           dtype=dtype,
                           layout=(2, 0, 1),
                           vector_size=vector_size)

    # Each row: (stencil variant, variant-specific keyword arguments).
    variants = (
        (vadv.KMiddleVec,
         dict(block_size=(1024, 1))),
        (vadv.KInnermostVec,
         dict(block_size=(64, 1), prefetch_distance=4)),
        (vadv.KInnermostBlockVec,
         dict(block_size=(64, 1), prefetch_distance=2)),
    )
    configurations = [
        Configuration(stencil, **shared, **extras)
        for stencil, extras in variants
    ]

    result = run_scaling_benchmark(configurations,
                                   executions,
                                   preprocess_args=scale_domain)
    result.to_csv(output)