def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Run the copy/scale/add/triad statements through ``benchmark``.

    Args:
        arrayLength: Length handed to ``np.full`` for the arrays created by
            the setup source.
        dtype: Exposed to the setup source as ``real_t``; used as the array
            dtype and to construct ``alpha``.
        innerRepeat: Forwarded unchanged to ``state_o``.
        outerRepeat: Forwarded unchanged to ``state_o``.

    Returns:
        A list of ``(label, result)`` tuples in the order COPY, SCALE, ADD,
        TRIAD, where each result is whatever ``benchmark`` stored under the
        matching lower-case key.
    """
    import numpy as np

    # Source executed before the timed statements. It is indented here for
    # readability and de-indented with inspect.cleandoc before use.
    # Note: ``c`` is created but none of the timed statements below use it.
    setup_src = """
		a = np.full(arrayLength, 1, dtype=real_t);
		b = np.full(arrayLength, 2, dtype=real_t);
		c = np.full(arrayLength, 3, dtype=real_t);
		alpha = real_t(42)
	"""

    # Statement source to time, keyed by kernel name.
    statements = {
        "copy": "copy(a)",
        "scale": "scale(a, alpha)",
        "add": "add(a, b)",
        "triad": "triad(a, b, alpha)",
    }

    # Names made visible to the setup source and the timed statements.
    env = dict(
        copy=copy,
        scale=scale,
        add=add,
        triad=triad,
        np=np,
        arrayLength=arrayLength,
        real_t=dtype,
    )

    run_state = state_o(env, innerRepeat, outerRepeat)
    measured = benchmark(statements, inspect.cleandoc(setup_src), run_state)

    kernel_order = ("copy", "scale", "add", "triad")
    return [(kernel.upper(), measured[kernel]) for kernel in kernel_order]
示例#2
0
def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Run the copy/scale/add/triad statements through ``benchmark``.

    The setup source builds ``a``, ``b`` and ``c`` with the external ``init``
    helper and a scalar ``alpha``; the timed statements then call the four
    kernels on those names.

    Args:
        arrayLength: Second argument passed to ``init`` for each array.
        dtype: Exposed to the setup source as ``real_t``; used to build the
            fill values handed to ``init`` and the scalar ``alpha``.
        innerRepeat: Forwarded unchanged to ``state_o``.
        outerRepeat: Forwarded unchanged to ``state_o``.

    Returns:
        A list of ``(label, result)`` tuples in the order COPY, SCALE, ADD,
        TRIAD, where each result is whatever ``benchmark`` stored under the
        matching lower-case key.
    """
    # Source executed before the timed statements. It is indented here for
    # readability and de-indented with inspect.cleandoc before use.
    setup_src = """
		a = init(real_t(1), arrayLength); 
		b = init(real_t(2), arrayLength); 
		c = init(real_t(3), arrayLength); 
		alpha = real_t(42);
	"""

    # Statement source to time, keyed by kernel name.
    statements = {
        "copy": "copy(a, b)",
        "scale": "scale(a, b, alpha)",
        "add": "add(a, b, c)",
        "triad": "triad(a, b, c, alpha)",
    }

    # Names made visible to the setup source and the timed statements.
    env = dict(
        copy=copy,
        scale=scale,
        add=add,
        triad=triad,
        init=init,
        arrayLength=arrayLength,
        real_t=dtype,
    )

    run_state = state_o(env, innerRepeat, outerRepeat)
    measured = benchmark(statements, inspect.cleandoc(setup_src), run_state)

    kernel_order = ("copy", "scale", "add", "triad")
    return [(kernel.upper(), measured[kernel]) for kernel in kernel_order]
示例#3
0
def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Run the copy/scale/add/triad statements through ``benchmark``.

    After the measurements, the threading layer and thread count reported by
    ``npyufunc.parallel`` are written to the debug log.

    Args:
        arrayLength: Length handed to ``np.full`` for the arrays created by
            the setup source.
        dtype: Exposed to the setup source as ``real_t``; used as the array
            dtype and to construct ``alpha``.
        innerRepeat: Forwarded unchanged to ``state_o``.
        outerRepeat: Forwarded unchanged to ``state_o``.

    Returns:
        A list of ``(label, result)`` tuples in the order COPY, SCALE, ADD,
        TRIAD, where each result is whatever ``benchmark`` stored under the
        matching lower-case key.
    """
    import numpy as np

    # Source executed before the timed statements. It is indented here for
    # readability and de-indented with inspect.cleandoc before use.
    setup_src = """
		a = np.full(arrayLength, 1, dtype=real_t);
		b = np.full(arrayLength, 2, dtype=real_t);
		c = np.full(arrayLength, 3, dtype=real_t);
		alpha = real_t(42)
	"""

    # Statement source to time, keyed by kernel name.
    statements = {
        "copy": "copy(a, b)",
        "scale": "scale(a, b, alpha)",
        "add": "add(a, b, c)",
        "triad": "triad(a, b, c, alpha)",
    }

    # Names made visible to the setup source and the timed statements.
    env = dict(
        copy=copy,
        scale=scale,
        add=add,
        triad=triad,
        np=np,
        arrayLength=arrayLength,
        real_t=dtype,
    )

    run_state = state_o(env, innerRepeat, outerRepeat)
    measured = benchmark(statements, inspect.cleandoc(setup_src), run_state)

    # Queried only after the benchmark has run, as in the original ordering.
    logger.debug("NUMBA Threading Layer: {}".format(npyufunc.parallel.threading_layer()))
    logger.debug("NUMBA N Threads: {}".format(npyufunc.parallel.get_thread_count()))

    kernel_order = ("copy", "scale", "add", "triad")
    return [(kernel.upper(), measured[kernel]) for kernel in kernel_order]
示例#4
0
def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Run the copy/scale/add/triad kernels on device arrays via ``benchmark``.

    The setup source creates three host arrays, copies each to the device on
    its own stream (``d_a``, ``d_b``, ``d_c``) and derives a launch
    configuration of ``n_threads = 256`` threads per block with enough blocks
    to cover ``arrayLength`` elements. Every timed statement launches its
    kernel with ``[n_blocks, n_threads]`` on the device arrays.

    Previously only ``copy`` was launched that way; ``scale``, ``add`` and
    ``triad`` were called without a launch configuration on the host arrays,
    leaving ``d_c`` unused.

    NOTE(review): this assumes ``scale``, ``add`` and ``triad`` are CUDA
    kernels with the same launch interface as ``copy`` -- confirm against
    their definitions.

    Args:
        arrayLength: Length handed to ``np.full`` for the arrays created by
            the setup source; also used to compute ``n_blocks``.
        dtype: Exposed to the setup source as ``real_t``; used as the array
            dtype and to construct ``alpha``.
        innerRepeat: Forwarded unchanged to ``state_o``.
        outerRepeat: Forwarded unchanged to ``state_o``.

    Returns:
        A list of ``(label, result)`` tuples in the order COPY, SCALE, ADD,
        TRIAD, where each result is whatever ``benchmark`` stored under the
        matching lower-case key.
    """
    import numpy as np

    # Source executed before the timed statements. It is indented here for
    # readability and de-indented with inspect.cleandoc before use.
    setup = """
		a = np.full(arrayLength, 1, dtype=real_t);
		b = np.full(arrayLength, 2, dtype=real_t);
		c = np.full(arrayLength, 3, dtype=real_t);
		alpha = real_t(42)

		stream_a = cuda.stream()
		stream_b = cuda.stream()
		stream_c = cuda.stream()
		d_a = cuda.to_device(a, stream_a)
		d_b = cuda.to_device(b, stream_b)
		d_c = cuda.to_device(c, stream_c)

		n_threads = 256
		n_blocks = np.ceil(arrayLength / n_threads).astype("int32")
	"""

    # All four kernels use the same launch configuration and operate on the
    # device copies, so host<->device transfers stay out of the timed code.
    # NOTE(review): kernel launches are asynchronous; if ``benchmark`` does
    # not synchronise the device, these timings may only cover launch
    # overhead -- confirm.
    times = {
        "copy": 'copy[n_blocks, n_threads](d_a, d_b)',
        "scale": 'scale[n_blocks, n_threads](d_a, d_b, alpha)',
        "add": 'add[n_blocks, n_threads](d_a, d_b, d_c)',
        "triad": 'triad[n_blocks, n_threads](d_a, d_b, d_c, alpha)'
    }

    # Names made visible to the setup source and the timed statements.
    namespace = {
        "np": np,
        "cuda": cuda,
        "copy": copy,
        "scale": scale,
        "add": add,
        "triad": triad,
        "arrayLength": arrayLength,
        "real_t": dtype
    }
    state = state_o(namespace, innerRepeat, outerRepeat)
    results = benchmark(times, inspect.cleandoc(setup), state)
    return [("COPY", results["copy"]), ("SCALE", results["scale"]),
            ("ADD", results["add"]), ("TRIAD", results["triad"])]