def main():
    """Print a short performance report for an OpenCL context.

    The context comes from ``cl.create_some_context()``.  The report
    lists command latency and profiling overhead, then the empty-kernel
    time and float32 add rate measured on a queue created with
    ``PROFILING_ENABLE``, and finally the latency and bandwidth of each
    transfer type.  Bandwidth is sampled at 2**6, 2**8, ..., 2**26 bytes.
    """
    ctx = cl.create_some_context()

    prof_overhead, latency = perf.get_profiling_overhead(ctx)
    print("command latency: %g s" % latency)
    # Overhead is reported both in absolute terms and relative to latency.
    overhead_percent = 100 * prof_overhead / latency
    print("profiling overhead: %g s -> %.1f %%"
          % (prof_overhead, overhead_percent))

    profiling_flag = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=profiling_flag)

    empty_kernel_time = perf.get_empty_kernel_time(queue)
    print("empty kernel: %g s" % empty_kernel_time)
    add_rate = perf.get_add_rate(queue)
    print("float32 add: %g GOps/s" % (add_rate / 1e9))

    separator = "----------------------------------------"
    transfer_kinds = (
        perf.HostToDeviceTransfer,
        perf.DeviceToHostTransfer,
        perf.DeviceToDeviceTransfer,
    )
    for tx_type in transfer_kinds:
        print(separator)
        print(tx_type.__name__)
        print(separator)

        tx_latency = perf.transfer_latency(queue, tx_type)
        print("latency: %g s" % tx_latency)

        # Even exponents 6..26, i.e. 64 bytes up to 64 MiB.
        for exponent in range(6, 28, 2):
            block_size = 1 << exponent
            bandwidth = perf.transfer_bandwidth(queue, tx_type, block_size)
            print("bandwidth @ %d bytes: %g GB/s"
                  % (block_size, bandwidth / 1e9))
def main():
    """Measure and print a handful of OpenCL performance numbers.

    Steps, in order:

    1. Obtain a context with ``cl.create_some_context()`` and print the
       command latency and profiling overhead reported for it.
    2. Create a command queue with profiling enabled and print the
       empty-kernel time and the float32 add rate.
    3. For each of the three transfer types, print its latency and then
       its bandwidth at every power-of-two size from 2**6 to 2**26 bytes
       (stepping the exponent by two).
    """
    context = cl.create_some_context()

    overhead, cmd_latency = perf.get_profiling_overhead(context)
    print("command latency: %g s" % cmd_latency)
    print("profiling overhead: %g s -> %.1f %%" % (
        overhead,
        100 * overhead / cmd_latency,
    ))

    queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE,
    )

    print("empty kernel: %g s" % perf.get_empty_kernel_time(queue))
    giga_ops = perf.get_add_rate(queue) / 1e9
    print("float32 add: %g GOps/s" % giga_ops)

    # Transfer sizes probed for every transfer type: 64 B ... 64 MiB.
    sizes = [1 << shift for shift in range(6, 28, 2)]
    rule = "----------------------------------------"

    for tx_type in (perf.HostToDeviceTransfer,
                    perf.DeviceToHostTransfer,
                    perf.DeviceToDeviceTransfer):
        print(rule)
        print(tx_type.__name__)
        print(rule)

        print("latency: %g s" % perf.transfer_latency(queue, tx_type))

        for nbytes in sizes:
            giga_bytes_per_s = (
                perf.transfer_bandwidth(queue, tx_type, nbytes) / 1e9)
            print("bandwidth @ %d bytes: %g GB/s" % (nbytes, giga_bytes_per_s))
def main():
    """Print OpenCL latency, throughput and transfer figures.

    Prints command latency and profiling overhead for a context obtained
    from ``cl.create_some_context()``, then the empty-kernel time and
    float32 add rate on a profiling-enabled queue.  For each transfer
    type it prints the latency followed by the bandwidth at sizes
    2**6, 2**8, ..., 2**30 bytes.

    A bandwidth measurement that raises does not abort the run: the
    exception's class name is printed in place of the figure and the
    sweep moves on to the next size.
    """
    ctx = cl.create_some_context()

    prof_overhead, latency = perf.get_profiling_overhead(ctx)
    print("command latency: %g s" % latency)
    relative_overhead = 100 * prof_overhead / latency
    print("profiling overhead: {:g} s -> {:.1f} %".format(
        prof_overhead, relative_overhead))

    queue_props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=queue_props)

    print("empty kernel: %g s" % perf.get_empty_kernel_time(queue))
    print("float32 add: %g GOps/s" % (perf.get_add_rate(queue) / 1e9))

    banner = "----------------------------------------"
    for tx_type in (
        perf.HostToDeviceTransfer,
        perf.DeviceToHostTransfer,
        perf.DeviceToDeviceTransfer,
    ):
        print(banner)
        print(tx_type.__name__)
        print(banner)
        print("latency: %g s" % perf.transfer_latency(queue, tx_type))

        for power in range(6, 31, 2):
            nbytes = 1 << power
            # Measurement *and* formatting stay inside the try so that any
            # failure is reported in place of the figure, as before.
            try:
                measured = perf.transfer_bandwidth(queue, tx_type, nbytes)
                outcome = "%g GB/s" % (measured / 1e9)
            except Exception as err:
                outcome = "exception: %s" % err.__class__.__name__
            print("bandwidth @ %d bytes: %s" % (nbytes, outcome))
def run_perf_tests(self) -> None:
    """Run the ``perf`` measurements and log every result at DEBUG level.

    Logs the command latency and profiling overhead for ``self.ctx``,
    then the empty-kernel time and float32 add rate for ``self.queue``,
    and finally, for each transfer type, the latency and the bandwidth
    at sizes 2**6, 2**8, ..., 2**28 bytes.

    A bandwidth measurement that raises is logged as
    ``exception: <ClassName>`` and the sweep continues with the next
    size.

    Raises:
        RuntimeError: If ``self.queue`` is ``None``.
    """
    # Guard clause: nothing can be measured without a queue.
    if self.queue is None:
        raise RuntimeError("perf tests cannot be executed without a queue")

    prof_overhead, latency = perf.get_profiling_overhead(self.ctx)
    # Arguments are passed separately so the logging module only builds
    # the message when a handler actually emits the record.
    logging.debug("command latency: %g s", latency)
    logging.debug("profiling overhead: %g s -> %.1f %%",
                  prof_overhead, 100 * prof_overhead / latency)
    logging.debug("empty kernel: %g s",
                  perf.get_empty_kernel_time(self.queue))
    logging.debug("float32 add: %g GOps/s",
                  perf.get_add_rate(self.queue) / 1e9)

    separator = "----------------------------------------"
    for tx_type in [
        perf.HostToDeviceTransfer,
        perf.DeviceToHostTransfer,
        perf.DeviceToDeviceTransfer,
    ]:
        logging.debug(separator)
        logging.debug(tx_type.__name__)
        logging.debug(separator)
        logging.debug("latency: %g s",
                      perf.transfer_latency(self.queue, tx_type))

        for i in range(6, 30, 2):
            bs = 1 << i
            # Deliberately best-effort: a failing size is reported by
            # exception class name rather than aborting the whole sweep.
            try:
                result = "%g GB/s" % (
                    perf.transfer_bandwidth(self.queue, tx_type, bs) / 1e9)
            except Exception as e:
                result = "exception: %s" % e.__class__.__name__
            logging.debug("bandwidth @ %d bytes: %s", bs, result)
import pyopencl as cl
import pyopencl.characterize.performance as performance

# Script: print a short performance report for an OpenCL context --
# command latency, profiling overhead, empty-kernel time, float32 add
# rate, and latency/bandwidth for each transfer type.

context = cl.create_some_context()
# Create the queue with profiling enabled.
queue = cl.CommandQueue(
    context, properties=cl.command_queue_properties.PROFILING_ENABLE)

overhead, latency = performance.get_profiling_overhead(context)

print("\n\nCommand Latency: {} s".format(latency))
print("Profiling Overhead: {} s -> {}".format(overhead,
                                              100 * overhead / latency))

# XXX Both of the following two lines break the program on a Mac XXX
print("Empty Kernel: {} s".format(performance.get_empty_kernel_time(queue)))
print("Float32 Add: {} GOps/s\n".format(performance.get_add_rate(queue) / 1e9))

for transfer_type in [
    performance.HostToDeviceTransfer,
    performance.DeviceToHostTransfer,
    performance.DeviceToDeviceTransfer,
]:
    print("\n" + transfer_type.__name__)
    print("    Latency: {0} s".format(
        performance.transfer_latency(queue, transfer_type)))

    for exponent in range(6, 28, 2):
        # The bit shift ``1 << exponent`` computes two to the power of
        # ``exponent`` (2**exponent).  The result is deliberately not
        # named ``bytes``: at module level that would shadow the builtin
        # ``bytes`` type for everything that follows.
        size_in_bytes = 1 << exponent
        print("    Bandwidth at {0} Bytes: {1} GB/s".format(
            size_in_bytes,
            performance.transfer_bandwidth(
                queue, transfer_type, size_in_bytes) / 1e9))
import pyopencl as cl import pyopencl.characterize.performance as performance # XXX Find out more about pyopencl.characterize - why does this exist? XXX context = cl.create_some_context() queue = cl.CommandQueue(context, properties=cl.command_queue_properties.PROFILING_ENABLE) overhead, latency = performance.get_profiling_overhead(context) print("\n\nCommand Latency: {} s".format(latency)) print("Profiling Overhead: {} s -> {}".format(overhead, 100 * overhead / latency)) # XXX Both these lines break the program on a Mac XXX print("Empty Kernel: {} s".format(performance.get_empty_kernel_time(queue))) print("Float32 Add: {} GOps/s\n".format(performance.get_add_rate(queue) / 1e9)) for transfer_type in [ performance.HostToDeviceTransfer, performance.DeviceToHostTransfer, performance.DeviceToDeviceTransfer, ]: print("\n" + transfer_type.__name__) print(" Latency: {0} s".format(performance.transfer_latency(queue, transfer_type))) for exponent in range(6, 28, 2): bytes = 1 << exponent # This bit shift << operation does 'two to the exponent' (2^exponent) print( " Bandwidth at {0} Bytes: {1} GB/s".format( bytes, performance.transfer_bandwidth(queue, transfer_type, bytes) / 1e9 )