def runTest(opt):
    """Run the memory test on every in-use memory of the xclbin.

    Opens the device selected by ``opt.index``, loads the xclbin found at
    ``opt.bitstreamFile`` onto it, and calls ``runMemTest`` once for each
    memory reported by the xclbin whose ``get_used()`` is truthy.

    Args:
        opt: Options object; ``index`` and ``bitstreamFile`` are read here,
            and the object is forwarded unchanged to ``runMemTest``.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    # The uuid returned by load_xclbin() is not needed by this test.
    d.load_xclbin(xbin)

    for m in xbin.get_mems():
        # Memories the xclbin does not use are skipped.
        if not m.get_used():
            continue
        runMemTest(opt, d, m)
def runKernel(opt):
    """Run the "hello" kernel on two buffers and validate both outputs.

    Loads the xclbin at ``opt.bitstreamFile`` onto device ``opt.index``,
    picks the first kernel whose name matches the pattern below, and starts
    it once per buffer. Each buffer is zero-filled from a bytearray of
    ``opt.DATA_SIZE`` before the run, and after the run its leading bytes
    are compared against ``b'Hello World'``.

    Args:
        opt: Options object providing ``index``, ``bitstreamFile`` and
            ``DATA_SIZE``.

    Raises:
        IndexError: If no kernel in the xclbin matches the name pattern.
        AssertionError: If either output buffer does not start with
            ``b'Hello World'``.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    # NOTE(review): read as a regex this is "hell" followed by zero or more
    # "o", so with match() any kernel name starting with "hell" is accepted.
    # Confirm whether a glob-style "hello*" was intended.
    rule = re.compile("hello*")
    kernel = next(
        (k for k in xbin.get_kernels() if rule.match(k.get_name())), None)
    if kernel is None:
        # Same exception type the original indexing produced, but with a
        # message that says what went wrong.
        raise IndexError(
            "No kernel matching 'hello*' found in %s" % opt.bitstreamFile)
    hello = pyxrt.kernel(d, uuid, kernel.get_name(), pyxrt.kernel.shared)

    size = opt.DATA_SIZE
    zeros = bytearray(size)
    to_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE
    from_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE

    # Both buffers are bound to kernel argument 0: each one is the sole
    # argument of its own run.
    handles = []
    views = []
    for _ in range(2):
        bo = pyxrt.bo(d, size, pyxrt.bo.normal, hello.group_id(0))
        bo.write(zeros, 0)
        handles.append(bo)
        views.append(bo.map())

    for bo in handles:
        bo.sync(to_device, size, 0)
    for view in views:
        print("Original string = [%s]" % view[:64].tobytes())

    print("Issue kernel start requests")
    runs = [hello(bo) for bo in handles]

    print("Now wait for the kernels to finish using xrtRunWait()")
    # The value returned by wait() is not inspected here.
    # NOTE(review): presumably a run that has not completed would surface
    # only as a data mismatch below - consider checking the returned state.
    for run in runs:
        run.wait(5)

    print("Get the output data produced by the 2 kernel runs from the device")
    for bo in handles:
        bo.sync(from_device, size, 0)

    golden = memoryview(b'Hello World')
    results = [view[:len(golden)] for view in views]
    for result in results:
        print("Result string = [%s]" % result.tobytes())

    # Explicit raise instead of `assert` so the check survives `python -O`.
    for result in results:
        if result != golden:
            raise AssertionError("Incorrect output from kernel")
def runKernel(opt):
    """Run the "simple" kernel once and validate its output buffer.

    Loads the xclbin at ``opt.bitstreamFile`` onto device ``opt.index``,
    allocates two buffers of ``COUNT`` 32-bit integers, fills the first with
    zeros and the second with ``0 .. COUNT-1``, starts the kernel with
    ``(buffer1, buffer2, 0x10)`` and compares the first buffer against the
    reference ``i + i * 16``.

    Args:
        opt: Options object providing ``index`` and ``bitstreamFile``.

    Raises:
        AssertionError: If the data read back from the device differs from
            the reference.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    COUNT = 1024
    DATA_SIZE = ctypes.sizeof(ctypes.c_int32) * COUNT

    # Instantiate simple
    simple = pyxrt.kernel(d, uuid, "simple")

    print("Allocate and initialize buffers")
    boHandle1 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(0))
    boHandle2 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(1))

    # DATA_SIZE is sized for COUNT int32 values, so address the mapped
    # memory as int32 elements. NOTE(review): map() presumably exposes a
    # byte view; .view() reinterprets it without copying and is a no-op if
    # the mapping is already int32 - confirm against the pyxrt binding.
    bo1 = numpy.asarray(boHandle1.map()).view(numpy.int32)
    bo2 = numpy.asarray(boHandle2.map()).view(numpy.int32)

    bo1[:COUNT] = 0
    bo2[:COUNT] = numpy.arange(COUNT, dtype=numpy.int32)

    # The multiplier 16 equals the 0x10 scalar passed to the kernel below.
    bufReference = [i + i * 16 for i in range(COUNT)]

    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATA_SIZE, 0)
    boHandle2.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATA_SIZE, 0)

    print("Start the kernel, simple")
    run = simple(boHandle1, boHandle2, 0x10)

    print("Now wait for the kernel simple to finish")
    # The state returned by wait() is not inspected; the data check below
    # decides pass/fail.
    run.wait()

    print("Get the output data from the device and validate it")
    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, DATA_SIZE, 0)

    # `list == ndarray` compares element-wise and cannot be used as a single
    # truth value; array_equal() reduces it to one bool. An explicit raise
    # keeps the check active under `python -O`.
    if not numpy.array_equal(bufReference, bo1[:COUNT]):
        raise AssertionError("Computed value does not match reference")
def runKernel(opt):
    """Measure the throughput of the two bandwidth kernels.

    Loads the xclbin at ``opt.bitstreamFile`` onto device ``opt.index`` and
    runs the kernels "bandwidth1" and "bandwidth2" back to back for burst
    lengths of 16, 64, 256 and 1024 beats. For each burst length the
    repetition count starts at 64 and is doubled until one timed pass takes
    at least five seconds; the output buffers are compared against the
    input buffers after every pass.

    Args:
        opt: Options object providing ``index`` and ``bitstreamFile``.

    Raises:
        RuntimeError: If an output buffer differs from its input buffer, or
            if the best measured throughput is below ``getThreshold(d)``.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    khandle1 = pyxrt.kernel(d, uuid, "bandwidth1", pyxrt.kernel.shared)
    khandle2 = pyxrt.kernel(d, uuid, "bandwidth2", pyxrt.kernel.shared)

    output_bo1, output_buf1 = getInputOutputBuffer(d, khandle1, 0, False)
    output_bo2, output_buf2 = getInputOutputBuffer(d, khandle2, 0, False)
    input_bo1, input_buf1 = getInputOutputBuffer(d, khandle1, 1, True)
    input_bo2, input_buf2 = getInputOutputBuffer(d, khandle2, 1, True)

    TYPESIZE = 512
    bytes_per_beat = int(TYPESIZE / 8)
    threshold = getThreshold(d)
    fiveseconds = 5 * 1000000  # in microseconds
    from_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE

    # Run tests with burst lengths from 16 up to 1024 beats, quadrupling the
    # burst length after each test.
    throughput = []
    failed = False
    test = 0
    beats = 16
    while beats <= 1024 and not failed:
        print("LOOP PIPELINE %d beats" %beats)

        usduration = 0
        reps = 64
        while usduration < fiveseconds:
            start = current_micro_time()
            rhandle1 = khandle1(output_bo1, input_bo1, beats, reps)
            rhandle2 = khandle2(output_bo2, input_bo2, beats, reps)
            rhandle1.wait()
            rhandle2.wait()
            end = current_micro_time()
            usduration = end - start

            # Only the first `limit` bytes are read back and verified.
            limit = beats * bytes_per_beat
            output_bo1.sync(from_device, limit, 0)
            output_bo2.sync(from_device, limit, 0)

            # The second pair is compared only when the first pair matched.
            failed = (input_buf1[:limit] != output_buf1[:limit]
                      or input_buf2[:limit] != output_buf2[:limit])
            if failed:
                break

            # Pass was too short to time reliably: double the work and retry.
            # `reps` is left untouched once a pass is long enough, so the
            # statistics below use the repetition count of the timed pass.
            if usduration < fiveseconds:
                reps = reps * 2

        seconds = usduration / 1000000.0
        total_bytes = reps * beats * bytes_per_beat
        # NOTE(review): the factor 2.0 is applied in both lines below, so the
        # reported figure is 4x total_bytes per second. Presumably one factor
        # accounts for read+write and the other for the two kernels - confirm.
        bpersec = 2.0 * total_bytes / seconds
        mbpersec = 2.0 * bpersec / (1024 * 1024)
        throughput.append(mbpersec)
        print("Test %d, Throughput: %d MB/s" %(test, mbpersec))

        beats = beats * 4
        test += 1

    if failed:
        raise RuntimeError("ERROR: Failed to copy entries")

    print("TTTT: %d" %throughput[0])
    print("Maximum throughput: %d MB/s" %max(throughput))
    if max(throughput) < threshold:
        raise RuntimeError("ERROR: Throughput is less than expected value of %d GB/sec" %(threshold/1000))