def add_matches_fpga_arrow(strings, regexes, platform_type, t_copy, t_fpga):
    """Run the regular-expression matcher on an FPGA platform and time it.

    Two timings are recorded: the duration of ``context.enable()`` is appended
    to ``t_copy`` and the duration of the start/wait sequence of the core is
    appended to ``t_fpga``.

    NOTE(review): ``rb``, ``num_rows`` and ``np`` are not defined in this
    function; they are presumably module-level names - confirm.
    NOTE(review): ``strings`` and ``regexes`` are accepted but not used here.

    Args:
        strings: Unused in this function body.
        regexes: Unused in this function body.
        platform_type: Value forwarded to ``pf.Platform``.
        t_copy: List that receives the measured ``context.enable()`` time.
        t_fpga: List that receives the measured core run time.

    Returns:
        Whatever ``RegExCore.get_matches(np)`` returns.
    """
    stopwatch = Timer()

    # Set up the platform, a context on it, and the regex core on the context.
    fpga = pf.Platform(platform_type)
    ctx = pf.Context(fpga)
    regex_core = RegExCore(ctx)

    # The platform is initialized before the core is reset.
    fpga.init()
    regex_core.reset()

    # Queue the record batch; only the enable() call is timed.
    ctx.queue_record_batch(rb)
    stopwatch.start()
    ctx.enable()
    stopwatch.stop()
    t_copy.append(stopwatch.seconds())

    # Configure the core with the row range 0..num_rows.
    regex_core.set_reg_exp_arguments(0, num_rows)

    # Time the run itself: start the core and wait for it to finish.
    # NOTE(review): the meaning of the argument 10 (presumably a polling
    # interval) is not visible here - confirm.
    stopwatch.start()
    regex_core.start()
    regex_core.wait_for_finish(10)
    stopwatch.stop()
    t_fpga.append(stopwatch.seconds())

    # Read the match counts back from the core.
    return regex_core.get_matches(np)
def test_platform():
    """Smoke-test the basic ``pf.Platform`` operations on the "echo" platform.

    Exercises init, name query, device malloc/free, MMIO write/read, and
    host<->device copies from ``bytes``, ``bytearray`` and a NumPy ``uint8``
    array. Results of the reads are not checked.

    Returns:
        True once every call has completed without raising.
    """
    echo = pf.Platform("echo")
    echo.init()

    # Report which platform was opened.
    platform_name = echo.get_name()
    print("Platform name: " + platform_name)

    # Allocate and immediately release a 1024-byte device buffer.
    device_address = echo.device_malloc(1024)
    echo.device_free(device_address)

    # Write register 0, then read it back (value is not inspected).
    echo.write_mmio(0, 0)
    echo.read_mmio(0)

    # The same seven byte values in three different host containers.
    size = 7
    values = [1, 2, 3, 4, 5, 6, 7]
    payloads = (
        bytes(values),
        bytearray(values),
        np.array(values, dtype=np.uint8),
    )

    # Copy each container to its own fixed device offset.
    for payload, offset in zip(payloads, (0, 7, 14)):
        echo.copy_host_to_device(payload, offset, size)

    # Read the first seven bytes back (result is not inspected).
    echo.copy_device_to_host(0, 7)

    echo.terminate()
    return True
def test_context():
    """Smoke-test ``pf.Context`` by queueing a record batch and two arrays.

    Builds a four-column Arrow record batch (uint64, string, masked uint64,
    list of uint32) plus two stand-alone uint32 arrays, queues all of them on
    a context of the "echo" platform, enables the context and terminates the
    platform. Nothing is asserted or returned.
    """
    echo = pf.Platform("echo")
    echo.init()

    # Schema of the record batch; the third positional argument of pa.field
    # is the nullable flag.
    batch_schema = pa.schema([
        pa.field("a", pa.uint64(), False),
        pa.field("b", pa.string(), False),
        pa.field("c", pa.uint64(), True),
        pa.field("d", pa.list_(pa.field("e", pa.uint32(), True)), False),
    ])

    # Column data, in schema order. Column "c" carries a validity mask
    # (in pyarrow, True in the mask marks a null entry).
    col_a = pa.array([1, 2, 3, 4], type=pa.uint64())
    col_b = pa.array(["hello", "world", "fletcher", "arrow"], type=pa.string())
    col_c = pa.array(
        [5, 6, 7, 8],
        mask=np.array([True, False, True, True]),
        type=pa.uint64(),
    )
    col_d = pa.array(
        [[9, 10, 11, 12], [13, 14], [15, 16, 17], [18]],
        type=pa.list_(pa.uint32()),
    )

    # Two arrays that are queued on their own, outside the record batch.
    extra_f = pa.array([19, 20, 21, 22], type=pa.uint32())
    extra_g = pa.array([23, 24, 25, 26], type=pa.uint32())

    batch = pa.RecordBatch.from_arrays([col_a, col_b, col_c, col_d], batch_schema)

    ctx = pf.Context(echo)
    ctx.queue_record_batch(batch)
    ctx.queue_array(extra_f)
    # The second array is queued together with an explicit field description.
    ctx.queue_array(extra_g, field=pa.field("g", pa.uint32(), False))

    # Enabling the context writes the queued buffers.
    ctx.enable()

    echo.terminate()
def test_platform():
    """Round-trip test of ``pf.Platform`` buffer copies on the "echo" platform.

    Allocates a device buffer, copies the same seven bytes into it three times
    (from ``bytes``, ``bytearray`` and a NumPy ``uint8`` array) at consecutive
    offsets, reads all 21 bytes back and asserts they match. MMIO write/read
    calls are exercised as well, without checking their results.

    Returns:
        True once every call has completed and the assertion has passed.
    """
    echo = pf.Platform("echo", False)
    echo.init()

    # Report which platform was opened.
    platform_name = echo.name()
    print("Platform name: " + platform_name)

    # Device buffer used as the copy target below.
    address = echo.device_malloc(1024)

    # Write register 0, then read it back as 32-bit and 64-bit values
    # (results are not inspected).
    echo.write_mmio(0, 0)
    echo.read_mmio(0)
    echo.read_mmio_64(0)

    # The same seven byte values in three different host containers.
    size = 7
    values = [1, 2, 3, 4, 5, 6, 7]
    payloads = (
        bytes(values),
        bytearray(values),
        np.array(values, dtype=np.uint8),
    )

    # Back-to-back destinations inside the allocated buffer.
    destinations = (address, address + 7, address + 14)
    for payload, destination in zip(payloads, destinations):
        echo.copy_host_to_device(payload, destination, size)

    # Read all three copies back in one go and compare with what was sent.
    readback = echo.copy_device_to_host(address, 21)
    assert list(readback) == [1, 2, 3, 4, 5, 6, 7] * 3

    # Release the device buffer before shutting the platform down.
    echo.device_free(address)
    echo.terminate()
    return True
# Match Pandas series on CPU (marginal performance improvement most likely possible with Cython) t.start() m_pcpu.append(add_matches_cpu(strings_pandas, regexes)) t.stop() t_pcpu.append(t.seconds()) print(t.seconds()) # Match Arrow array on CPU (significant performance improvement most likely possible with Cython) t.start() m_acpu.append(add_matches_cpu_arrow(rb.column(0), regexes)) t.stop() t_acpu.append(t.seconds()) print(t.seconds()) # Match Arrow array on FPGA platform = pf.Platform(platform_type) context = pf.Context(platform) rc = RegExCore(context) # Initialize the platform platform.init() # Reset the UserCore rc.reset() # Prepare the column buffers t.start() context.queue_record_batch(rb) bytes_copied += context.get_queue_size() context.enable() t.stop()
import pyfletcher as pf
import numpy as np
import timeit
import sys
import argparse

if __name__ == "__main__":
    # NOTE(review): `pa` (presumably pyarrow) is used below, but no import for
    # it is visible in this part of the file - confirm it is imported earlier.

    # The script takes a single positional argument: the RecordBatch file path.
    cli = argparse.ArgumentParser()
    cli.add_argument("recordbatch_path")
    options = cli.parse_args()

    # Open the file and read its first RecordBatch.
    batch_reader = pa.RecordBatchFileReader(options.recordbatch_path)
    record_batch = batch_reader.get_batch(0)

    # Interface to the FPGA platform; with no arguments the platform is
    # auto-detected (per the original comment). It is initialized before use.
    fpga = pf.Platform()
    fpga.init()

    # Context for the data on the platform: queue the batch, then enable the
    # context (which potentially transfers the data to the FPGA).
    ctx = pf.Context(fpga)
    ctx.queue_record_batch(record_batch)
    ctx.enable()

    # Kernel interface on top of the context: start it and wait until done.
    accelerator = pf.Kernel(ctx)
    accelerator.start()
    accelerator.poll_until_done()

    # Fetch the kernel's return value as a 32-bit unsigned integer and show it.
    total = accelerator.get_return(np.dtype(np.uint32))
    print("Sum: " + str(total))
def arrow_kmeans_fpga(batch, centroids, iteration_limit, max_hw_dim, max_hw_centroids, t_copy, t_fpga):
    """Run k-means on an FPGA platform and write the result into ``centroids``.

    The centroids are packed into a flat argument list (two 32-bit words per
    coordinate, low word first), padded up to ``max_hw_dim`` coordinates per
    centroid and ``max_hw_centroids`` centroids, followed by
    ``iteration_limit``. After the core has finished, the coordinates are read
    back from 64-bit MMIO registers and stored in ``centroids`` in place.

    NOTE(review): ``platform_type`` is not a parameter; it is presumably a
    module-level name - confirm.

    Args:
        batch: Record batch queued on the context; ``batch.num_rows`` is used
            as the end of the row range.
        centroids: Sequence of mutable sequences of integer coordinates.
            Modified in place.
        iteration_limit: Appended as the last core argument.
        max_hw_dim: Number of coordinate slots per centroid in the argument
            and register layout.
        max_hw_centroids: Number of centroid slots in the argument layout.
        t_copy: List that receives the measured ``context.enable()`` time.
        t_fpga: List that receives the measured core run time.

    Returns:
        The same ``centroids`` object, updated with the values read back.
    """
    stopwatch = Timer()

    fpga = pf.Platform(platform_type)
    ctx = pf.Context(fpga)
    core = pf.UserCore(ctx)

    # The platform is initialized before the core is reset.
    fpga.init()
    core.reset()

    # Queue the batch; only the enable() call is timed.
    ctx.queue_record_batch(batch)
    stopwatch.start()
    ctx.enable()
    stopwatch.stop()
    t_copy.append(stopwatch.seconds())

    # The core processes rows 0..num_rows of the batch.
    core.set_range(0, batch.num_rows)

    # Pack the core arguments.
    word_mask = 0xFFFFFFFF
    words = []
    for point in centroids:
        # Each coordinate is split into its low and high 32-bit words.
        for coord in point:
            words.extend((coord & word_mask, (coord >> 32) & word_mask))
        # Zero-fill the coordinate slots this centroid does not use.
        words.extend([0, 0] * (max_hw_dim - len(point)))

    for _ in range(max_hw_centroids - len(centroids)):
        # Unused centroid slot: all but the last coordinate are zero ...
        words.extend([0, 0] * (max_hw_dim - 1))
        # ... and the last coordinate gets 0x80000000 as its low word.
        # NOTE(review): presumably a marker for "unused centroid" - confirm.
        words.extend((0x80000000, 0))

    words.append(iteration_limit)
    core.set_arguments(words)

    # Time the run itself: start the core and wait for it to finish.
    # NOTE(review): the meaning of the argument 10 is not visible here.
    stopwatch.start()
    core.start()
    core.wait_for_finish(10)
    stopwatch.stop()
    t_fpga.append(stopwatch.seconds())

    # Read the resulting coordinates back from the MMIO registers. Each
    # coordinate occupies two registers, starting at register offset 10.
    regs_per_dim = 2
    regs_offset = 10
    centroid_count = len(centroids)
    dim_count = len(centroids[0])
    for c in range(centroid_count):
        slot_base = c * max_hw_dim
        for d in range(dim_count):
            register = (slot_base + d) * regs_per_dim + regs_offset
            centroids[c][d] = fpga.read_mmio_64(register, type="int")

    return centroids