def test_svhn():
    """Check the "streetview" CNV-W1A1 network on the bundled sample image.

    ``Test_image/6.png`` (located next to this file) is classified once with
    the hardware runtime and once with the software runtime; both results
    must equal 5.  The Xlnk allocator is reset when the test ends, including
    when an assertion fails, so a failing run does not skip the cleanup.

    Raises
    ------
    AssertionError
        If either runtime infers a class other than 5.
    """
    # load test image
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_svhn = Image.open(
        os.path.join(BNN_ROOT_DIR, 'Test_image', '6.png'))

    try:
        # Only testing CNV-W1A1 as parameters are only available to this
        # precision
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "streetview", bnn.RUNTIME_HW)
        sw_classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "streetview", bnn.RUNTIME_SW)

        # Testing Hardware
        out = classifier.classify_image(test_image_svhn)
        print("Inferred class: ", out)
        assert out == 5, 'SVHN HW test failed for CNV-W1A1'

        # Testing Software
        out = sw_classifier.classify_image(test_image_svhn)
        print("Inferred class: ", out)
        assert out == 5, 'SVHN SW test failed for CNV-W1A1'

        print("test finished with no errors!")
    finally:
        # Always reset the allocator, even when an assertion above failed.
        xlnk = Xlnk()
        xlnk.xlnk_reset()
def fft(ol, data_in):
    """Stream 512 samples through the FFT DMAs of ``ol`` and return |X|^2.

    The first 512 entries of ``data_in`` are copied into an int32 contiguous
    buffer and sent through ``ol.axi_dma_out``; the real and imaginary
    results are received through ``ol.axi_dma_re`` and ``ol.axi_dma_im``.
    Each received word that is >= 0x4000000 has 0x8000000 subtracted from it
    (i.e. it is read as a 27-bit two's-complement value) before the squared
    magnitude is formed.

    Parameters
    ----------
    ol : overlay object
        Must expose ``axi_dma_out``, ``axi_dma_re`` and ``axi_dma_im``.
    data_in : sequence
        At least 512 samples; only the first 512 are used.

    Returns
    -------
    numpy.ndarray
        float64 array of length 512 holding ``re**2 + im**2`` per bin.

    Raises
    ------
    IndexError
        If ``data_in`` holds fewer than 512 samples.
    """
    data_size = 512
    sign_threshold = 0x4000000
    wrap_modulus = 0x8000000

    if len(data_in) < data_size:
        raise IndexError(
            "data_in must hold at least %d samples" % data_size)

    out = ol.axi_dma_out
    re = ol.axi_dma_re
    im = ol.axi_dma_im

    xlnk = Xlnk()
    input_buffer = xlnk.cma_array(shape=(data_size,), dtype=np.int32)
    output_buffer_re = xlnk.cma_array(shape=(data_size,), dtype=np.int32)
    output_buffer_im = xlnk.cma_array(shape=(data_size,), dtype=np.int32)

    input_buffer[:] = np.asarray(data_in)[:data_size]

    out.sendchannel.transfer(input_buffer)
    re.recvchannel.transfer(output_buffer_re)
    im.recvchannel.transfer(output_buffer_im)
    # The transfers are only started above; the receive buffers must not be
    # read until every channel reports completion.
    out.sendchannel.wait()
    re.recvchannel.wait()
    im.recvchannel.wait()

    def to_signed(raw_words):
        # Copy into float64 first so the squares below cannot overflow int32.
        values = np.array(raw_words, dtype=np.float64)
        return np.where(values >= sign_threshold,
                        values - wrap_modulus,
                        values)

    data_re = to_signed(output_buffer_re)
    data_im = to_signed(output_buffer_im)
    return data_re * data_re + data_im * data_im
def __init__(self, sample_size=128, overlay=None):
    """Prepare the DMA handle, sample buffer and MMIO windows of the receiver.

    Parameters
    ----------
    sample_size : int
        Number of uint32 words in the contiguous input buffer; the same
        value is written to offset 0x0 of the GPS settings window.
    overlay : overlay object
        Must expose ``GPS_Receiver_IQ_Streamer.axi_dma_0``.
        NOTE(review): the default of ``None`` is dereferenced immediately,
        so callers have to pass a real overlay.
    """
    self.lock = threading.Lock()
    self.dma = overlay.GPS_Receiver_IQ_Streamer.axi_dma_0
    self.data_size = sample_size

    self.xlnk = Xlnk()
    self.input_buffer = self.xlnk.cma_array(
        shape=(self.data_size,), dtype=np.uint32)

    self._isFetching = True
    self.blk_count = 0

    # GPIO based settings initialization
    gpio_base = 0x41200000
    gps_ip_base = 0x41210000
    dma_ip_base = 0x40400000   # not used below
    window_len = 0x4
    window_offset = 0x00       # not used below

    self.FIFORESET_OFFSET = 16
    self.IQSTREAM_EN_OFFSET = 20
    self.RFSTREAM_EN_OFFSET = 24
    self.SAMPLES_PER_BLK_OFFSET = 0

    self.LEDs = MMIO(gpio_base, window_len)
    self.GpsSettings = MMIO(gps_ip_base, window_len)

    # Timestamp is computed but the suffix is currently disabled.
    stamp = time.strftime("%Y%m%d-%H%M%S")
    self.filename = "/home/xilinx/jupyter_notebooks/iotSDR-GPS/rec5s_40960IF"  # +stamp

    # update dma frame size
    self.GpsSettings.write(0x0, self.data_size)
def test_road_sign():
    """Check the "road-signs" CNV-W1A1 network on the bundled stop-sign image.

    ``Test_image/stop.jpg`` (located next to this file) is classified once
    with the hardware runtime and once with the software runtime; both
    results must equal 14.  The Xlnk allocator is reset when the test ends,
    including when an assertion fails.

    Raises
    ------
    AssertionError
        If either runtime infers a class other than 14.
    """
    # load test image
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_road = Image.open(
        os.path.join(BNN_ROOT_DIR, 'Test_image', 'stop.jpg'))

    try:
        # Only testing CNV-W1A1 as parameters are only available to this
        # precision
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "road-signs", bnn.RUNTIME_HW)
        sw_classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "road-signs", bnn.RUNTIME_SW)

        # Testing Hardware
        out = classifier.classify_image(test_image_road)
        print("Inferred class: ", out)
        assert out == 14, 'Road sign HW test failed for CNV-W1A1'

        # Testing Software
        out = sw_classifier.classify_image(test_image_road)
        print("Inferred class: ", out)
        assert out == 14, 'Road sign SW test failed for CNV-W1A1'

        print("test finished with no errors!")
    finally:
        # Always reset the allocator, even when an assertion above failed.
        xlnk = Xlnk()
        xlnk.xlnk_reset()
def __init__(self, bitfile, **kwargs):
    """Load the bitstream and set up the reset pin and mmult registers.

    ``bitfile`` is joined onto the directory that contains this source file
    before being passed to the parent constructor, so a bare file name such
    as ``'sharedmem.bit'`` can be used even though it is not on the
    bitstream search path.

    Parameters
    ----------
    bitfile : str
        Bitstream file name, relative to this module's directory.
    **kwargs
        Passed through unchanged to the parent constructor.
    """
    # Directory of the file that defines this class.
    here = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))

    # Upload the bitfile (and parse the colocated .tcl script)
    super().__init__(os.path.join(here, bitfile), **kwargs)

    # Manually define the GPIO pin that drives reset
    self.__resetPin = GPIO(GPIO.get_gpio_pin(0), "out")
    self.nreset()

    # Register at offset 0x0 of the mmult address space; used to set bits
    # and start the core.  Do NOT write to __ap_ctrl unless __resetPin has
    # been set to __NRESET_VALUE.
    core_base = self.mmultCore.mmio.base_addr
    self.__ap_ctrl = Register(core_base, 32)

    # One 32-bit register per data pointer of the core.
    self.__a_offset = Register(core_base + self.__MMULT_ADDR_A_DATA, 32)
    self.__bt_offset = Register(core_base + self.__MMULT_ADDR_BT_DATA, 32)
    self.__c_offset = Register(core_base + self.__MMULT_ADDR_C_DATA, 32)

    self.xlnk = Xlnk()
def send(self, window_coeffs, n):
    """Copy ``window_coeffs`` into a fresh int16 buffer and send it by DMA.

    A contiguous buffer of ``self.window_length`` int16 words is allocated,
    stored on ``self.input_buffer``, filled from ``window_coeffs``, pushed
    through the send channel of ``self.axi_dma_window`` and closed once the
    transfer has finished.

    Parameters
    ----------
    window_coeffs : array-like
        Values copied into the buffer with ``np.copyto``.
    n
        Not used by this method.
    """
    allocator = Xlnk()
    staging = allocator.cma_array(shape=(self.window_length, ),
                                  dtype=np.int16)
    self.input_buffer = staging

    window_dma = self.axi_dma_window
    np.copyto(staging, window_coeffs)

    window_dma.sendchannel.transfer(staging)
    window_dma.sendchannel.wait()

    # The buffer is released here; self.input_buffer still names it.
    staging.close()
def __init__(self, description):
    """Initialise the parent driver, allocate the data buffer and reset.

    Parameters
    ----------
    description
        Passed unchanged to the parent constructor.
    """
    super().__init__(description)

    # 2048 single-precision words of contiguous memory.
    allocator = Xlnk()
    self.buf_data = allocator.cma_array(shape=(2048, ), dtype=np.single)

    self.type = 1

    # Clear the transfer field, then set the reset field.
    self.data_inspector.transfer = 0
    self.data_inspector.reset = 1
def __init__(self, overlay, num_fields, num_bits_per_field, num_levels):
    """Store the tree geometry, grab the overlay handles and call reset().

    Parameters
    ----------
    overlay : overlay object
        Must expose ``binary_tree`` and ``dma``.
    num_fields, num_bits_per_field, num_levels
        Stored unchanged on the instance under the same names.
    """
    self.num_fields = num_fields
    self.num_bits_per_field = num_bits_per_field
    self.num_levels = num_levels

    self.tree_ctrl = overlay.binary_tree
    self.dma = overlay.dma
    self.xlnk = Xlnk()

    # No buffers are allocated here.
    self.in_buffer = None
    self.out_buffer = None

    self.reset()
def __init__(self, description, pkt_config=1, pkt_reload=128):
    """Initialise the parent driver and allocate config / reload buffers.

    Parameters
    ----------
    description
        Passed unchanged to the parent constructor.
    pkt_config : int
        Length of the int8 configuration buffer.
    pkt_reload : int
        Length of the int16 reload buffer.
    """
    # Find out the correct length of config and reload
    super().__init__(description)

    allocator = Xlnk()
    self.buf_config = allocator.cma_array(shape=(pkt_config, ),
                                          dtype=np.int8)
    self.buf_reload = allocator.cma_array(shape=(pkt_reload, ),
                                          dtype=np.int16)

    self.BWSelector.enable = 1

    self._taps = 128
    self._fs = 256e6

    self.set_downsample(2)
def __init__(self, load_overlay=True):
    """Download the cv2pynq bitstream and set up DMA, IP cores and buffers.

    Parameters
    ----------
    load_overlay : bool
        Not read by this constructor; the overlay is always downloaded.
    """
    self.bitstream_name = "cv2pynq03.bit"
    self.bitstream_path = os.path.join(CV2PYNQ_BIT_DIR, self.bitstream_name)
    self.ol = Overlay(self.bitstream_path)
    self.ol.download()
    self.ol.reset()
    self.xlnk = Xlnk()

    # split the cma into partitions for pipelined transfer
    self.partitions = 10
    self.cmaPartitionLen = (
        self.MAX_HEIGHT * self.MAX_WIDTH / self.partitions)
    self.listOfcma = [
        self.xlnk.cma_array(
            shape=(int(self.MAX_HEIGHT / self.partitions), self.MAX_WIDTH),
            dtype=np.uint8)
        for _ in range(self.partitions)
    ]

    self.img_filters = self.ol.image_filters
    self.dmaOut = self.img_filters.axi_dma_0.sendchannel
    self.dmaIn = self.img_filters.axi_dma_0.recvchannel
    # Stop both channels, then start them again (receive side first).
    self.dmaOut.stop()
    self.dmaIn.stop()
    self.dmaIn.start()
    self.dmaOut.start()

    # filter types: SobelX=0, SobelY=1, ScharrX=2, ScharrY=3,
    # Laplacian1=4, Laplacian3=5
    self.filter2DType = -1
    # filter types: SobelX=0, SobelY=1, Laplacian5=4
    self.filter2D_5Type = -1
    # filter types: blur=0, GaussianBlur=1
    self.filter2DfType = -1
    self.ffi = FFI()

    # Bind each filter IP to its attribute and reset it, in this order.
    for attr_name, ip_name in (
            ("f2D", "filter2D_hls_0"),
            ("f2D_5", "filter2D_hls_5_0"),
            ("f2D_f", "filter2D_f_0"),
            ("erodeIP", "erode_hls_0"),
            ("dilateIP", "dilate_hls_0")):
        ip_core = getattr(self.img_filters, ip_name)
        setattr(self, attr_name, ip_core)
        ip_core.reset()

    # cmaBuffer_<i> is the raw allocation, cmaBuffer<i> the wrapped view.
    # Buffer 2 is four times as tall (*4 for CornerHarris return).
    for index, height_factor in enumerate((1, 1, 4)):
        raw = self.xlnk.cma_array(
            shape=(self.MAX_HEIGHT * height_factor, self.MAX_WIDTH),
            dtype=np.uint8)
        setattr(self, "cmaBuffer_%d" % index, raw)
        wrapped = raw.view(self.ContiguousArrayCv2pynq)
        setattr(self, "cmaBuffer%d" % index, wrapped)
        wrapped.init(raw)

    self.CannyIP = self.img_filters.canny_edge_0
    self.CannyIP.reset()
def __init__(self, mb_info):
    """Create the Arduino_LCD18 object.

    An ``Arduino`` instance is built from ``mb_info`` and
    ``ARDUINO_LCD18_PROGRAM`` and kept as ``self.microblaze``; an ``Xlnk``
    instance is kept as ``self.buf_manager``.

    Parameters
    ----------
    mb_info : dict
        A dictionary storing Microblaze information, such as the
        IP name and the reset name.
    """
    microblaze = Arduino(mb_info, ARDUINO_LCD18_PROGRAM)
    self.microblaze = microblaze
    self.buf_manager = Xlnk()
def __init__(self):
    """Open the FIR shared library and make sure its bitstream is loaded.

    The C prototype of ``_p0_cpp_FIR_1_noasync`` is declared through cffi,
    the library named by ``general_const.LIBRARY`` is opened, and
    ``download_bitstream()`` is called when ``PL.bitfile_name`` differs
    from ``general_const.BITFILE``.
    """
    self.bitfile = general_const.BITFILE
    self.libfile = general_const.LIBRARY
    self.nshift_reg = 85

    binding = cffi.FFI()
    binding.cdef(
        "void _p0_cpp_FIR_1_noasync(int *x, int w[85], int *ret, int datalen);"
    )
    self.lib = binding.dlopen(self.libfile)

    self.xlnk = Xlnk()

    needs_download = PL.bitfile_name != self.bitfile
    if needs_download:
        self.download_bitstream()
def __init__(self, bitfile, **kwargs):
    """Constructor: load the bit file and prepare the Hough accelerator.

    ``bitfile`` is resolved against the directory of this source file.
    After the overlay is loaded, the constructor creates the reset pin,
    the control / pointer registers of ``hough_accel_0``, pre-allocates the
    contiguous result buffers, writes their physical addresses to the
    core, and finally runs ``HoughLines`` once on ``star.png``.

    Parameters
    ----------
    bitfile : str
        Bitstream file name, relative to this module's directory.
    **kwargs
        Passed through unchanged to the parent constructor.
    """
    dir_path = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))
    super().__init__(os.path.join(dir_path, bitfile), **kwargs)

    # Manually define the GPIO pin that drives reset
    self.__resetPin = GPIO(GPIO.get_gpio_pin(0), "out")
    self.nreset()

    # For convenience
    self.__hough = self.image_processing.hough_accel_0
    self.__dma = self.image_processing.axi_dma_0

    # Register objects, starting at address 0x00 of the accelerator
    base_addr = self.__hough.mmio.base_addr
    self.__ap_ctrl = Register(base_addr, 32)
    self.__outrho_offset = Register(base_addr + self.__OUTRHO_ADDR, 32)
    self.__outtheta_offset = Register(base_addr + self.__OUTTHETA_ADDR, 32)
    self.__num_of_lines_offset = Register(
        base_addr + self.__NUM_OF_LINES_ADDR, 32)
    self.__segments_offset = Register(
        base_addr + self.__LINES_SEGMENTS_ADDR, 32)
    self.__num_of_segments_offset = Register(
        base_addr + self.__NUM_OF_SEGMENTS_ADDR, 32)

    # DMA transfer engine
    allocator = Xlnk()
    self.__xlnk = allocator

    # Memory pre-allocation
    allocate = allocator.cma_array
    frame_shape = (self.__HEIGHT, self.__WIDTH, 3)
    self.__cma_rho = allocate(self.__LINES, np.single)
    self.__cma_theta = allocate(self.__LINES, np.single)
    self.__cma_numoflines = allocate(1, np.int32)
    self.__cma_segments = allocate((self.__SEGMENTS, 4), np.int32)
    self.__cma_numofsegments = allocate(1, np.int32)
    self.__cmabuf_dest = allocate(frame_shape, np.uint8)

    # public
    self.frame = allocate(frame_shape, np.uint8)

    # Write address of M_AXI to HLS core
    physical = allocator.cma_get_phy_addr
    self.__outrho_offset[31:0] = physical(self.__cma_rho.pointer)
    self.__outtheta_offset[31:0] = physical(self.__cma_theta.pointer)
    self.__num_of_lines_offset[31:0] = physical(
        self.__cma_numoflines.pointer)
    self.__segments_offset[31:0] = physical(self.__cma_segments.pointer)
    self.__num_of_segments_offset[31:0] = physical(
        self.__cma_numofsegments.pointer)

    # Performs the computation for the first time to avoid bad behavior on
    # the first call.  For a small number of segments, maybe not all
    # segments will be detected if we don't call the HoughLines function
    # for the first time here.
    self.frame[:] = cv2.imread(dir_path + '/star.png')
    self.HoughLines(20, 30, 80, 5, 30)
def __init__(self):
    """Load the matmult overlay and allocate the input / output buffers.

    The input buffer holds two ``DIM`` x ``DIM`` float32 matrices, the
    output buffer one, where ``DIM`` is ``MatrixOpServicer.DIM``.
    """
    dim = MatrixOpServicer.DIM

    overlay = Overlay('/home/xilinx/matmult/overlay/matmult/matmult.bit')
    self.overlay = overlay
    self.dma = overlay.dma
    self.mmult_ip = overlay.accel

    self.xlnk = Xlnk()
    self.in_buf = self.xlnk.cma_array(shape=(2, dim, dim),
                                      dtype=np.float32)
    self.out_buf = self.xlnk.cma_array(shape=(dim, dim),
                                       dtype=np.float32)
def __init__(self, description, pkt_size, buf_dtype=np.int16,
             buf_words_per_pkt=2):
    """Configure the packet registers and allocate the packet buffer.

    Parameters
    ----------
    description
        Passed unchanged to the parent constructor.
    pkt_size : int
        Packet size; ``pkt_size - 1`` is written to ``self.pkt_size``.
    buf_dtype : numpy dtype
        Element type of the contiguous buffer.  Previously this argument
        was accepted but ignored and the buffer was always ``np.int16``;
        it is now honoured (the default is unchanged).
    buf_words_per_pkt : int
        Number of buffer elements per packet; the buffer holds
        ``pkt_size * buf_words_per_pkt`` elements.
    """
    super().__init__(description)

    # Init config register: hold reset while the fields are written.
    self.reset = 1
    self.enable = 1
    self.pkt_size = pkt_size - 1
    self.auto_restart = 0
    self.reset = 0

    # Init buffer
    xlnk = Xlnk()
    self.buf = xlnk.cma_array(shape=(pkt_size * buf_words_per_pkt, ),
                              dtype=buf_dtype)
def __init__(self, n, m, pval=0.5, qval=0.1, rval=20.0,
             bitstream=None, library=None, cacheable=0):
    """Initialize the EKF object.

    Parameters
    ----------
    n : int
        number of states
    m : int
        number of observables/measurements
    pval : float
        prediction noise covariance
    qval : float
        state noise covariance
    rval : float
        measurement noise covariance
    bitstream : str
        string identifier of the bitstream
    library : str
        string identifier of the C library
    cacheable : int
        Whether the buffers should be cacheable - defaults to 0

    """
    # Hardware side: overlay, allocator bound to the C library, cffi handle.
    self.bitstream_name = bitstream
    self.overlay = Overlay(self.bitstream_name)

    self.library = library
    self.xlnk = Xlnk()
    self.xlnk.set_allocator_library(self.library)

    self._ffi = cffi.FFI()
    self.dlib = self._ffi.dlopen(self.library)
    self._ffi.cdef(self.ffi_interface)

    # Whether to use sds_alloc or sds_alloc_non_cacheable
    self.cacheable = cacheable

    # Filter state.  There is no previous prediction noise covariance yet;
    # the current state is zero with a diagonal noise covariance matrix.
    state_identity = np.eye(n)
    self.P_pre = None
    self.x = np.zeros(n)
    self.P_post = state_identity * pval

    # Covariance matrices for process noise and measurement noise
    self.Q = np.eye(n) * qval
    self.R = np.eye(m) * rval

    # Identity matrix
    self.I = np.eye(n)
def run_my_cnn(path, name):
    """Run the NNgen IP once on a counting buffer and print 16 words.

    Parameters
    ----------
    path : str
        Bitstream path handed to ``Overlay``.
    name : str
        IP name handed to ``nngen_ctrl.nngen_ip``.
    """
    overlay = Overlay(path)
    ip = nngen_ctrl.nngen_ip(overlay, name)

    allocator = Xlnk()
    buf = allocator.cma_array(16 * 1024, dtype=np.int32)

    # Fill the buffer with 0, 1, 2, ... before handing it to the IP.
    buf[:] = np.arange(len(buf), dtype=np.int32)

    ip.set_global_buffer(buf)
    ip.run()
    ip.wait()

    print(buf[:16])
def __init__(self, mb_info,
             intf_spec_name='PYNQZ1_LOGICTOOLS_SPECIFICATION',
             logictools_microblaze_bin=LOGICTOOLS_ARDUINO_BIN):
    """Initialize the created Microblaze object.

    This method leverages the initialization method of its parent. It
    also deals with relative / absolute path of the program.  Once the
    class-level ``__initialized`` flag is set, further calls do nothing
    beyond resolving the program path.

    Parameters
    ----------
    mb_info : dict
        A dictionary storing Microblaze information, such as the
        IP name and the reset name.
    intf_spec_name : str
        The name of the interface specification.
    logictools_microblaze_bin : str
        The name of the microblaze program to be loaded.

    Examples
    --------
    The `mb_info` is a dictionary storing Microblaze information:

    >>> mb_info = {'ip_name': 'mb_bram_ctrl_3',
    'rst_name': 'mb_reset_3',
    'intr_pin_name': 'iop3/dff_en_reset_0/q',
    'intr_ack_name': 'mb_3_intr_ack'}

    """
    # Relative program names are looked up under BIN_LOCATION.
    if os.path.isabs(logictools_microblaze_bin):
        mb_program = logictools_microblaze_bin
    else:
        mb_program = os.path.join(BIN_LOCATION, logictools_microblaze_bin)

    # NOTE(review): the whole remainder is treated as guarded by the
    # "not yet initialized" check - confirm against the original layout.
    if self.__initialized:
        return

    super().__init__(mb_info, mb_program)

    self.clk = Clocks
    self.buf_manager = Xlnk()
    self.buffers = dict()
    self.status = {k: 'RESET' for k in GENERATOR_ENGINE_DICT.keys()}

    # NOTE(review): eval() resolves the specification by name; only pass
    # trusted names here.
    self.intf_spec = eval(intf_spec_name)

    # Every pin named by the specification starts out unused.
    traceable = set(self.intf_spec['traceable_io_pins'].keys())
    outputs = set(self.intf_spec['non_traceable_outputs'].keys())
    inputs = set(self.intf_spec['non_traceable_inputs'].keys())
    pin_list = list(traceable | outputs | inputs)
    self.pin_map = {k: 'UNUSED' for k in pin_list}

    self.steps = 0
    self.__class__.__initialized = True
def execute_s(self, test_data, input_ch, input_dim, output_ch, output_dim):
    """Send ``test_data`` behind an 8-word header and return the payload.

    The input packet is the header
    ``[0, 1, 0, input_ch, input_dim, output_ch, output_dim, 0]`` followed
    by the flattened ``test_data``, cast to int16.  The receive buffer has
    ``8 + output_ch * output_dim * output_dim`` int16 words.

    Returns
    -------
    The receive buffer without its first 8 words (a slice of the buffer,
    not a copy).
    """
    header = [0, 1, 0, input_ch, input_dim, output_ch, output_dim, 0]
    packet = np.append(header, test_data.ravel())

    in_buffer = Xlnk().cma_array(shape=(packet.shape[0]), dtype=np.int16)
    reply_words = 8 + output_ch * output_dim * output_dim
    out_buffer = Xlnk().cma_array(shape=(reply_words), dtype=np.int16)

    np.copyto(in_buffer, packet.astype(np.int16))

    dma = self.axi_dma_0
    # Start both directions before waiting on either.
    dma.sendchannel.transfer(in_buffer)
    dma.recvchannel.transfer(out_buffer)
    dma.sendchannel.wait()
    dma.recvchannel.wait()

    return out_buffer[8:]
def matrixAvg(red, green, blue):
    """Send ``red + green + blue`` through ``ol.axi_dma_0``; return 27 words.

    The value of ``red + green + blue`` is copied into a 2700-word int32
    contiguous buffer, sent over the DMA send channel, and 27 int32 words
    are then received.

    The original code rebound ``inputs`` to ``red + green + blue``, which
    discarded the contiguous buffer and handed an ordinary object to the
    DMA; the data is now copied *into* the buffer instead.

    Parameters
    ----------
    red, green, blue
        Operands of ``+``.  The result must hold 2700 values.
        NOTE(review): whether these are lists (concatenated) or arrays
        (summed element-wise) is not visible here - confirm with callers.

    Returns
    -------
    The 27-word int32 receive buffer.

    Raises
    ------
    ValueError
        If ``red + green + blue`` cannot be stored in the 2700-word buffer.
    """
    dma0 = ol.axi_dma_0

    xlnk = Xlnk()
    inputs = xlnk.cma_array(shape=(2700), dtype=np.int32)
    outputs = xlnk.cma_array(shape=(27), dtype=np.int32)

    # Fill the DMA buffer in place; do not rebind the name.
    inputs[:] = red + green + blue

    dma0.sendchannel.transfer(inputs)
    dma0.sendchannel.wait()
    dma0.recvchannel.transfer(outputs)
    dma0.recvchannel.wait()

    return outputs
def load_weight_fc(self, W, index, quant_scale, multiple):
    """Send the scaled weights and bias of one FC layer through the DMA.

    The packet is the 8-word header
    ``[index, 0, 1, IFMCH, 0, OFMCH, 0, multiple * 20]`` followed by the
    flattened ``W[0] * quant_scale`` and then ``W[1] * quant_scale``, cast
    to int16.  A receive buffer of the same length is transferred as well;
    its contents are not used.

    Parameters
    ----------
    W : sequence
        ``W[0]`` is the 2-D kernel, ``W[1]`` the bias.
    index
        First header word.
    quant_scale
        Factor applied to kernel and bias before the int16 cast.
    multiple
        Multiplied by 20 to form the last header word.
    """
    weights, bias = W[0], W[1]
    in_channels = weights.shape[0]
    out_channels = weights.shape[1]

    header = [index, 0, 1, in_channels, 0, out_channels, 0, multiple * 20]
    scaled_weights = weights.ravel() * quant_scale
    scaled_bias = bias * quant_scale
    packet = np.append(np.append(header, scaled_weights), scaled_bias)

    in_buffer = Xlnk().cma_array(shape=(packet.shape[0]), dtype=np.int16)
    out_buffer = Xlnk().cma_array(shape=(packet.shape[0]), dtype=np.int16)
    np.copyto(in_buffer, packet.astype(np.int16))

    dma = self.axi_dma_0
    # Start both directions before waiting on either.
    dma.sendchannel.transfer(in_buffer)
    dma.recvchannel.transfer(out_buffer)
    dma.sendchannel.wait()
    dma.recvchannel.wait()
def __init__(self, if_id):
    """Return a new instance of an Arduino_LCD18 object.

    Parameters
    ----------
    if_id : int
        The interface ID (3) corresponding to (ARDUINO).

    Raises
    ------
    ValueError
        If ``if_id`` is not ``ARDUINO``.

    """
    if if_id not in [ARDUINO]:
        raise ValueError("No such IOP for Arduino LCD device.")

    iop = request_iop(if_id, ARDUINO_LCD18_PROGRAM)
    self.iop = iop
    self.mmio = iop.mmio
    self.buf_manager = Xlnk()

    iop.start()
def __init__(self):
    """Register as the "xlnk" device with an Xlnk default memory.

    ``pynq.Xlnk`` is imported inside the constructor rather than at module
    level.
    """
    super().__init__("xlnk")

    from pynq import Xlnk as allocator_class

    self.default_memory = allocator_class()
    self.capabilities = dict(MEMORY_MAPPED=True)
def test_mnist():
    """Check the LFC "mnist" networks on the bundled digit image.

    ``Test_image/3.image-idx3-ubyte`` (located next to this file) is
    classified with LFC-W1A1 and LFC-W1A2, first on the hardware runtime
    and then on the software runtime; every result must equal 3.  The Xlnk
    allocator is reset when the test ends, including when an assertion
    fails.

    Raises
    ------
    AssertionError
        If any of the four classifications is not 3.
    """
    # load test image
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_mnist = os.path.join(
        BNN_ROOT_DIR, 'Test_image', '3.image-idx3-ubyte')

    try:
        # Testing Hardware
        # Testing LFC-W1A1
        classifier = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A1, "mnist", bnn.RUNTIME_HW)
        out = classifier.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST HW test failed for LFCW1A1'

        # Testing LFC-W1A2
        classifier = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A2, "mnist", bnn.RUNTIME_HW)
        out = classifier.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST HW test failed for LFCW1A2'

        # Testing Software
        # Testing LFC-W1A1
        w1a1 = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A1, "mnist", bnn.RUNTIME_SW)
        out = w1a1.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST SW test failed for LFC W1A1'

        # Testing LFC-W1A2
        w1a2 = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A2, "mnist", bnn.RUNTIME_SW)
        out = w1a2.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST SW test failed for LFC W1A2'

        print("test finished with no errors!")
    finally:
        # Always reset the allocator, even when an assertion above failed.
        xlnk = Xlnk()
        xlnk.xlnk_reset()
def __init__(self, build_path, reset_value, description, *args):
    """Return a new MixedProcessor object.

    All arguments are forwarded to the parent constructor; afterwards a
    private ``Xlnk`` instance is created.

    Parameters
    ----------
    build_path : str
        Path to the RISC-V build files for this processor
    reset_value : int
        Value to be written (0 or 1) to the GPIO pin to reset
        the RISC-V processor.
    description : dict
        Dictionary describing this processor.
    *args
        Extra positional arguments, forwarded unchanged.

    """
    super().__init__(build_path, reset_value, description, *args)

    allocator = Xlnk()
    self.__xlnk = allocator
class MatrixOpServicer(matrix_op_pb2_grpc.MatrixOpServicer):
    """gRPC servicer that multiplies two DIM x DIM matrices on the overlay."""

    # Side length of the square matrices handled by the accelerator buffers.
    DIM = 128

    def __init__(self):
        """Load the matmult overlay and allocate the DMA buffers."""
        dim = MatrixOpServicer.DIM

        self.overlay = Overlay(
            '/home/xilinx/matmult/overlay/matmult/matmult.bit')
        self.dma = self.overlay.dma
        self.mmult_ip = self.overlay.accel

        self.xlnk = Xlnk()
        # Input: both operands stacked; output: the single result matrix.
        self.in_buf = self.xlnk.cma_array(shape=(2, dim, dim),
                                          dtype=np.float32)
        self.out_buf = self.xlnk.cma_array(shape=(dim, dim),
                                           dtype=np.float32)

    @staticmethod
    def _elapsed_microsec(started):
        """Return the time since ``started`` in microseconds, 2 decimals."""
        return round((time.time() - started) * 1000000, 2)

    def MatMult(self, request, context):
        """Unpickle ``request.a`` / ``request.b``, multiply, return the reply.

        Parameters
        ----------
        request
            Message whose ``a`` and ``b`` fields hold pickled arrays.
        context
            Not used by this method.

        Returns
        -------
        matrix_op_pb2.OpReply
            Reply whose ``res`` field is the pickled output buffer.
        """
        print('request received: matrix mult')

        # load np arrays from bytes
        # NOTE(review): pickle.loads on request data executes whatever the
        # peer sent; only expose this service to trusted clients.
        started = time.time()
        a = pickle.loads(request.a)
        b = pickle.loads(request.b)
        lat = self._elapsed_microsec(started)
        print(f'unpickled data in {lat} microsec')

        # run kernel
        started = time.time()
        self.in_buf[:] = np.stack((a, b))
        send, recv = self.dma.sendchannel, self.dma.recvchannel
        send.transfer(self.in_buf)
        recv.transfer(self.out_buf)
        self.mmult_ip.write(CTRL_REG, (AP_START | AUTO_RESTART))
        send.wait()
        recv.wait()
        ret = matrix_op_pb2.OpReply(res=pickle.dumps(self.out_buf))
        lat = self._elapsed_microsec(started)
        print(f'mult done in {lat} microsec')

        return ret
def alloc_descriptor(Control, data_size, NDPL=0x0, NDPU=0x0, Status=0x0,
                     APP0=0x0, APP1=0x0, APP2=0x0, APP3=0x0, APP4=0x0):
    """Allocate a 13-word descriptor and the data buffer it points to.

    Word layout written to the descriptor:

    ====  ==========================================
    0-1   ``NDPL``, ``NDPU``
    2-3   low / high 32 bits of the buffer's physical address
    4-5   zero (labelled "Reversed" in the original, presumably "Reserved")
    6-7   ``Control``, ``Status``
    8-12  ``APP0`` .. ``APP4``
    ====  ==========================================

    Parameters
    ----------
    Control
        Value of descriptor word 6.
    data_size : int
        Second dimension of the ``[1, data_size]`` data buffer.

    Returns
    -------
    tuple
        ``(descriptor, buffer)``.
    """
    mmu = Xlnk()
    descriptor = mmu.cma_array([13, ])
    buffer = mmu.cma_array([1, data_size])

    address = buffer.physical_address
    words = (
        NDPL,
        NDPU,
        address & 0xffffffff,
        (address >> 32) & 0xffffffff,
        0x0,
        0x0,
        Control,
        Status,
        APP0,
        APP1,
        APP2,
        APP3,
        APP4,
    )
    for position, word in enumerate(words):
        descriptor[position] = word

    return descriptor, buffer
class CmaBufferFactory():
    """Small wrapper that creates and releases Xlnk contiguous buffers."""

    def __init__(self):
        """Create the Xlnk instance used for every allocation."""
        self._xlnk = Xlnk()

    def make_cma_buf(self, shape, data_type):
        """Return a cacheable contiguous array of ``shape`` / ``data_type``.

        An ``AssertionError`` is raised when ``shape`` equals ``[]``.
        """
        assert shape != [], RuntimeError
        allocate = self._xlnk.cma_array
        return allocate(shape=shape, cacheable=1, dtype=data_type)

    def del_cma_buf(self, cma_buf):
        """Release ``cma_buf`` by calling its ``close()`` method."""
        cma_buf.close()
def __init__(self, ip_info, intf_spec_name):
    """Return a new PS controlled trace analyzer object.

    The maximum sample rate is 100MHz. Usually the sample rate is set
    to no larger than 10MHz in order for the signals to be captured
    on pins / wires.

    For Pmod header, pin numbers 0-7 correspond to the pins on the
    Pmod interface.

    For Arduino header, pin numbers 0-13 correspond to D0-D13;
    pin numbers 14-19 correspond to A0-A5;
    pin numbers 20-21 correspond to SDA and SCL.

    Parameters
    ----------
    ip_info : dict
        The dictionary containing the IP associated with the analyzer.
    intf_spec_name : str/dict
        The name of the interface specification, or the specification
        itself.  Subclasses of ``str`` and ``dict`` (for example
        ``collections.OrderedDict``) are accepted as well.

    Raises
    ------
    ValueError
        If ``intf_spec_name`` is neither a ``str`` nor a ``dict``.

    """
    if isinstance(intf_spec_name, str):
        # NOTE(review): eval() resolves the specification by name; only
        # pass trusted names here.
        self.intf_spec = eval(intf_spec_name)
    elif isinstance(intf_spec_name, dict):
        self.intf_spec = intf_spec_name
    else:
        raise ValueError("Interface specification has to be str or dict.")

    # The trace controller IP is named after the monitor width.
    trace_cntrl_info = ip_info['trace_cntrl_{}_0'.format(
        self.intf_spec['monitor_width'])]
    trace_dma_info = ip_info['axi_dma_0']
    self.trace_control = MMIO(trace_cntrl_info['phys_addr'],
                              trace_cntrl_info['addr_range'])
    self.dma = DMA(trace_dma_info)

    self.num_analyzer_samples = 0
    self.samples = None
    self._cma_array = None
    self.frequency_mhz = 0
    self.clk = Clocks
    self.xlnk = Xlnk()
    self._status = 'RESET'
def test_cifar10():
    """Check the "cifar10" CNV network on the bundled deer image.

    ``Test_image/deer.jpg`` (located next to this file) is classified with
    the default classifier and with the software runtime; both results
    must equal 4.  The original software assertion re-checked the first
    result instead of the software one; it now checks ``out_sw``.  The
    Xlnk allocator is reset when the test ends, including when an
    assertion fails.

    Raises
    ------
    AssertionError
        If either classification is not 4.
    """
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_cifar10 = os.path.join(BNN_ROOT_DIR, 'Test_image', 'deer.jpg')
    im = Image.open(test_image_cifar10)

    try:
        classifier = bnn.CnvClassifier('cifar10')
        out = classifier.classify_image(im)
        assert out == 4, 'Cifar10 HW test failed'

        classifier_sw = bnn.CnvClassifier("cifar10", bnn.RUNTIME_SW)
        out_sw = classifier_sw.classify_image(im)
        # Check the software result, not the first one again.
        assert out_sw == 4, 'Cifar10 SW test failed'
    finally:
        # Always reset the allocator, even when an assertion above failed.
        xlnk = Xlnk()
        xlnk.xlnk_reset()
def test_tinier_yolo():
    """Run Tinier-YOLO on ``dog.jpg`` and compare against golden output.

    The first and last convolution layers are computed in software with
    weights loaded from ``.npy`` files; the layers in between run on the
    accelerator.  The detection probabilities written by the darknet
    library to ``Test_image/dog-probs.txt`` must match
    ``Test_image/tinier-yolo/golden_probs_dog.txt``.

    Compared with the original, the accelerator is de-initialised and the
    Xlnk allocator reset even when the test fails, and the flattened
    output array is kept in a named variable so the ctypes pointer taken
    from it does not refer to a temporary.

    Raises
    ------
    AssertionError
        If the produced probabilities file differs from the golden file.
    """
    TEST_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    QNN_ROOT_DIR = os.path.join(TEST_ROOT_DIR, '../')
    test_image = os.path.join(TEST_ROOT_DIR, 'Test_image', 'dog.jpg')
    print(test_image)

    classifier = TinierYolo()
    classifier.init_accelerator()
    try:
        net = classifier.load_network(
            json_layer=os.path.join(QNN_ROOT_DIR, 'qnn', 'params',
                                    'tinier-yolo-layers.json'))

        # Software-layer parameters; weights are transposed with axes
        # (3, 2, 1, 0) before use.
        conv0_weights = np.load('/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv0-W.npy', encoding="latin1")
        conv0_weights_correct = np.transpose(conv0_weights, axes=(3, 2, 1, 0))
        conv8_weights = np.load('/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv8-W.npy', encoding="latin1")
        conv8_weights_correct = np.transpose(conv8_weights, axes=(3, 2, 1, 0))
        conv0_bias = np.load('/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv0-bias.npy', encoding="latin1")
        conv0_bias_broadcast = np.broadcast_to(
            conv0_bias[:, np.newaxis],
            (net['conv1']['input'][0],
             net['conv1']['input'][1] * net['conv1']['input'][1]))
        conv8_bias = np.load('/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv8-bias.npy', encoding="latin1")
        conv8_bias_broadcast = np.broadcast_to(
            conv8_bias[:, np.newaxis], (125, 13 * 13))

        file_name_cfg = c_char_p(
            os.path.join(QNN_ROOT_DIR, 'qnn', 'params',
                         'tinier-yolo-bwn-3bit-relu-nomaxpool.cfg').encode())
        net_darknet = lib.parse_network_cfg(file_name_cfg)

        # Load and letterbox the image to 416x416 with the darknet helpers.
        file_name = c_char_p(test_image.encode())
        im = load_image(file_name, 0, 0)
        im_letterbox = letterbox_image(im, 416, 416)
        img_flatten = np.ctypeslib.as_array(im_letterbox.data, (3, 416, 416))
        img = np.copy(img_flatten)
        img = np.swapaxes(img, 0, 2)
        if len(img.shape) < 4:
            img = img[np.newaxis, :, :, :]

        # First layer in software, then clip / quantize for the accelerator.
        conv0_ouput = utils.conv_layer(img, conv0_weights_correct,
                                       b=conv0_bias_broadcast,
                                       stride=2, padding=1)
        conv0_output_quant = conv0_ouput.clip(0.0, 4.0)
        conv0_output_quant = utils.quantize(conv0_output_quant / 4, 3)

        # Offloaded layers.
        out_dim = net['conv7']['output'][1]
        out_ch = net['conv7']['output'][0]
        conv_output = classifier.get_accel_buffer(out_ch, out_dim)
        conv_input = classifier.prepare_buffer(conv0_output_quant * 7)
        classifier.inference(conv_input, conv_output)
        conv7_out = classifier.postprocess_buffer(conv_output)
        conv7_out = conv7_out.reshape(out_dim, out_dim, out_ch)
        conv7_out = np.swapaxes(conv7_out, 0, 1)  # exp 1
        if len(conv7_out.shape) < 4:
            conv7_out = conv7_out[np.newaxis, :, :, :]

        # Last layer in software.
        conv8_ouput = utils.conv_layer(conv7_out, conv8_weights_correct,
                                       b=conv8_bias_broadcast, stride=1)
        # Keep the flattened copy referenced for as long as the pointer is
        # in use.
        conv8_flat = conv8_ouput.flatten()
        conv8_out = conv8_flat.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        lib.forward_region_layer_pointer_nolayer(net_darknet, conv8_out)

        tresh = c_float(0.3)
        tresh_hier = c_float(0.5)
        file_name_out = c_char_p(
            os.path.join(TEST_ROOT_DIR, 'Test_image', 'dog-results').encode())
        file_name_probs = c_char_p(
            os.path.join(TEST_ROOT_DIR, 'Test_image',
                         'dog-probs.txt').encode())
        file_names_voc = c_char_p("/opt/darknet/data/voc.names".encode())
        darknet_path = c_char_p("/opt/darknet/".encode())
        lib.draw_detection_python(net_darknet, file_name, tresh, tresh_hier,
                                  file_names_voc, darknet_path,
                                  file_name_out, file_name_probs)

        golden_probs = os.path.join(TEST_ROOT_DIR, 'Test_image',
                                    'tinier-yolo', 'golden_probs_dog.txt')
        current_probs = os.path.join(TEST_ROOT_DIR, 'Test_image',
                                     'dog-probs.txt')
        assert filecmp.cmp(golden_probs, current_probs), \
            'Tinier-Yolo test failed'
    finally:
        # Release the accelerator and reset the allocator on every exit.
        classifier.deinit_accelerator()
        xlnk = Xlnk()
        xlnk.xlnk_reset()
def test_dorefanet():
    """Run DoReFa-Net on the bundled image and compare the top-5 classes.

    The first convolution and the fully connected layers are computed in
    software; the layers in between run on the accelerator.  The top-5
    indexes must equal ``[2, 359, 250, 333, 227]``.

    Compared with the original, the two ``.npy`` parameter files (which
    hold dictionaries, hence the ``.item()`` calls) are loaded with
    ``allow_pickle=True`` so that they can still be read on NumPy releases
    where pickled objects are refused by default, and the accelerator is
    de-initialised and the Xlnk allocator reset even when the test fails.

    Raises
    ------
    AssertionError
        If the top-5 indexes differ from the golden values.
    """
    TEST_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    QNN_ROOT_DIR = os.path.join(TEST_ROOT_DIR, '../')
    test_image = os.path.join(TEST_ROOT_DIR, 'Test_image', 'n01484850_0.jpg')
    print(test_image)

    classifier = Dorefanet()
    classifier.init_accelerator()
    try:
        net = classifier.load_network(
            json_layer=os.path.join(QNN_ROOT_DIR, 'qnn', 'params',
                                    'dorefanet-layers.json'))

        # NOTE: allow_pickle=True unpickles the file contents; these are
        # parameter files shipped with the package, not external input.
        conv0_weights = np.load(
            os.path.join(QNN_ROOT_DIR, 'qnn', 'params', 'dorefanet-conv0.npy'),
            allow_pickle=True, encoding="latin1").item()
        fc_weights = np.load('/opt/python3.6/lib/python3.6/site-packages/qnn/params/dorefanet-fc-normalized.npy', allow_pickle=True, encoding='latin1').item()

        with open(os.path.join(QNN_ROOT_DIR, 'notebooks',
                               'imagenet-classes.pkl'), 'rb') as f:
            classes = pickle.load(f)
        # Lookup tables built from the class file; not read again below.
        names = dict((k, classes[k][1].split(',')[0]) for k in classes.keys())
        synsets = dict((classes[k][0], classes[k][1].split(',')[0])
                       for k in classes.keys())

        img, img_class = classifier.load_image(test_image)

        conv0_W = conv0_weights['conv0/W']
        conv0_T = conv0_weights['conv0/T']

        # 1st convolutional layer execution, having as input the image and
        # the trained parameters (weights)
        conv0 = utils.conv_layer(img, conv0_W, stride=4)
        # The result is then quantized to 2 bits representation for the
        # subsequent HW offload
        conv0 = utils.threshold(conv0, conv0_T)

        # Compute offloaded convolutional layers
        in_dim = net['conv0']['output'][1]
        in_ch = net['conv0']['output'][0]
        out_dim = net['merge4']['output_dim']
        out_ch = net['merge4']['output_channels']

        conv_output = classifier.get_accel_buffer(out_ch, out_dim)
        conv_input = classifier.prepare_buffer(conv0)
        classifier.inference(conv_input, conv_output)
        conv_output = classifier.postprocess_buffer(conv_output)

        # FC Layer 0 (input normalised by its maximum)
        fc_input = conv_output / np.max(conv_output)
        fc0_W = fc_weights['fc0/Wn']
        fc0_b = fc_weights['fc0/bn']
        fc0_out = utils.fully_connected(fc_input, fc0_W, fc0_b)
        fc0_out = utils.qrelu(fc0_out)
        fc0_out = utils.quantize(fc0_out, 2)

        # FC Layer 1
        fc1_W = fc_weights['fc1/Wn']
        fc1_b = fc_weights['fc1/bn']
        fc1_out = utils.fully_connected(fc0_out, fc1_W, fc1_b)
        fc1_out = utils.qrelu(fc1_out)

        # FC Layer 2 (zero bias)
        fct_W = fc_weights['fct/W']
        fct_b = np.zeros((fct_W.shape[1], ))
        fct_out = utils.fully_connected(fc1_out, fct_W, fct_b)

        # Softmax
        out = utils.softmax(fct_out)

        # Top-5 results
        topn = utils.get_topn_indexes(out, 5)
        topn_golden = np.array([2, 359, 250, 333, 227])
        assert np.array_equal(topn, topn_golden), 'Dorefanet test failed'
    finally:
        # Release the accelerator and reset the allocator on every exit.
        classifier.deinit_accelerator()
        xlnk = Xlnk()
        xlnk.xlnk_reset()