def test_road_sign():
    """Classify ``Test_image/stop.jpg`` with the road-signs CNV-W1A1 network.

    The image is run through both the hardware and the software runtime and
    each result must be class index 14.  The contiguous-memory reset runs in
    a ``finally`` clause so that a failing assertion does not leave buffers
    allocated for whatever test runs next.
    """
    # load test image
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    image_path = os.path.join(BNN_ROOT_DIR, 'Test_image', 'stop.jpg')
    test_image_road = Image.open(image_path)

    try:
        # Only testing CNV-W1A1 as parameters are only available to this
        # precision.
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "road-signs", bnn.RUNTIME_HW)
        sw_classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "road-signs", bnn.RUNTIME_SW)

        # Testing Hardware
        out = classifier.classify_image(test_image_road)
        print("Inferred class: ", out)
        assert out == 14, 'Road sign HW test failed for CNV-W1A1'

        # Testing Software
        out = sw_classifier.classify_image(test_image_road)
        print("Inferred class: ", out)
        assert out == 14, 'Road sign SW test failed for CNV-W1A1'

        print("test finished with no errors!")
    finally:
        # Always release the contiguous memory, even when an assertion fails.
        Xlnk().xlnk_reset()
def test_svhn():
    """Classify ``Test_image/6.png`` with the streetview CNV-W1A1 network.

    Both the hardware and the software runtime must return class index 5.
    The contiguous-memory reset runs in a ``finally`` clause so that a
    failing assertion does not leak buffers into later tests.
    """
    # load test image
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    image_path = os.path.join(BNN_ROOT_DIR, 'Test_image', '6.png')
    test_image_svhn = Image.open(image_path)

    try:
        # Only testing CNV-W1A1 as parameters are only available to this
        # precision.
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "streetview", bnn.RUNTIME_HW)
        sw_classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "streetview", bnn.RUNTIME_SW)

        # Testing Hardware
        out = classifier.classify_image(test_image_svhn)
        print("Inferred class: ", out)
        assert out == 5, 'SVHN HW test failed for CNV-W1A1'

        # Testing Software
        out = sw_classifier.classify_image(test_image_svhn)
        print("Inferred class: ", out)
        assert out == 5, 'SVHN SW test failed for CNV-W1A1'

        print("test finished with no errors!")
    finally:
        # Always release the contiguous memory, even when an assertion fails.
        Xlnk().xlnk_reset()
def test_mnist():
    """Classify ``Test_image/3.image-idx3-ubyte`` with the LFC networks.

    LFC-W1A1 and LFC-W1A2 are each exercised on the hardware runtime and then
    on the software runtime; every run must return class 3.  The
    contiguous-memory reset runs in a ``finally`` clause so that a failing
    assertion does not leak buffers into later tests.
    """
    # load test image (passed to the classifier as a path, not decoded here)
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_mnist = os.path.join(
        BNN_ROOT_DIR, 'Test_image', '3.image-idx3-ubyte')

    try:
        # Testing Hardware
        # Testing LFC-W1A1
        classifier = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A1, "mnist", bnn.RUNTIME_HW)
        out = classifier.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST HW test failed for LFCW1A1'

        # Testing LFC-W1A2
        classifier = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A2, "mnist", bnn.RUNTIME_HW)
        out = classifier.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST HW test failed for LFCW1A2'

        # Testing Software
        # Testing LFC-W1A1
        w1a1 = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A1, "mnist", bnn.RUNTIME_SW)
        out = w1a1.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST SW test failed for LFC W1A1'

        # Testing LFC-W1A2
        w1a2 = bnn.LfcClassifier(
            bnn.NETWORK_LFCW1A2, "mnist", bnn.RUNTIME_SW)
        out = w1a2.classify_mnist(test_image_mnist)
        print("Inferred class: ", out)
        assert out == 3, 'MNIST SW test failed for LFC W1A2'

        print("test finished with no errors!")
    finally:
        # Always release the contiguous memory, even when an assertion fails.
        Xlnk().xlnk_reset()
def test_cifar10():
    """Classify ``Test_image/deer.jpg`` with the cifar10 CNV classifier.

    The image is classified once with the default runtime and once with
    ``bnn.RUNTIME_SW``; both results must be class index 4.

    The software assertion checks ``out_sw``.  It previously re-checked the
    hardware result, so a broken software path could never fail the test.
    Cleanup runs in ``finally`` so a failed assertion does not leak
    contiguous memory.
    """
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_cifar10 = os.path.join(BNN_ROOT_DIR, 'Test_image', 'deer.jpg')
    im = Image.open(test_image_cifar10)

    try:
        # Default runtime (the assertion message labels this the HW run).
        classifier = bnn.CnvClassifier('cifar10')
        out = classifier.classify_image(im)
        assert out == 4, 'Cifar10 HW test failed'

        # Software runtime.
        classifier_sw = bnn.CnvClassifier("cifar10", bnn.RUNTIME_SW)
        out_sw = classifier_sw.classify_image(im)
        assert out_sw == 4, 'Cifar10 SW test failed'
    finally:
        Xlnk().xlnk_reset()
def test_svhn():
    """Classify ``Test_image/6.png`` with the streetview CNV classifier.

    The image is classified once with the default runtime and once with
    ``bnn.RUNTIME_SW``; both results must be class index 5.

    The software assertion checks ``out_sw``.  It previously re-checked the
    hardware result, so a broken software path could never fail the test.
    Cleanup runs in ``finally`` so a failed assertion does not leak
    contiguous memory.
    """
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_svhn = os.path.join(BNN_ROOT_DIR, 'Test_image', '6.png')
    im = Image.open(test_image_svhn)

    try:
        # Default runtime (the assertion message labels this the HW run).
        classifier = bnn.CnvClassifier('streetview')
        out = classifier.classify_image(im)
        assert out == 5, 'SVHN HW test failed'

        # Software runtime.
        classifier_sw = bnn.CnvClassifier("streetview", bnn.RUNTIME_SW)
        out_sw = classifier_sw.classify_image(im)
        assert out_sw == 5, 'SVHN SW test failed'
    finally:
        Xlnk().xlnk_reset()
def test_road_sign():
    """Classify ``Test_image/stop.jpg`` with the road-signs CNV classifier.

    The image is classified once with the default runtime and once with
    ``bnn.RUNTIME_SW``; both results must be class index 14.

    The software assertion checks ``out_sw``.  It previously re-checked the
    hardware result, so a broken software path could never fail the test.
    Cleanup runs in ``finally`` so a failed assertion does not leak
    contiguous memory.
    """
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_road = os.path.join(BNN_ROOT_DIR, 'Test_image', 'stop.jpg')
    im = Image.open(test_image_road)

    try:
        # Default runtime (the assertion message labels this the HW run).
        classifier = bnn.CnvClassifier('road-signs')
        out = classifier.classify_image(im)
        assert out == 14, 'Road sign HW test failed'

        # Software runtime.
        classifier_sw = bnn.CnvClassifier("road-signs", bnn.RUNTIME_SW)
        out_sw = classifier_sw.classify_image(im)
        assert out_sw == 14, 'Road sign SW test failed'
    finally:
        Xlnk().xlnk_reset()
def test_mnist():
    """Run the LFC network on ``Test_image/3.image-idx3-ubyte`` via PynqBNN.

    Inference is performed once with the default runtime and once with
    ``bnn.RUNTIME_SW``; both must return class 3.  The contiguous-memory
    reset runs in ``finally`` so a failed assertion does not leak buffers
    into later tests.
    """
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    test_image_mnist = os.path.join(
        BNN_ROOT_DIR, 'Test_image', '3.image-idx3-ubyte')

    try:
        # Default runtime (the assertion message labels this the HW run).
        classifier = bnn.PynqBNN(network=bnn.NETWORK_LFC)
        classifier.load_parameters("mnist")
        out = classifier.inference(test_image_mnist)
        assert out == 3, 'MNIST HW test failed'

        # Testing LFC with MNIST dataset - SW
        classifier_sw = bnn.PynqBNN(
            network=bnn.NETWORK_LFC, runtime=bnn.RUNTIME_SW)
        classifier_sw.load_parameters("mnist")
        out_sw = classifier_sw.inference(test_image_mnist)
        assert out_sw == 3, 'MNIST SW test failed'
    finally:
        Xlnk().xlnk_reset()
def fft2(image, FDV):
    """Stream *image* through the ``fft2`` overlay and return the result.

    The overlay ``./bitstream/fft2.bit`` is loaded, the image is copied into
    a contiguous input buffer, sent through ``axi_dma_0`` and the output
    received into a second contiguous buffer.

    Parameters
    ----------
    image
        Anything ``np.array`` accepts; it is copied into a
        ``(pic_height, pic_width)`` ``uint8`` buffer, so it must be
        broadcastable to that shape.
    FDV
        Value written to register offset ``0x00`` of the ``fft2_0`` IP to
        start processing.

    Returns
    -------
    A PIL image built from a copy of the output buffer.

    Notes
    -----
    * The overlay handle is bound to ``fft2_design``.  The original code read
      the IP blocks from the not-yet-assigned local ``fft2`` and raised
      ``UnboundLocalError`` on the first call.
    * ``pic_height`` / ``pic_width`` are not defined in this function.
      NOTE(review): presumably module-level globals; confirm.
    """
    fft2_design = Overlay("./bitstream/fft2.bit")
    dma = fft2_design.axi_dma_0
    fft2_ip = fft2_design.fft2_0

    input_array = np.array(image)

    xlnk = Xlnk()
    in_buffer = None
    out_buffer = None
    try:
        in_buffer = xlnk.cma_array(
            shape=(pic_height, pic_width), dtype=np.uint8)
        out_buffer = xlnk.cma_array(
            shape=(pic_height, pic_width), dtype=np.uint8)
        np.copyto(in_buffer, input_array)

        # Queue both DMA directions before starting the IP.
        dma.sendchannel.transfer(in_buffer)
        dma.recvchannel.transfer(out_buffer)
        fft2_ip.write(0x00, FDV)  # start
        dma.sendchannel.wait()
        dma.recvchannel.wait()

        # Copy into ordinary memory first: the contiguous buffer is released
        # below and the returned image must not keep referring to it.
        result = Image.fromarray(np.array(out_buffer))
    finally:
        # Free the buffers even when a transfer or the copy fails.
        if in_buffer is not None:
            in_buffer.close()
        if out_buffer is not None:
            out_buffer.close()
        xlnk.xlnk_reset()
    return result
def test_tinier_yolo():
    """Run Tinier-YOLO on ``Test_image/dog.jpg`` and compare with golden data.

    The first and last convolution layers are computed in software with
    ``utils.conv_layer``; the layers in between run on the accelerator via
    ``classifier.inference``.  The detection step writes
    ``Test_image/dog-probs.txt``, which must be identical to
    ``Test_image/tinier-yolo/golden_probs_dog.txt``.

    Accelerator de-initialisation and the contiguous-memory reset run in a
    ``finally`` clause so that a failure part-way through does not leave the
    accelerator initialised or buffers allocated for later tests.
    """
    TEST_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    QNN_ROOT_DIR = os.path.join(TEST_ROOT_DIR, '../')
    test_image = os.path.join(TEST_ROOT_DIR, 'Test_image', 'dog.jpg')
    print(test_image)

    classifier = TinierYolo()
    classifier.init_accelerator()
    try:
        net = classifier.load_network(json_layer=os.path.join(
            QNN_ROOT_DIR, 'qnn', 'params', 'tinier-yolo-layers.json'))

        # Trained parameters of the two software layers.  The weights are
        # transposed to reverse their axis order before use.
        conv0_weights = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv0-W.npy',
            encoding="latin1")
        conv0_weights_correct = np.transpose(conv0_weights, axes=(3, 2, 1, 0))
        conv8_weights = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv8-W.npy',
            encoding="latin1")
        conv8_weights_correct = np.transpose(conv8_weights, axes=(3, 2, 1, 0))
        conv0_bias = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv0-bias.npy',
            encoding="latin1")
        conv0_bias_broadcast = np.broadcast_to(
            conv0_bias[:, np.newaxis],
            (net['conv1']['input'][0],
             net['conv1']['input'][1] * net['conv1']['input'][1]))
        conv8_bias = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv8-bias.npy',
            encoding="latin1")
        conv8_bias_broadcast = np.broadcast_to(
            conv8_bias[:, np.newaxis], (125, 13 * 13))

        file_name_cfg = c_char_p(os.path.join(
            QNN_ROOT_DIR, 'qnn', 'params',
            'tinier-yolo-bwn-3bit-relu-nomaxpool.cfg').encode())
        net_darknet = lib.parse_network_cfg(file_name_cfg)

        # Load and letterbox the image to 416x416, then copy it into a NumPy
        # array with a leading batch axis.
        file_name = c_char_p(test_image.encode())
        im = load_image(file_name, 0, 0)
        im_letterbox = letterbox_image(im, 416, 416)
        img_flatten = np.ctypeslib.as_array(im_letterbox.data, (3, 416, 416))
        img = np.copy(img_flatten)
        img = np.swapaxes(img, 0, 2)
        if len(img.shape) < 4:
            img = img[np.newaxis, :, :, :]

        # First layer in software, clipped to [0, 4] and quantized to 3 bits.
        conv0_output = utils.conv_layer(
            img, conv0_weights_correct, b=conv0_bias_broadcast,
            stride=2, padding=1)
        conv0_output_quant = conv0_output.clip(0.0, 4.0)
        conv0_output_quant = utils.quantize(conv0_output_quant / 4, 3)

        # Offloaded layers run on the accelerator.
        out_dim = net['conv7']['output'][1]
        out_ch = net['conv7']['output'][0]
        conv_output = classifier.get_accel_buffer(out_ch, out_dim)
        conv_input = classifier.prepare_buffer(conv0_output_quant * 7)
        classifier.inference(conv_input, conv_output)
        conv7_out = classifier.postprocess_buffer(conv_output)
        conv7_out = conv7_out.reshape(out_dim, out_dim, out_ch)
        conv7_out = np.swapaxes(conv7_out, 0, 1)  # exp 1
        if len(conv7_out.shape) < 4:
            conv7_out = conv7_out[np.newaxis, :, :, :]

        # Last layer in software, then the region layer and drawing step.
        conv8_output = utils.conv_layer(
            conv7_out, conv8_weights_correct, b=conv8_bias_broadcast,
            stride=1)
        conv8_out = conv8_output.flatten().ctypes.data_as(
            ctypes.POINTER(ctypes.c_float))
        lib.forward_region_layer_pointer_nolayer(net_darknet, conv8_out)

        thresh = c_float(0.3)
        thresh_hier = c_float(0.5)
        file_name_out = c_char_p(
            os.path.join(TEST_ROOT_DIR, 'Test_image', 'dog-results').encode())
        file_name_probs = c_char_p(
            os.path.join(TEST_ROOT_DIR, 'Test_image', 'dog-probs.txt').encode())
        file_names_voc = c_char_p("/opt/darknet/data/voc.names".encode())
        darknet_path = c_char_p("/opt/darknet/".encode())
        lib.draw_detection_python(
            net_darknet, file_name, thresh, thresh_hier, file_names_voc,
            darknet_path, file_name_out, file_name_probs)

        golden_probs = os.path.join(
            TEST_ROOT_DIR, 'Test_image', 'tinier-yolo', 'golden_probs_dog.txt')
        current_probs = os.path.join(
            TEST_ROOT_DIR, 'Test_image', 'dog-probs.txt')
        assert filecmp.cmp(golden_probs, current_probs), \
            'Tinier-Yolo test failed'
    finally:
        # Release the accelerator and contiguous memory on every exit path.
        classifier.deinit_accelerator()
        Xlnk().xlnk_reset()
def test_dorefanet():
    """Run DoReFa-Net on ``Test_image/n01484850_0.jpg`` and check the top-5.

    The first convolution and the fully connected layers are computed in
    software through ``utils``; the layers in between run on the accelerator
    via ``classifier.inference``.  The five highest-scoring class indexes
    must equal ``[2, 359, 250, 333, 227]``.

    Accelerator de-initialisation and the contiguous-memory reset run in a
    ``finally`` clause so that a failure part-way through does not leave the
    accelerator initialised or buffers allocated for later tests.
    """
    TEST_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    QNN_ROOT_DIR = os.path.join(TEST_ROOT_DIR, '../')
    test_image = os.path.join(TEST_ROOT_DIR, 'Test_image', 'n01484850_0.jpg')
    print(test_image)

    classifier = Dorefanet()
    classifier.init_accelerator()
    try:
        net = classifier.load_network(json_layer=os.path.join(
            QNN_ROOT_DIR, 'qnn', 'params', 'dorefanet-layers.json'))

        conv0_weights = np.load(
            os.path.join(QNN_ROOT_DIR, 'qnn', 'params', 'dorefanet-conv0.npy'),
            encoding="latin1").item()
        fc_weights = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/dorefanet-fc-normalized.npy',
            encoding='latin1').item()

        # NOTE(review): the class-label table is not consulted by the
        # assertion below.  The load is kept so that a missing or unreadable
        # file still fails the test as it did before.  pickle must only be
        # used on trusted files; this one is read from the project tree.
        with open(os.path.join(QNN_ROOT_DIR, 'notebooks',
                               'imagenet-classes.pkl'), 'rb') as f:
            pickle.load(f)

        img, _ = classifier.load_image(test_image)

        conv0_W = conv0_weights['conv0/W']
        conv0_T = conv0_weights['conv0/T']

        # 1st convolutional layer execution, having as input the image and
        # the trained parameters (weights).
        conv0 = utils.conv_layer(img, conv0_W, stride=4)
        # The result is then quantized to a 2-bit representation for the
        # subsequent HW offload.
        conv0 = utils.threshold(conv0, conv0_T)

        # Compute offloaded convolutional layers.
        out_dim = net['merge4']['output_dim']
        out_ch = net['merge4']['output_channels']
        conv_output = classifier.get_accel_buffer(out_ch, out_dim)
        conv_input = classifier.prepare_buffer(conv0)
        classifier.inference(conv_input, conv_output)
        conv_output = classifier.postprocess_buffer(conv_output)

        # FC Layer 0: input is the accelerator output scaled by its maximum.
        fc_input = conv_output / np.max(conv_output)
        fc0_W = fc_weights['fc0/Wn']
        fc0_b = fc_weights['fc0/bn']
        fc0_out = utils.fully_connected(fc_input, fc0_W, fc0_b)
        fc0_out = utils.qrelu(fc0_out)
        fc0_out = utils.quantize(fc0_out, 2)

        # FC Layer 1
        fc1_W = fc_weights['fc1/Wn']
        fc1_b = fc_weights['fc1/bn']
        fc1_out = utils.fully_connected(fc0_out, fc1_W, fc1_b)
        fc1_out = utils.qrelu(fc1_out)

        # FC Layer 2 (zero bias)
        fct_W = fc_weights['fct/W']
        fct_b = np.zeros((fct_W.shape[1], ))
        fct_out = utils.fully_connected(fc1_out, fct_W, fct_b)

        # Softmax
        out = utils.softmax(fct_out)

        # Top-5 results
        topn = utils.get_topn_indexes(out, 5)
        topn_golden = np.array([2, 359, 250, 333, 227])
        assert np.array_equal(topn, topn_golden), 'Dorefanet test failed'
    finally:
        # Release the accelerator and contiguous memory on every exit path.
        classifier.deinit_accelerator()
        Xlnk().xlnk_reset()
def ALL_Init():
    """Allocate the YOLOv2 buffers, load parameters and build pointer tables.

    Steps performed, all results published through module-level globals:

    1. Reset the contiguous-memory allocator and allocate three buffers
       (weights, biases, feature-map scratch memory); record their physical
       addresses in ``WEIGHT_BASE``, ``BETA_BASE`` and ``IMG_MEM``.
    2. Read ``weightsv2_comb_reorg_ap16.bin`` and ``biasv2_comb_ap16.bin``
       from the current directory and copy them into the buffers.
    3. Derive the on-chip tile sizes and scratch-memory layout constants.
    4. Fill ``in_ptr`` / ``out_ptr`` with per-layer input/output addresses.
       Layers alternate between the top and the bottom of the scratch
       region.  Indices 25 and 28 and ``out_ptr[31]`` are never assigned and
       keep their initial value of 0.
    5. Append the stripped lines of ``coco.names`` to the existing
       module-level list ``label_name``.

    The function reads ``Tc``, ``Tr``, ``S``, ``K``, ``inputQ``,
    ``net_layers_outputs`` and ``label_name`` from module scope.

    The label file is opened with a ``with`` block so its handle is closed;
    the original left it open.
    """
    xlnk = Xlnk()
    xlnk.xlnk_reset()

    global weight_base_buffer, WEIGHT_BASE, bate_base_buffer, BETA_BASE, \
        img_base_buffer, IMG_MEM, params_wight, params_bais, \
        OnChipIB_Width, OnChipIB_Height, ALPHA_BETA_MAX_NUM, MEM_BASE, \
        MEM_LEN, Memory_top, Memory_bottom, ROUTE16_LEN, CONV27_LEN, \
        CONV24_LEN, LastLayerOutputPara, in_ptr, out_ptr

    # Allocate the contiguous buffers: 25470896 x uint32 for weights,
    # 5381 x uint32 for biases and 4194304 x int32 for feature maps.
    weight_base_buffer = xlnk.cma_array(shape=(25470896,), dtype=np.uint32)
    WEIGHT_BASE = weight_base_buffer.physical_address
    bate_base_buffer = xlnk.cma_array(shape=(5381,), dtype=np.uint32)
    BETA_BASE = bate_base_buffer.physical_address
    img_base_buffer = xlnk.cma_array(shape=(4194304,), dtype=np.int32)
    IMG_MEM = img_base_buffer.physical_address

    # ===============================================
    # yolov2 weight and bias copy to memory
    # ===============================================
    params_wight = np.fromfile(
        "weightsv2_comb_reorg_ap16.bin", dtype=np.uint32)
    np.copyto(weight_base_buffer, params_wight)
    params_bais = np.fromfile("biasv2_comb_ap16.bin", dtype=np.uint32)
    np.copyto(bate_base_buffer, params_bais)
    print('ok')

    OnChipIB_Width = ((Tc - 1) * S + K)
    OnChipIB_Height = ((Tr - 1) * S + K)
    ALPHA_BETA_MAX_NUM = 1024
    MEM_BASE = IMG_MEM
    MEM_LEN = 416 * 416 * 32 * 2 + 208 * 208 * 32 * 2
    Memory_top = MEM_BASE
    Memory_bottom = MEM_BASE + MEM_LEN
    ROUTE16_LEN = 26 * 26 * 512 * 4 // 2
    CONV27_LEN = 13 * 13 * 256 * 4 // 2
    CONV24_LEN = 13 * 13 * 1024 * 4 // 2
    LastLayerOutputPara = pow(2.0, -inputQ[23])

    in_ptr = np.zeros(32)
    out_ptr = np.zeros(32)

    # Layers 0-17: even layers read from the top and write just below the
    # bottom; odd layers read the previous output and write to the top.
    for x in range(0, 18):
        if x % 2 == 0:
            in_ptr[x] = Memory_top
            out_ptr[x] = Memory_bottom - net_layers_outputs[x] * 4 // 2
        else:
            in_ptr[x] = out_ptr[x - 1]
            out_ptr[x] = Memory_top

    # Layers 18-24: same scheme, but ROUTE16_LEN bytes are kept reserved at
    # the bottom of the region.
    for x in range(18, 25):
        if x % 2 == 0:
            in_ptr[x] = Memory_top
            out_ptr[x] = (Memory_bottom - ROUTE16_LEN
                          - net_layers_outputs[x] * 4 // 2)
        else:
            in_ptr[x] = out_ptr[x - 1]
            out_ptr[x] = Memory_top

    in_ptr[26] = Memory_bottom - ROUTE16_LEN
    out_ptr[26] = Memory_top

    in_ptr[27] = Memory_top
    out_ptr[27] = Memory_bottom - ROUTE16_LEN - CONV24_LEN - CONV27_LEN

    in_ptr[29] = out_ptr[27]
    out_ptr[29] = Memory_top

    in_ptr[30] = Memory_top
    out_ptr[30] = Memory_bottom - (net_layers_outputs[30] + 1024 * 7) * 4 // 2

    # If the address is not a multiple of 4 KiB, move it to the 4 KiB
    # boundary one page below the one it falls in.
    if out_ptr[30] % (4 * 1024) != 0:
        out_ptr[30] = (out_ptr[30] // (4 * 1024) - 1) * (4 * 1024)

    in_ptr[31] = out_ptr[30]

    # ===================================================
    # read label
    # ===================================================
    with open("coco.names") as label_read:
        for line in label_read:
            label_name.append(line.strip())
def __init__(self, addr_port_client=("192.168.1.100", 3000)):
    """Load the overlay, allocate all DMA buffers and load the weights.

    Parameters
    ----------
    addr_port_client
        Stored unchanged in ``self.client_port``.
        NOTE(review): presumably an ``(address, port)`` pair for a client
        connection; this method does not use it further.

    The weights file is read twice.  The first pass collects the
    ``total_iterations`` value of every layer so the per-layer contiguous
    buffers can be sized.  The second pass fills those buffers and the
    ``obj_factor`` / ``box_factor`` tables.  Both passes use ``with`` so the
    file handle is always closed; the original never closed the second one.
    """
    print('FPGA_Connect_Object init')
    self.resolution = [640, 360]
    self.client_port = addr_port_client
    team_name = 'SystemsETHZ'

    xlnk = Xlnk()
    xlnk.xlnk_reset()

    # ------------------------- variable initializing ------------------------
    OVERLAY_PATH = '/home/xilinx/jupyter_notebooks/dac_2019_contest/common/' + team_name + '/ultra96_v04.bit'
    WEIGHTS_FILE_NAME = '/home/xilinx/jupyter_notebooks/dac_2019_contest/common/' + team_name + '/weights_file_v04_demo.txt'

    # ---------------------------- download overlay --------------------------
    overlay = Overlay(OVERLAY_PATH)
    self.dma = overlay.axi_dma_0
    self.nn_ctrl = MMIO(0xA0010000, length=1024)

    # ---------------------------- allocate buffers --------------------------
    self.MINIBATCH_SIZE = 1
    self.height = 176
    self.width = 320
    pixel_bits = 24
    pixels_per_line = 384 / pixel_bits
    self.num_lines = int((self.height * self.width) / pixels_per_line)
    self.in_buffer = xlnk.cma_array(
        shape=(self.MINIBATCH_SIZE * self.num_lines, 64), dtype=np.uint8)

    # Each stage's output buffer holds 16 uint32 words per output line; the
    # line count shrinks with the spatial down-sampling factor of the stage.
    fire1_num_out_lines = (self.height / 4) * (self.width / 4) * self.MINIBATCH_SIZE
    self.fire1_out_buffer = xlnk.cma_array(
        shape=(int(16 * fire1_num_out_lines),), dtype=np.uint32)
    fire2_num_out_lines = (self.height / 8) * (self.width / 8) * self.MINIBATCH_SIZE
    self.fire2_out_buffer = xlnk.cma_array(
        shape=(int(16 * fire2_num_out_lines),), dtype=np.uint32)
    fire3_num_out_lines = (self.height / 16) * (self.width / 16) * self.MINIBATCH_SIZE
    self.fire3_out_buffer = xlnk.cma_array(
        shape=(int(16 * fire3_num_out_lines),), dtype=np.uint32)
    # fire4 and fire5 use the same size as fire3.
    self.fire4_out_buffer = xlnk.cma_array(
        shape=(int(16 * fire3_num_out_lines),), dtype=np.uint32)
    self.fire5_out_buffer = xlnk.cma_array(
        shape=(int(16 * fire3_num_out_lines),), dtype=np.uint32)

    final_num_lines = int((self.height / 16) * (self.width / 16))
    # Four identically shaped bounding-box buffers.
    self.bndboxes = [
        xlnk.cma_array(
            shape=(self.MINIBATCH_SIZE, final_num_lines, 16), dtype=np.int32)
        for _ in range(4)
    ]
    self.obj_array = np.zeros((self.MINIBATCH_SIZE, final_num_lines))

    # ---------------------------- download weights --------------------------
    NUM_LAYERS = 3 + 4 * 4

    # Pass 1: record total_iterations for each layer.
    layer = 0
    total_iterations = np.zeros(NUM_LAYERS)
    with open(WEIGHTS_FILE_NAME, "r") as weights_file:
        for line in weights_file:
            if "layer" in line:
                layer = int(line.split(": ")[1])
            if "total_iterations" in line:
                total_iterations[layer] = int(line.split(": ")[1])

    # One buffer per layer, 16 uint32 words per iteration.
    self.weightsfactors = [
        xlnk.cma_array(
            shape=(int(16 * int(total_iterations[i])),), dtype=np.uint32)
        for i in range(NUM_LAYERS)
    ]

    self.obj_factors = np.zeros(4)
    self.box_factors = np.zeros(4)

    # Pass 2: fill the buffers.  `layer` deliberately carries over from
    # pass 1 so data lines preceding the first "layer" line behave as before.
    index = 0
    with open(WEIGHTS_FILE_NAME, "r") as weights_file:
        for line in weights_file:
            if "layer" in line:
                layer = int(line.split(": ")[1])
                index = 0
            elif "total_iterations" not in line:
                if "obj_factor" in line:
                    temp = line.split(' ')
                    self.obj_factors[int(temp[1])] = int(temp[2])
                elif "box_factor" in line:
                    temp = line.split(' ')
                    self.box_factors[int(temp[1])] = int(temp[2])
                else:
                    # Data line: take the text after the last '0x' and walk
                    # it from the right in 8-hex-digit (32-bit) groups.
                    # `base` starts at 1, so the final character is skipped.
                    no0x = line.split('0x')[-1]
                    base = 1
                    while base < len(no0x):
                        part = no0x[-(base + 8):-base]
                        self.weightsfactors[layer][index * 16 + base // 8] = int(part, 16)
                        base += 8
                    index += 1
def test_cifar10():
    """Classify ``Test_image/deer.jpg`` with every cifar10 CNV network.

    CNV-W1A1, CNV-W1A2 and CNV-W2A2 are each exercised on the hardware
    runtime and then on the software runtime; every run must return class
    index 4.  The contiguous-memory reset runs in a ``finally`` clause so
    that a failing assertion does not leak buffers into later tests.
    """
    # load test image
    BNN_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    image_path = os.path.join(BNN_ROOT_DIR, 'Test_image', 'deer.jpg')
    test_image_cifar10 = Image.open(image_path)

    try:
        # Testing Hardware
        # Testing CNV-W1A1
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "cifar10", bnn.RUNTIME_HW)
        out = classifier.classify_image(test_image_cifar10)
        print("Inferred class: ", out)
        assert out == 4, 'Cifar10 HW test failed for CNV-W1A1'

        # Testing CNV-W1A2
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A2, "cifar10", bnn.RUNTIME_HW)
        out = classifier.classify_image(test_image_cifar10)
        print("Inferred class: ", out)
        assert out == 4, 'Cifar10 HW test failed for CNV-W1A2'

        # Testing CNV-W2A2
        classifier = bnn.CnvClassifier(
            bnn.NETWORK_CNVW2A2, "cifar10", bnn.RUNTIME_HW)
        out = classifier.classify_image(test_image_cifar10)
        print("Inferred class: ", out)
        assert out == 4, 'Cifar10 HW test failed for CNV-W2A2'

        # Testing Software
        # Testing CNV-W1A1
        w1a1 = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A1, "cifar10", bnn.RUNTIME_SW)
        out = w1a1.classify_image(test_image_cifar10)
        print("Inferred class: ", out)
        assert out == 4, 'Cifar10 SW test failed for CNV-W1A1'

        # Testing CNV-W1A2
        w1a2 = bnn.CnvClassifier(
            bnn.NETWORK_CNVW1A2, "cifar10", bnn.RUNTIME_SW)
        out = w1a2.classify_image(test_image_cifar10)
        print("Inferred class: ", out)
        assert out == 4, 'Cifar10 SW test failed for CNV-W1A2'

        # Testing CNV-W2A2
        w2a2 = bnn.CnvClassifier(
            bnn.NETWORK_CNVW2A2, "cifar10", bnn.RUNTIME_SW)
        out = w2a2.classify_image(test_image_cifar10)
        print("Inferred class: ", out)
        assert out == 4, 'Cifar10 SW test failed for CNV-W2A2'

        print("test finished with no errors!")
    finally:
        # Always release the contiguous memory, even when an assertion fails.
        Xlnk().xlnk_reset()
def End():
    """Reset the contiguous-memory allocator.

    ``xlnk_reset`` is called on an ``Xlnk`` instance.  The original invoked
    it on the class itself, which fails for an instance method because no
    ``self`` is supplied.
    NOTE(review): assumes ``Xlnk`` is the ``pynq.Xlnk`` class here, as in the
    other routines that call ``Xlnk()``; confirm it is not rebound to an
    instance in this module.
    """
    Xlnk().xlnk_reset()
def test_dorefanet():
    """Run DoReFa-Net on ``Test_image/n01484850_0.jpg`` and check the top-5.

    The first convolution and the fully connected layers are computed in
    software through ``utils``; the layers in between run on the accelerator
    via ``classifier.inference``.  The five highest-scoring class indexes
    must equal ``[2, 359, 250, 333, 227]``.

    Accelerator de-initialisation and the contiguous-memory reset run in a
    ``finally`` clause so that a failure part-way through does not leave the
    accelerator initialised or buffers allocated for later tests.
    """
    TEST_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    QNN_ROOT_DIR = os.path.join(TEST_ROOT_DIR, '../')
    test_image = os.path.join(TEST_ROOT_DIR, 'Test_image', 'n01484850_0.jpg')
    print(test_image)

    params_dir = os.path.join(QNN_ROOT_DIR, 'qnn', 'params')

    classifier = Dorefanet()
    classifier.init_accelerator()
    try:
        net = classifier.load_network(
            json_layer=os.path.join(params_dir, 'dorefanet-layers.json'))

        conv0_weights = np.load(
            os.path.join(params_dir, 'dorefanet-conv0.npy'),
            encoding="latin1").item()
        fc_weights = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/dorefanet-fc-normalized.npy',
            encoding='latin1').item()

        # NOTE(review): the class-label table is not consulted by the
        # assertion below.  The load is kept so that a missing or unreadable
        # file still fails the test as it did before.  pickle must only be
        # used on trusted files; this one is read from the project tree.
        labels_path = os.path.join(
            QNN_ROOT_DIR, 'notebooks', 'imagenet-classes.pkl')
        with open(labels_path, 'rb') as f:
            pickle.load(f)

        img, _ = classifier.load_image(test_image)

        # 1st convolutional layer execution, having as input the image and
        # the trained parameters (weights).
        conv0 = utils.conv_layer(img, conv0_weights['conv0/W'], stride=4)
        # The result is then quantized to a 2-bit representation for the
        # subsequent HW offload.
        conv0 = utils.threshold(conv0, conv0_weights['conv0/T'])

        # Compute offloaded convolutional layers.
        out_dim = net['merge4']['output_dim']
        out_ch = net['merge4']['output_channels']
        conv_output = classifier.get_accel_buffer(out_ch, out_dim)
        conv_input = classifier.prepare_buffer(conv0)
        classifier.inference(conv_input, conv_output)
        conv_output = classifier.postprocess_buffer(conv_output)

        # FC Layer 0: input is the accelerator output scaled by its maximum.
        fc_input = conv_output / np.max(conv_output)
        fc0_out = utils.fully_connected(
            fc_input, fc_weights['fc0/Wn'], fc_weights['fc0/bn'])
        fc0_out = utils.qrelu(fc0_out)
        fc0_out = utils.quantize(fc0_out, 2)

        # FC Layer 1
        fc1_out = utils.fully_connected(
            fc0_out, fc_weights['fc1/Wn'], fc_weights['fc1/bn'])
        fc1_out = utils.qrelu(fc1_out)

        # FC Layer 2 (zero bias)
        fct_W = fc_weights['fct/W']
        fct_b = np.zeros((fct_W.shape[1], ))
        fct_out = utils.fully_connected(fc1_out, fct_W, fct_b)

        # Softmax
        out = utils.softmax(fct_out)

        # Top-5 results
        topn = utils.get_topn_indexes(out, 5)
        topn_golden = np.array([2, 359, 250, 333, 227])
        assert np.array_equal(topn, topn_golden), 'Dorefanet test failed'
    finally:
        # Release the accelerator and contiguous memory on every exit path.
        classifier.deinit_accelerator()
        Xlnk().xlnk_reset()
def cleanup(self):
    """Reset the contiguous-memory allocator through a fresh Xlnk handle."""
    Xlnk().xlnk_reset()
def test_tinier_yolo():
    """Run Tinier-YOLO on ``Test_image/dog.jpg`` and compare with golden data.

    The first and last convolution layers are computed in software with
    ``utils.conv_layer``; the layers in between run on the accelerator via
    ``classifier.inference``.  The detection step writes
    ``Test_image/dog-probs.txt``, which must be identical to
    ``Test_image/tinier-yolo/golden_probs_dog.txt``.

    Accelerator de-initialisation and the contiguous-memory reset run in a
    ``finally`` clause so that a failure part-way through does not leave the
    accelerator initialised or buffers allocated for later tests.
    """
    TEST_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
    QNN_ROOT_DIR = os.path.join(TEST_ROOT_DIR, '../')
    image_dir = os.path.join(TEST_ROOT_DIR, 'Test_image')
    test_image = os.path.join(TEST_ROOT_DIR, 'Test_image', 'dog.jpg')
    print(test_image)

    classifier = TinierYolo()
    classifier.init_accelerator()
    try:
        net = classifier.load_network(json_layer=os.path.join(
            QNN_ROOT_DIR, 'qnn', 'params', 'tinier-yolo-layers.json'))

        # Trained parameters of the two software layers.  The weights are
        # transposed to reverse their axis order before use.
        conv0_weights = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv0-W.npy',
            encoding="latin1")
        conv0_weights_correct = np.transpose(conv0_weights, axes=(3, 2, 1, 0))
        conv8_weights = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv8-W.npy',
            encoding="latin1")
        conv8_weights_correct = np.transpose(conv8_weights, axes=(3, 2, 1, 0))

        conv1_input = net['conv1']['input']
        conv0_bias = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv0-bias.npy',
            encoding="latin1")
        conv0_bias_broadcast = np.broadcast_to(
            conv0_bias[:, np.newaxis],
            (conv1_input[0], conv1_input[1] * conv1_input[1]))
        conv8_bias = np.load(
            '/opt/python3.6/lib/python3.6/site-packages/qnn/params/tinier-yolo-conv8-bias.npy',
            encoding="latin1")
        conv8_bias_broadcast = np.broadcast_to(
            conv8_bias[:, np.newaxis], (125, 13 * 13))

        file_name_cfg = c_char_p(os.path.join(
            QNN_ROOT_DIR, 'qnn', 'params',
            'tinier-yolo-bwn-3bit-relu-nomaxpool.cfg').encode())
        net_darknet = lib.parse_network_cfg(file_name_cfg)

        # Load and letterbox the image to 416x416, then copy it into a NumPy
        # array with a leading batch axis.
        file_name = c_char_p(test_image.encode())
        im = load_image(file_name, 0, 0)
        im_letterbox = letterbox_image(im, 416, 416)
        img_flatten = np.ctypeslib.as_array(im_letterbox.data, (3, 416, 416))
        img = np.copy(img_flatten)
        img = np.swapaxes(img, 0, 2)
        if len(img.shape) < 4:
            img = img[np.newaxis, :, :, :]

        # First layer in software, clipped to [0, 4] and quantized to 3 bits.
        conv0_output = utils.conv_layer(
            img, conv0_weights_correct, b=conv0_bias_broadcast,
            stride=2, padding=1)
        conv0_output_quant = conv0_output.clip(0.0, 4.0)
        conv0_output_quant = utils.quantize(conv0_output_quant / 4, 3)

        # Offloaded layers run on the accelerator.
        out_dim = net['conv7']['output'][1]
        out_ch = net['conv7']['output'][0]
        conv_output = classifier.get_accel_buffer(out_ch, out_dim)
        conv_input = classifier.prepare_buffer(conv0_output_quant * 7)
        classifier.inference(conv_input, conv_output)
        conv7_out = classifier.postprocess_buffer(conv_output)
        conv7_out = conv7_out.reshape(out_dim, out_dim, out_ch)
        conv7_out = np.swapaxes(conv7_out, 0, 1)  # exp 1
        if len(conv7_out.shape) < 4:
            conv7_out = conv7_out[np.newaxis, :, :, :]

        # Last layer in software, then the region layer and drawing step.
        conv8_output = utils.conv_layer(
            conv7_out, conv8_weights_correct, b=conv8_bias_broadcast,
            stride=1)
        conv8_out = conv8_output.flatten().ctypes.data_as(
            ctypes.POINTER(ctypes.c_float))
        lib.forward_region_layer_pointer_nolayer(net_darknet, conv8_out)

        thresh = c_float(0.3)
        thresh_hier = c_float(0.5)
        file_name_out = c_char_p(
            os.path.join(image_dir, 'dog-results').encode())
        file_name_probs = c_char_p(
            os.path.join(image_dir, 'dog-probs.txt').encode())
        file_names_voc = c_char_p("/opt/darknet/data/voc.names".encode())
        darknet_path = c_char_p("/opt/darknet/".encode())
        lib.draw_detection_python(
            net_darknet, file_name, thresh, thresh_hier, file_names_voc,
            darknet_path, file_name_out, file_name_probs)

        golden_probs = os.path.join(
            image_dir, 'tinier-yolo', 'golden_probs_dog.txt')
        current_probs = os.path.join(image_dir, 'dog-probs.txt')
        assert filecmp.cmp(golden_probs, current_probs), \
            'Tinier-Yolo test failed'
    finally:
        # Release the accelerator and contiguous memory on every exit path.
        classifier.deinit_accelerator()
        Xlnk().xlnk_reset()
# coding: utf-8

# In[1]:

import os
import sys
import time

import numpy as np

import pynq
from pynq import Overlay
from pynq import Xlnk

# Start from a clean contiguous-memory state before the overlay is loaded.
xlnk = Xlnk()
xlnk.xlnk_reset()


# In[2]:

# Load the bitstream from the working directory and grab the accelerator IP.
overlay = Overlay("./FracNet_T_0.bit")
# overlay?
FracNet = overlay.FracNet_T_0
# timer = overlay.axi_timer_0


# In[3]:

# Bare expression kept from the notebook cell; outside a notebook this
# statement displays nothing.
FracNet.register_map


# In[4]:

# timer.register_map


# In[5]:
class HardwareDecisionTree:
    """Driver for a decision-tree IP controlled through memory-mapped registers.

    The IP is reached through ``overlay.binary_tree`` (control registers) and
    ``overlay.dma`` (data streaming).  Node values, field indexes and RAM
    addresses for all tree levels are packed into three groups of four 32-bit
    registers: registers 1-4 (node values), 5-8 (field indexes) and 9-12
    (RAM addresses).  Register 0 is the control register; this class writes
    ``0x01`` to reset and ``0x02`` to load.

    Nodes are addressed breadth-first, so address 0 is the root, 1-2 are
    level 1, 3-6 are level 2, and so on.

    Parameters
    ----------
    overlay
        Object exposing ``binary_tree`` and ``dma`` attributes.
    num_fields
        Number of fields per data item.
    num_bits_per_field
        Width of each field / node value in bits.
    num_levels
        Number of tree levels.
    """

    def __init__(self, overlay, num_fields, num_bits_per_field, num_levels):
        self.num_fields = num_fields
        self.num_bits_per_field = num_bits_per_field
        self.num_levels = num_levels
        self.tree_ctrl = overlay.binary_tree
        self.dma = overlay.dma
        self.xlnk = Xlnk()
        self.in_buffer = None
        self.out_buffer = None
        self.reset()

    def reset(self):
        """Reset the contiguous memory and pulse the IP reset bit."""
        # reset contiguous memory
        self.xlnk.xlnk_reset()

        # assert and de-assert reset
        self.tree_ctrl.write(0x0, 0x01)
        self.tree_ctrl.write(0x0, 0x00)

    @staticmethod
    def _level_of(address):
        """Return the tree level of breadth-first node *address*."""
        # int() keeps the result usable in shifts even if `ceil` yields a
        # float in this module.
        return int(ceil(log2(address + 2)) - 1)

    @staticmethod
    def _splice(regs, msb, lsb, total, value):
        """Write *value* into bit positions msb..lsb of a 4-register word.

        Positions are counted from the most significant end of a
        *total*-bit word (position 0 is the top bit).  *regs* holds four
        32-bit registers, least significant first; it is updated in place
        and returned.
        """
        shift = total - lsb - 1
        mask = (1 << (total - msb)) - (1 << shift)
        word = (regs[3] << 96) + (regs[2] << 64) + (regs[1] << 32) + regs[0]
        word = (word & ~mask) | ((value << shift) & mask)
        for i in range(4):
            regs[i] = (word >> (32 * i)) & 0xFFFFFFFF
        return regs

    @staticmethod
    def _addr_bit_span(level):
        """Return ``[msb, lsb]`` of the RAM-address bits for *level*.

        Level 0 occupies position 0; level ``n >= 1`` occupies ``n`` bits
        starting at position ``1 + n * (n - 1) / 2``.
        """
        if level == 0:
            return [0, 0]
        start = 1 + level * (level - 1) // 2
        return [start, start + level - 1]

    def get_address_bits(self, regs, address):
        """Merge the RAM address of node *address* into *regs*.

        *regs* (four 32-bit values) is modified in place and returned.
        """
        level = self._level_of(address)
        # Offset of the node within its level.
        ram_address = address - ((1 << level) - 1)
        msb, lsb = self._addr_bit_span(level)
        total = self._addr_bit_span(self.num_levels - 1)[1] + 1
        return self._splice(regs, msb, lsb, total, ram_address)

    def get_field_bits(self, regs, address, field_index):
        """Merge *field_index* for node *address* into *regs*.

        Each level owns ``ceil(log2(num_fields))`` bits.  *regs* is modified
        in place and returned.
        """
        width = int(ceil(log2(self.num_fields)))
        level = self._level_of(address)
        msb = level * width
        lsb = (level + 1) * width - 1
        total = self.num_levels * width
        return self._splice(regs, msb, lsb, total, field_index)

    def get_node_bits(self, regs, address, node_value):
        """Merge *node_value* for node *address* into *regs*.

        Each level owns ``num_bits_per_field`` bits.  *regs* is modified in
        place and returned.
        """
        width = self.num_bits_per_field
        level = self._level_of(address)
        msb = level * width
        lsb = (level + 1) * width - 1
        total = self.num_levels * width
        return self._splice(regs, msb, lsb, total, node_value)

    def _update_registers(self, first, packer, *args):
        """Read registers first..first+3, apply *packer*, write them back."""
        values = [self.tree_ctrl.read(0x4 * (first + i)) for i in range(4)]
        values = packer(values, *args)
        for i in range(4):
            self.tree_ctrl.write(0x4 * (first + i), values[i])

    def set_address(self, address):
        """Read-modify-write the RAM address registers (9 - 12)."""
        self._update_registers(9, self.get_address_bits, address)

    def set_field_index(self, address, field_index):
        """Read-modify-write the field index registers (5 - 8)."""
        self._update_registers(5, self.get_field_bits, address, field_index)

    def set_node_value(self, address, node_value):
        """Read-modify-write the node value registers (1 - 4)."""
        self._update_registers(1, self.get_node_bits, address, node_value)

    def load_hw(self, address, field_index, node_value):
        """Program one tree node and pulse the load bit."""
        # read, modify, and write node value, field index, and address
        self.set_node_value(address, node_value)
        self.set_field_index(address, field_index)
        self.set_address(address)

        # assert and de-assert load
        self.tree_ctrl.write(0x0, 0x02)
        self.tree_ctrl.write(0x0, 0x00)

    def show_ctrl_registers(self):
        """Print the current value of registers 1 - 12 in hexadecimal."""
        for i in range(1, 13):
            print('Slave register{}: {}'.format(
                i, hex(self.tree_ctrl.read(0x4 * i))))

    def prepare_hw(self, data_batch):
        """Allocate DMA buffers and pack *data_batch* into the input buffer.

        Each item of *data_batch* is iterated and then indexed by the
        iterated value.
        NOTE(review): presumably a mapping from field index to field value;
        confirm against the callers.
        Field 0 lands in the most significant field slot of the word.
        """
        length = len(data_batch)
        self.in_buffer = self.xlnk.cma_array(shape=length, dtype=np.uint32)
        self.out_buffer = self.xlnk.cma_array(shape=length, dtype=np.uint32)
        for i, data in enumerate(data_batch):
            for field_index in data:
                shift = ((self.num_fields - 1 - field_index)
                         * self.num_bits_per_field)
                self.in_buffer[i] |= data[field_index] << shift

    def search_hw(self):
        """Stream the input buffer through the IP and return the output."""
        self.dma.sendchannel.transfer(self.in_buffer)
        self.dma.recvchannel.transfer(self.out_buffer)
        self.dma.sendchannel.wait()
        self.dma.recvchannel.wait()
        return self.out_buffer