def __init__(self, input_img, bitfile, model_path1, model_path2, numfpga, numclus):
    """
    In this example MDLA will be capable of taking an input image
    and running that image through two models on all clusters
    """
    print('{}{}{}...'.format(CP_Y, 'Initializing MDLA', CP_0))
    ################################################################################
    # Initialize 2 Micron DLA
    self.dla1 = microndla.MDLA()
    self.dla2 = microndla.MDLA()
    # Run the networks in batch mode (one image per cluster)
    self.dla1.SetFlag('clustersbatchmode', '0')
    self.dla2.SetFlag('clustersbatchmode', '0')
    self.batch, self.height, self.width, self.channels = input_img.shape
    self.dla1.SetFlag('nclusters', str(numclus))
    self.dla2.SetFlag('nclusters', str(numclus))
    # Compile the NN and generate instructions <save.bin> for MDLA
    self.dla1.Compile(model_path1)
    self.dla2.Compile(model_path2)
    print('{}{}{}!!!'.format(CP_C, 'Successfully generated binaries for MDLA', CP_0))
    # Send the generated instructions to MDLA
    # Send the bitfile to the FPGA only during the first run
    # Otherwise bitfile is an empty string
    print('{}{}{}!!!'.format(CP_G, 'MDLA initialization complete', CP_0))
    print('{:-<80}'.format(''))
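# ------------------------------------------------------------------------------
# Added quick reference, inferred from the examples in this collection (not from
# separate documentation):
#   'clustersbatchmode' = '0' -> each cluster processes its own image (batch mode)
#   'clustersbatchmode' = '1' -> all clusters cooperate on a single image (no-batch)
# Minimal sketch of toggling the flag on a fresh MDLA object:
import microndla

dla = microndla.MDLA()
dla.SetFlag('clustersbatchmode', '0')   # batch mode: one image per cluster
dla.SetFlag('clustersbatchmode', '1')   # no-batch mode: one image on all clusters
dla.Free()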
def __init__(self, input_img, bitfile, model_path1, model_path2, numfpga, numclus):
    """
    In this example MDLA will be capable of taking multiple input images
    and running those images through 2 models on 1 FPGA
    """
    print('{}{}{}...'.format(CP_Y, 'Initializing MDLA', CP_0))
    # Initialize 2 Micron DLA
    self.dla = microndla.MDLA()
    self.dla2 = microndla.MDLA()
    # Run each network in no-batch mode (one image on all of its clusters)
    self.batch, self.height, self.width, self.channels = input_img.shape
    self.dla.SetFlag({'nclusters': numclus, 'clustersbatchmode': 1})
    self.dla2.SetFlag({'nclusters': numclus, 'clustersbatchmode': 1, 'firstcluster': numclus})  # second model starts at cluster numclus
    #self.dla.SetFlag('debug', 'bw') # Uncomment to see detailed output from compiler
    # Compile the NN and generate instructions <save.bin> for MDLA
    self.dla.Compile(model_path1)
    self.dla2.Compile(model_path2)
    print('{}{}{}!!!'.format(CP_C, 'Successfully generated binaries for MDLA', CP_0))
    # Send the generated instructions to MDLA
    # Send the bitfile to the FPGA only during the first run
    # Otherwise bitfile is an empty string
    print('{}{}{}!!!'.format(CP_G, 'MDLA initialization complete', CP_0))
    print('{:-<80}'.format(''))
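# ------------------------------------------------------------------------------
# Added note on the cluster partitioning above (an inference from this example,
# not from separate documentation): with numclus = 2 the first model occupies
# clusters 0-1 and the second, via 'firstcluster', clusters 2-3 of the same FPGA.
# Minimal sketch of the two flag sets:
import microndla

numclus = 2
dla = microndla.MDLA()
dla2 = microndla.MDLA()
dla.SetFlag({'nclusters': numclus, 'clustersbatchmode': 1})                            # clusters 0..1
dla2.SetFlag({'nclusters': numclus, 'clustersbatchmode': 1, 'firstcluster': numclus})  # clusters 2..3
dla.Free()
dla2.Free()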
def __init__(self, input_img, n_classes, bitfile, model_path):
    """
    In this example MDLA will be capable of taking an input image
    and running that image on all clusters
    """
    print('{}{}{}...'.format(CP_Y, 'Initializing MDLA', CP_0))
    ################################################################################
    # Initialize Micron DLA
    self.dla = microndla.MDLA()
    if bitfile and bitfile != '':
        self.dla.SetFlag('bitfile', bitfile)
        print('{}{}{}'.format(CP_C, 'Finished loading bitfile on FPGA', CP_0))
    # Run the network in no-batch mode (one image on all clusters)
    self.dla.SetFlag('clustersbatchmode', '1')
    # TODO Uncomment this line to see detailed compiler output
    #self.dla.SetFlag('debug', 'b')
    self.height, self.width, self.channels = input_img.shape
    # Compile the NN and generate instructions <save.bin> for MDLA
    self.dla.Compile(model_path)
    print('{}{}{}'.format(CP_C, 'Successfully generated binaries for MDLA', CP_0))
    # Send the generated instructions to MDLA
    # Send the bitfile to the FPGA only during the first run
    # Otherwise bitfile is an empty string
    print('\n{}{}{}!!!'.format(CP_G, 'MDLA initialization complete', CP_0))
    print('{:-<80}'.format(''))
    # Allocate space for output of the model
    self.n_classes = n_classes  # Number of expected output planes/classes
def __init__(self, input_img, bitfile, model_path, numfpga, numclus):
    """
    In this example MDLA will be capable of taking a batch of input images
    and splitting it across FPGAs and clusters
    """
    print('{}{}{}...'.format(CP_Y, 'Initializing MDLA', CP_0))
    # Initialize Micron DLA
    self.dla = microndla.MDLA()
    self.batch, self.height, self.width, self.channels = input_img.shape
    # Run the network in batch mode (the input batch is split across FPGAs and clusters)
    image_per_cluster = self.batch // (numclus * numfpga)
    if image_per_cluster == 1:
        self.dla.SetFlag('clustersbatchmode', '0')
    else:
        self.dla.SetFlag('imgs_per_cluster', str(image_per_cluster))
    self.dla.SetFlag('nfpgas', str(numfpga))
    self.dla.SetFlag('nclusters', str(numclus))
    if bitfile and bitfile != '':
        self.dla.SetFlag('bitfile', bitfile)
        print('{}{}{}'.format(CP_C, 'Finished loading bitfile on FPGA', CP_0))
    #self.dla.SetFlag('debug', 'b') # Uncomment it to see detailed output from compiler
    # Compile the NN and generate instructions <save.bin> for MDLA
    sz = "{:d}x{:d}x{:d}x{:d}".format(self.batch, self.channels, self.height, self.width)
    self.dla.Compile(model_path, 'save.bin', sz)
    print('{}{}{}!!!'.format(CP_C, 'Successfully generated binaries for MDLA', CP_0))
    # Send the generated instructions to MDLA
    # Send the bitfile to the FPGA only during the first run
    # Otherwise bitfile is an empty string
    self.dla.Init('save.bin')
    print('{}{}{}!!!'.format(CP_G, 'MDLA initialization complete', CP_0))
    print('{:-<80}'.format(''))
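# ------------------------------------------------------------------------------
# Added worked example of the batch-splitting arithmetic above: with a batch of
# 8 images, 2 FPGAs and 2 clusters per FPGA, each cluster gets 2 images, so
# 'imgs_per_cluster' is set to 2; with a batch of 4 the result is 1 and
# 'clustersbatchmode' = '0' is used instead.
batch, numclus, numfpga = 8, 2, 2
images_per_cluster = batch // (numclus * numfpga)
assert images_per_cluster == 2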
def __init__(self, input_img, bitfile, model_path, numfpga=1, numclus=1, nobatch=False):
    print('Initializing MDLA')
    self.dla = microndla.MDLA()                        # initialize MDLA
    sz = "{:d}x{:d}x{:d}".format(224, 224, 1)          # input size from the ONNX model
    if nobatch:                                        # check whether to run one image on the whole FPGA
        self.dla.SetFlag('clustersbatchmode', '1')
    self.dla.SetFlag('nclusters', str(numclus))
    self.dla.SetFlag('nfpgas', str(numfpga))
    if bitfile and bitfile != '':
        self.dla.SetFlag('bitfile', bitfile)
    #self.dla.SetFlag('debug', 'b')                    # Uncomment for detailed output from compiler
    self.dla.Compile(model_path, 'save.bin')           # Compile the NN and generate instructions <save.bin> for MDLA
    print('\nSuccessfully generated binaries for MDLA')
    self.dla.Init('save.bin')                          # Send instructions to FPGA and load bitfile if necessary
    print('MDLA initialization complete\n')
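# ------------------------------------------------------------------------------
# Added note: SetFlag also accepts a dictionary, as used in the multi-model
# examples in this collection, so the flag calls above could be combined into one
# call (a sketch, assuming both forms behave identically; values are placeholders):
import microndla

dla = microndla.MDLA()
dla.SetFlag({'clustersbatchmode': 1, 'nclusters': 2, 'nfpgas': 1})
dla.Free()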
def __init__(self, input_img, bitfile, model_path, numfpga=1, nobatch=False):
    self.dla = microndla.MDLA()
    b, h, w, c = input_img.shape
    if nobatch:
        self.dla.SetFlag('clustersbatchmode', '1')
        assert b == 1, "Input batch should be equal to 1 for nobatch mode"
    self.dla.SetFlag('nfpgas', str(numfpga))
    if bitfile and bitfile != '':
        self.dla.SetFlag('bitfile', bitfile)
    self.dla.Compile(model_path)
    # YOLOv3 post-processing configuration
    self.cfg = yolov3_cfg
    self.grids = []
    self.n = []
    self.anchors = []
    self.strides = []
    self.na = 3    # anchors per detection scale
    self.no = 85   # outputs per anchor: 4 box coords + 1 objectness + 80 classes
    self.create_grids(h, w)
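# ------------------------------------------------------------------------------
# Added sketch (not the create_grids() implementation from this repo): a generic
# way to build the per-cell offset grid for one YOLO detection scale, assuming
# the usual convention of one (x, y) offset per output cell.
import numpy as np

def make_grid(nx, ny):
    # grid[j, i] = (i, j): the top-left offset of each output cell
    xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
    return np.stack((xv, yv), axis=2).astype(np.float32)

grid = make_grid(13, 13)   # e.g. the 13x13 scale of a 416x416 input (stride 32)
print(grid.shape)          # (13, 13, 2)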
def __init__(self, model_path, class_names, res, bitfile, numclus=4, threshold=0.5, disp_time=1):
    self.thr = threshold
    self.times = deque(maxlen=25)
    self.disp_time = disp_time
    # Load class names from file
    with open(class_names, 'r') as f:
        self.labels = f.readlines()
    for i in range(len(self.labels)):
        self.labels[i] = self.labels[i].rstrip()
    # Initialize Micron DLA
    self.dla = microndla.MDLA()
    self.res = res
    w, h, c = res
    # Run the network in no-batch mode (one image on all clusters)
    self.dla.SetFlag('clustersbatchmode', '1')
    # Compile the NN and generate instructions <save.bin> for MDLA
    if bitfile and bitfile != '':
        self.dla.SetFlag('bitfile', bitfile)
    self.dla.SetFlag('nclusters', str(numclus))
    sz = '{:d}x{:d}x{:d}x{:d}'.format(1, c, h, w)
    self.dla.Compile(model_path, 'save.bin', sz)
    # Init fpga with compiled machine code
    self.dla.Init('save.bin')
    # Model has 10 outputs that each need to be reshaped to the following sizes
    self.output_shapes = [
        (1, 720, int(h / 8 + .5), int(w / 8 + .5)),
        (1, 720, int(h / 16 + .5), int(w / 16 + .5)),
        (1, 720, int(h / 32 + .5), int(w / 32 + .5)),
        (1, 720, int(h / 64 + .5), int(w / 64 + .5)),
        (1, 720, int(h / 128 + .5), int(w / 128 + .5)),
        (1, 36, int(h / 8 + .5), int(w / 8 + .5)),
        (1, 36, int(h / 16 + .5), int(w / 16 + .5)),
        (1, 36, int(h / 32 + .5), int(w / 32 + .5)),
        (1, 36, int(h / 64 + .5), int(w / 64 + .5)),
        (1, 36, int(h / 128 + .5), int(w / 128 + .5)),
    ]
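# ------------------------------------------------------------------------------
# Added note: the channel counts above are consistent with a RetinaNet-style
# head (720 = 9 anchors x 80 classes, 36 = 9 anchors x 4 box coordinates), but
# that interpretation is an assumption, not taken from this repo. A minimal
# reshape sketch for one classification output at stride 8 on a 320x320 input:
import numpy as np

h = w = 320
cls_out = np.zeros(720 * (h // 8) * (w // 8), dtype=np.float32)  # flat output buffer
cls_out = cls_out.reshape(1, 720, h // 8, w // 8)                # (1, 720, 40, 40)
cls_out = cls_out.reshape(1, 9, 80, h // 8, w // 8)              # split anchors/classes (assumed layout)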
def ieprocess(image_file, network_file):
    # Load the image
    img = Image.open(image_file)
    # Resize it to the size expected by the network
    img = img.resize((224, 224), resample=Image.BILINEAR)
    # Convert to numpy float
    img = np.array(img).astype(np.float32) / 255
    # Transpose to plane-major, as required by our API
    img = np.ascontiguousarray(img.transpose(2, 0, 1))
    # print(img)
    print('Image shape:', img.shape)
    # Normalize images
    stat_mean = list([0.485, 0.456, 0.406])
    stat_std = list([0.229, 0.224, 0.225])
    for i in range(3):
        img[i] = (img[i] - stat_mean[i]) / stat_std[i]
    # Create and initialize the Inference Engine object
    ie = microndla.MDLA()
    # Compile to a file
    swnresults = ie.Compile("{:d}x{:d}x{:d}".format(224, 224, 3), network_file, 'save.bin')
    # Init fpga
    nresults = ie.Init('save.bin', '')
    # Create the storage for the result and run one inference
    result = np.ndarray(swnresults, dtype=np.float32)
    ie.Run(img, result)
    # Sort the scores and collect the top-5 categories
    idxs = (-result).argsort()
    rstring = []
    with open("categories.txt") as f:
        categories = f.read().splitlines()
    for i in range(5):
        rstring.append(str(categories[idxs[i]]) + ', ' + str(result[idxs[i]]))
    # Free the MDLA object
    ie.Free()
    return rstring
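# ------------------------------------------------------------------------------
# Added usage sketch for the function above (file names are placeholders, not
# from this repo):
if __name__ == '__main__':
    top5 = ieprocess('dog.jpg', 'resnet18.onnx')
    for line in top5:
        print(line)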
hidden2 = (torch.randn(1, 1, nh2), torch.randn(1, 1, nh2))  # clean out hidden state
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
modelL = LSTMm()
# Export onnx
torch.onnx.export(modelL, (inputs, hidden, hidden2), "model.onnx")
# Run in pytorch
out = modelL(inputs, hidden, hidden2)
result_pyt = out[0]
result_pyt = result_pyt.permute(1, 0, 2).contiguous()
result_pyt = result_pyt.view(1, -1)
result_pyt = result_pyt.detach().numpy()
# Create and initialize the Inference Engine object
ie = microndla.MDLA()
ie.SetFlag('debug', 'bw')
# Compile to a file
ie.Compile('model.onnx', 'model.bin')
# Init fpga
ie.Init('model.bin')
np.random.seed(1)
# Move the batch dimension first for the MDLA input layout
img = inputs.numpy().transpose(1, 0, 2)
hid = [hidden[0].numpy().transpose(1, 0, 2), hidden[1].numpy().transpose(1, 0, 2)]
hid2 = [hidden2[0].numpy().transpose(1, 0, 2), hidden2[1].numpy().transpose(1, 0, 2)]
def forward(self, x):
    y = self.op(x)
    return y

w = args.w
i = args.i
k = args.k
s = args.s
p = args.p
inVec1 = torch.randn(1, i, w, w, dtype=torch.float32)
modelMax = Maxpool(k, s, p)
torch.onnx.export(modelMax, inVec1, "net_maxpool.onnx")
sf = microndla.MDLA()
if args.verbose:
    sf.SetFlag('debug', 'b')  # debug options
# Compile to generate binary
sf.Compile('net_maxpool.onnx', 'net_maxpool.bin')
sf.Init("./net_maxpool.bin")
in_1 = np.ascontiguousarray(inVec1)
result = sf.Run(in_1)
outhw = modelMax(inVec1)
result_pyt = outhw.detach().numpy()
if args.verbose:
    print("pytorch : {}".format(result_pyt))
    print("hw : {}".format(result))
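# ------------------------------------------------------------------------------
# Added check, continuing the script above (a sketch, assuming Run() returns the
# same number of elements as the PyTorch output): compare the hardware result
# against the PyTorch reference.
result_hw = np.asarray(result).reshape(result_pyt.shape)  # align shapes before comparing
if np.allclose(result_hw, result_pyt, rtol=1e-2, atol=1e-2):
    print("Hardware output matches the PyTorch reference")
else:
    print("Mismatch between hardware and PyTorch outputs")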
import microndla
import sys
import PIL
from PIL import Image
import numpy as np
from argparse import ArgumentParser

parser = ArgumentParser(description="Micron DLA Load bitfile")
_ = parser.add_argument
_('bitfile', type=str, default='', help='Path to the bitfile')
_('-f', '--fpga', type=str, default='', help='Select fpga type to use: 511 or 852')
_('-n', '--nfpga', type=str, default='1', help='Number of fpgas used')
args = parser.parse_args()

ie = microndla.MDLA()                    # create MDLA obj
#ie.SetFlag('debug', 'bw')
# select fpga type
if args.fpga == "511" or args.fpga == "852":
    ie.SetFlag('fpgaid', args.fpga)      # select fpga type
ie.SetFlag('nfpgas', args.nfpga)         # select number of fpgas
ie.SetFlag('bitfile', args.bitfile)      # load bitfile onto the FPGA
ie.Free()                                # free MDLA obj
print('done')
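# ------------------------------------------------------------------------------
# Added usage note (script and bitfile names are placeholders, not from this repo):
#   python3 loadbitfile.py ac511.tgz -f 511 -n 1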
for i in range(2):
    # Convert to numpy float
    img[i] = np.array(img[i]).astype(np.float32) / 255
    # Transpose to plane-major, as required by our API
    img[i] = np.ascontiguousarray(img[i].transpose(2, 0, 1))
    # Normalize images
    stat_mean = list([0.485, 0.456, 0.406])
    stat_std = list([0.229, 0.224, 0.225])
    for j in range(3):
        img[i][j] = (img[i][j] - stat_mean[j]) / stat_std[j]

# Create and initialize the Inference Engine objects
nclus = 2
ie = microndla.MDLA()
ie2 = microndla.MDLA()
ie.SetFlag({'nclusters': nclus, 'clustersbatchmode': 1})
ie2.SetFlag({
    'nclusters': nclus,
    'firstcluster': nclus,
    'clustersbatchmode': 1
})
# Compile to a file
ie.Compile(args.modelpath1)
ie2.Compile(args.modelpath2, MDLA=ie)
# Feed one image to each accelerator
ie.PutInput(img[0], None)
ie2.PutInput(img[1], None)
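# ------------------------------------------------------------------------------
# Added sketch, continuing the script above: retrieving the outputs queued with
# PutInput(). GetResult() is assumed to return the output together with the user
# object passed to PutInput().
result1, _ = ie.GetResult()
result2, _ = ie2.GetResult()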