def _recv_exact(sock, nbytes):
    """Read exactly ``nbytes`` bytes from ``sock``.

    Returns the bytes read (an empty byte string when ``nbytes`` <= 0), or
    None when the peer closed the connection before sending any byte.

    Raises:
        Exception: the stream ended after only part of the data arrived.
    """
    chunks = []
    remaining = nbytes
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            # recv() returning nothing means the peer closed the connection.
            if remaining == nbytes:
                return None
            raise Exception('Connection closed in the middle of a message')
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)


def readXMsg(sock):
    """Read one framed XMsg from ``sock``.

    Wire format, as read here: a 4-byte little-endian magic number
    (0x20aa30bb), a 4-byte little-endian payload size, then the payload,
    which is parsed with ``xdrive_pb2.XMsg.ParseFromString``.

    Args:
        sock: a connected socket-like object providing ``recv(n)``.

    Returns:
        The parsed ``xdrive_pb2.XMsg``, or None when the peer closed the
        connection before a new message started.

    Raises:
        Exception: on a bad magic number, a missing size field, or a
            connection closed part-way through a message.
    """
    header = _recv_exact(sock, 4)
    if header is None:
        # Clean end of stream: no further message.
        return None
    magic = struct.unpack('<i', header)[0]
    if magic != 0x20aa30bb:
        raise Exception('Bad magic')

    header = _recv_exact(sock, 4)
    if header is None:
        raise Exception('Cannot read message sz')
    sz = struct.unpack('<i', header)[0]

    # recv() may return fewer bytes than requested and returns nothing once
    # the peer is gone; _recv_exact loops for the former and raises for the
    # latter instead of spinning forever.
    payload = _recv_exact(sock, sz)
    if payload is None:
        raise Exception('Connection closed in the middle of a message')

    xmsg = xdrive_pb2.XMsg()
    xmsg.ParseFromString(payload)
    return xmsg
def img_classify(msg):
    """Run image classification for the files named in a request.

    Args:
        msg: request message; ``msg.rowset.columns[0].sdata`` holds the image
            file names and ``columns[0].nrow`` their count.

    Returns:
        An ``xdrive_pb2.XMsg`` whose rowset has four columns
        (filename, ordinal, score, class), or None when the request is empty
        or ``get_classification`` produced None.
    """
    global g_inputs
    global g_inputbuf
    global g_fpgaOutput
    global g_weightsBlob
    global g_fcWeight
    global g_fcBias

    # message is a rowset, one col, a list of file names.
    rs = msg.rowset
    if len(rs.columns) == 0 or rs.columns[0].nrow == 0:
        print("Img classify request size is 0.\n")
        return None

    names = rs.columns[0]
    print("Img classify request size is {0}.\n".format(names.nrow))

    ret = None
    # Lock the fpga device. config is protected by this lock as well.
    fpga_lock.acquire()
    try:
        # NOTE(review): `ret` is overwritten on every iteration, so only the
        # last file's classification is returned -- confirm this is intended
        # for multi-file requests.
        for i in range(names.nrow):
            fname = names.sdata[i]
            print("Running classification for images: {0}\n".format(fname))

            print("Prepare inputs ...\n")
            # g_batchSize = 1, for now.
            print("g_inputs {0}".format(g_inputs))
            g_inputs[0] = xdnn_io.loadImageBlobFromFile(
                str(fname), g_mean, g_img_h, g_img_w)

            print("Quantize inputs ...\n")
            quantizeInputs = xdnn.quantizeInputs(
                g_firstFpgaLayerName, g_inputs, None, None,
                g_fpgaCfgFile, g_scaleB)

            print("Prepare inputs for fpga inputs ...\n")
            fpgaInputs = xdnn.prepareInputsForFpga(
                quantizeInputs, g_fpgaCfgFile, g_scaleB, -1,
                g_firstFpgaLayerName)

            print("Run FPGA commands ...\n")
            xdnn.execute(
                g_netFile, g_weightsBlob, fpgaInputs, g_fpgaOutput,
                g_batchSize, g_fpgaCfgFile, g_scaleB, g_PE)

            print("Compute FC ...\n")
            fcOutput = xdnn.computeFC(
                g_fcWeight, g_fcBias, g_fpgaOutput, g_batchSize,
                g_outputSize, g_fpgaOutputSize, g_useBlas)

            print("Softmax ...\n")
            softmaxOut = xdnn.computeSoftmax(fcOutput, g_batchSize)
            ret = get_classification(softmaxOut, fname)
    finally:
        # Always release, otherwise one failing image would leave the device
        # locked for every later request.
        fpga_lock.release()

    # Now construct return msg
    if ret is None:
        print("Return None: ???\n")
        return None

    retmsg = xdrive_pb2.XMsg()
    out = retmsg.rowset
    # return 4 columns, (filename, ordinal, score, class)
    col1 = out.columns.add()
    col2 = out.columns.add()
    col3 = out.columns.add()
    col4 = out.columns.add()
    nrow = len(ret)
    for col in (col1, col2, col3, col4):
        col.nrow = nrow

    for (a, b, c, d) in ret:
        col1.nullmap.append(False)
        col1.sdata.append(a)
        col2.nullmap.append(False)
        col2.i32data.append(b)
        col3.nullmap.append(False)
        col3.f64data.append(c)
        col4.nullmap.append(False)
        col4.sdata.append(d)
    return retmsg
# # googlenet test # import sys import xdrive_pb2, server if __name__=='__main__': # Test: an echo server. sock = server.cli_connect("/tmp/ml.sock") imgs = ["apple.jpeg", "banana.jpeg", "beer.jpeg", "coffee.jpeg", "egg.jpeg", "salad.jpeg"] for ii in range(6): xmsg = xdrive_pb2.XMsg() col = xmsg.rowset.columns.add() col.nrow = 1 col.nullmap.append(False) col.sdata.append(sys.argv[1] + "/test/" + imgs[ii]) server.writeXMsg(sock, xmsg) ret = server.readXMsg(sock) col1 = ret.rowset.columns[0] col2 = ret.rowset.columns[1] col3 = ret.rowset.columns[2] col4 = ret.rowset.columns[3] nrow = ret.rowset.columns[0].nrow for i in range(nrow): print("Ret {0}: ({1}, {2}, {3}, {4}).\n".format( i, col1.sdata[i], col2.i32data[i],
def img_classify(msg):
    """Run batched image classification for the files named in a request.

    Files are processed in groups of ``g_args['batch_sz']``; the results of
    every batch are concatenated.

    Args:
        msg: request message; ``msg.rowset.columns[0].sdata`` holds the image
            file names and ``columns[0].nrow`` their count.

    Returns:
        An ``xdrive_pb2.XMsg`` whose rowset has four columns
        (filename, ordinal, score, class), or None when the request is empty.
    """
    global g_args
    global g_ctxt

    # message is a rowset, one col, a list of file names.
    rs = msg.rowset
    if len(rs.columns) == 0 or rs.columns[0].nrow == 0:
        print("Img classify request size is 0.\n")
        return None

    names = rs.columns[0]
    nrow = names.nrow
    print("Img classify request size is {0}.\n".format(nrow))

    ret = []
    # Lock the fpga device. config is protected by this lock as well.
    fpga_lock.acquire()
    try:
        deploy = is_deploymode()
        batch_sz = g_args['batch_sz']
        if deploy:
            # Deploy mode works on the first entry of the input/output dicts.
            first_input = next(iter(g_ctxt['fpgaInput'].values()))
            first_output = next(iter(g_ctxt['fpgaOutput'].values()))

        for base in range(0, nrow, batch_sz):
            batch_files = []
            # The last batch may be short; never index past the final row.
            # NOTE(review): unused batch slots keep whatever the previous
            # batch left there -- confirm get_classification only reports
            # one entry per name in the list it is given.
            for j in range(min(batch_sz, nrow - base)):
                fname = str(names.sdata[base + j])
                print("Running classification for {0}-th images: {1}\n".format(
                    base + j, fname))
                if deploy:
                    first_input[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                        fname, g_args['img_raw_scale'], g_args['img_mean'],
                        g_args['img_input_scale'],
                        g_ctxt['inShape'][2], g_ctxt['inShape'][3])
                else:
                    g_ctxt['batch_array'][j, ...], _ = xdnn_io.loadImageBlobFromFile(
                        fname, g_args['img_raw_scale'], g_args['img_mean'],
                        g_args['img_input_scale'],
                        g_ctxt['in_shape'][2], g_ctxt['in_shape'][1])
                batch_files.append(fname)

            if deploy:
                g_ctxt['fpgaRT'].execute(g_ctxt['fpgaInput'], g_ctxt['fpgaOutput'])
                xdnn.computeFC(g_ctxt['fcWeight'], g_ctxt['fcBias'],
                               first_output, g_ctxt['fcOutput'])
            else:
                g_ctxt['fpgaRT'].execute(g_ctxt['batch_array'], g_ctxt['fpgaOutput'])
                xdnn.computeFC(g_ctxt['fcWeight'], g_ctxt['fcBias'],
                               g_ctxt['fpgaOutput'], g_args['batch_sz'],
                               g_args['outsz'], g_args['fpgaoutsz'],
                               g_ctxt['fcOutput'])

            softmaxOut = xdnn.computeSoftmax(g_ctxt['fcOutput'])
            ret = ret + get_classification(softmaxOut, batch_files,
                                           g_ctxt['labels'])
    finally:
        # Always release, otherwise one failing image would leave the device
        # locked for every later request.
        fpga_lock.release()

    retmsg = xdrive_pb2.XMsg()
    out = retmsg.rowset
    # return 4 columns, (filename, ordinal, score, class)
    col1 = out.columns.add()
    col2 = out.columns.add()
    col3 = out.columns.add()
    col4 = out.columns.add()
    total = len(ret)
    for col in (col1, col2, col3, col4):
        col.nrow = total

    for row in ret:
        col1.nullmap.append(False)
        col1.sdata.append(row[0])
        col2.nullmap.append(False)
        col2.i32data.append(row[1])
        col3.nullmap.append(False)
        col3.f64data.append(row[2])
        col4.nullmap.append(False)
        col4.sdata.append(row[3])
    return retmsg
def obj_detect(msg):
    """Detect objects in a stretch of a video file and return cropped hits.

    Args:
        msg: request message whose rowset carries the video file name in
            ``columns[0].sdata[0]``, the start time in
            ``columns[1].f32data[0]`` and the duration in
            ``columns[2].f32data[0]``.  The start is multiplied by 1000 to
            seek in milliseconds, so both are presumably seconds -- confirm.

    Returns:
        An ``xdrive_pb2.XMsg`` with eight columns per detection: frame
        counter, class name, probability, llx, lly, urx, ury and the
        base64-encoded JPEG crop; or None when the request is empty.
    """
    global g_qIn, g_qOut

    rs = msg.rowset
    if len(rs.columns) == 0 or rs.columns[0].nrow == 0:
        print("Obj deection req size is 0.\n")
        return None

    # Input, will be a video file, start time, for how long.
    fname = rs.columns[0].sdata[0]
    start = rs.columns[1].f32data[0]
    duration = rs.columns[2].f32data[0]

    ret = []
    # The original passed the values as extra print() arguments, so the
    # placeholders were never filled in; format the message explicitly.
    print("Obj dectect on file {0}: start {1}, length {2}.\n".format(
        fname, start, duration))

    # use opencv to get frames
    vc = cv2.VideoCapture(fname)
    try:
        # property 5: fps.
        fps = vc.get(5)
        if start > 1.0:
            # property 0: position in milliseconds.
            # (property 1 would be the position as a frame number.)
            vc.set(0, start * 1000)

        i = 0
        while i <= duration * fps:
            i += 1
            ok, frame = vc.read()
            if not ok:
                break
            # Only every g_skip-th frame is sent to the FPGA.
            if (i - 1) % g_skip != 0:
                continue

            # got a frame, do some transformation, then send it to FPGA.
            inputs = np.zeros((g_batchSize, g_imgc * g_imgh * g_imgw),
                              dtype=np.float32)
            inputs[0] = load_yoloimg(frame)

            fpga_lock.acquire()
            try:
                g_qIn.put(inputs)
                outputs = g_qOut.get()
            finally:
                # Never keep the device locked if the queue exchange fails.
                fpga_lock.release()

            # running the rest of yolo layer in CPU.
            outputs = outputs.reshape(g_anchor_boxes, g_outc, g_outh, g_outw)
            # sigmoid
            outputs[:, 0:2, :, :] = sigmoid(outputs[:, 0:2, :, :])
            outputs[:, 4, :, :] = sigmoid(outputs[:, 4, :, :])
            for box in range(g_anchor_boxes):
                outputs[box, 5:, :, :] = softmax(outputs[box, 5:, :, :])

            bboxes = nms.do_baseline_nms(outputs.flat,
                                         frame.shape[1], frame.shape[0],
                                         g_imgw, g_imgh,
                                         g_outw, g_outh,
                                         g_anchor_boxes, g_classes,
                                         g_scorethresh, g_iouthresh)

            for bbox in bboxes:
                cls = coconames(bbox['classid'])
                if cls is None:
                    continue

                llx = bbox['ll']['x']
                lly = bbox['ll']['y']
                urx = bbox['ur']['x']
                ury = bbox['ur']['y']
                # The crop below slices rows ury:lly, so lly is the larger y.
                width = urx - llx
                height = lly - ury

                # very tall/wide objects, we don't want to covering bbox
                if width > frame.shape[1] * 0.5 or height > frame.shape[0] * 0.5:
                    continue

                # and avoid objects less than 30x30.
                if width > 30 and height > 30:
                    objimg = frame[ury:lly, llx:urx]
                    # tobytes() replaces the deprecated ndarray.tostring().
                    objimg_str = cv2.imencode('.jpg', objimg)[1].tobytes()
                    objimg_str = base64.b64encode(objimg_str)
                    ret.append((i, cls, bbox['prob'],
                                llx, lly, urx, ury, objimg_str))
    finally:
        # Release the capture even when a frame fails to process.
        vc.release()

    # return results
    retmsg = xdrive_pb2.XMsg()
    out = retmsg.rowset
    cols = [out.columns.add() for _ in range(8)]
    for col in cols:
        col.nrow = len(ret)

    for r in ret:
        for col in cols:
            col.nullmap.append(False)
        cols[0].i32data.append(r[0])
        cols[1].sdata.append(r[1])
        cols[2].f32data.append(r[2])
        cols[3].f32data.append(r[3])
        cols[4].f32data.append(r[4])
        cols[5].f32data.append(r[5])
        cols[6].f32data.append(r[6])
        cols[7].sdata.append(r[7])
    return retmsg
def img_classify(msg):
    """Run image classification for the files named in a request.

    Args:
        msg: request message; ``msg.rowset.columns[0].sdata`` holds the image
            file names and ``columns[0].nrow`` their count.

    Returns:
        An ``xdrive_pb2.XMsg`` whose rowset has four columns
        (filename, ordinal, score, class), or None when the request is empty
        or ``get_classification`` produced None.
    """
    global g_cInputBuffer
    global g_cFpgaInputBuffer

    # message is a rowset, one col, a list of file names.
    rs = msg.rowset
    if len(rs.columns) == 0 or rs.columns[0].nrow == 0:
        print("Img classify request size is 0.\n")
        return None

    names = rs.columns[0]
    print("Img classify request size is {0}.\n".format(names.nrow))

    ret = None
    # Lock the fpga device. config is protected by this lock as well.
    fpga_lock.acquire()
    try:
        # NOTE(review): `ret` is overwritten on every iteration, so only the
        # last file's classification is returned -- confirm this is intended
        # for multi-file requests.
        for i in range(names.nrow):
            fname = names.sdata[i]
            print("Running classification for images: {0}\n".format(fname))

            print("Prepare inputs ...\n")
            # g_batchSize = 1, for now.
            config["g_inputs"][0] = pyxfdnn_io.loadImageBlobFromFile(
                fname, config["img_mean"], g_imgh, g_imgw)

            print("Quantize inputs ...\n")
            quantizeInputs = pyxfdnn.quantizeInputs(
                config["firstfpgalayer"], config["g_inputs"],
                g_cInputBuffer, g_cFpgaInputBuffer,
                config["quantizecfg"], config["scaleB"])

            print("Prepare inputs for fpga inputs ...\n")
            fpgaInputs = pyxfdnn.prepareInputsForFpga(
                quantizeInputs, config["quantizecfg"], config["scaleB"],
                -1, config["firstfpgalayer"])

            print("Run FPGA commands ...\n")
            # The PE argument (config["PE"]) is deliberately NOT passed.
            # Per the original author's notes: the Xilinx notebook uses
            # PE = 0, which works for a few images and then crashes; the
            # Xilinx batch_classify.py example omits the parameter so the
            # default of -1 applies, which runs fine for many images.
            pyxfdnn.execute(
                config["fpgacommands"], config["weightsBlob"], fpgaInputs,
                config["g_fpgaOutput"], g_batchSize,
                config["quantizecfg"], config["scaleB"])

            print("Compute FC ...\n")
            fcOut = pyxfdnn.computeFC(
                config["fcWeight"], config["fcBias"], config["g_fpgaOutput"],
                g_batchSize, config["outsz"], config["fpgaoutsz"],
                config["useblas"])

            print("Softmax ...\n")
            softmaxOut = pyxfdnn.computeSoftmax(fcOut, g_batchSize)
            ret = get_classification(softmaxOut, fname, config)
    finally:
        # Always release, otherwise one failing image would leave the device
        # locked for every later request.
        fpga_lock.release()

    # Now construct return msg
    if ret is None:
        print("Return None: ???\n")
        return None

    retmsg = xdrive_pb2.XMsg()
    out = retmsg.rowset
    # return 4 columns, (filename, ordinal, score, class)
    col1 = out.columns.add()
    col2 = out.columns.add()
    col3 = out.columns.add()
    col4 = out.columns.add()
    nrow = len(ret)
    for col in (col1, col2, col3, col4):
        col.nrow = nrow

    for (a, b, c, d) in ret:
        col1.nullmap.append(False)
        col1.sdata.append(a)
        col2.nullmap.append(False)
        col2.i32data.append(b)
        col3.nullmap.append(False)
        col3.f64data.append(c)
        col4.nullmap.append(False)
        col4.sdata.append(d)
    return retmsg