def run(self, imgList, fpgaOutput_list, fpgaOutputShape_list, shapeArr):
    """Turn one batch of raw FPGA output buffers into per-image bounding boxes.

    For the 'standard_yolo_v3' / 'tiny_yolo_v3' models every output buffer is
    reshaped and handed to ``det_postprocess``.  For any other model only the
    first output buffer is used: it is activated in place (sigmoid + softmax)
    and filtered with ``nms.do_baseline_nms``.  The number of boxes found for
    each image is printed.  When ``self.args['golden']`` is not None the boxes
    are additionally written, one text file per image, under
    ``self.args['detection_labels']``.

    Args:
        imgList: image paths of the batch, indexed in step with ``shapeArr``.
        fpgaOutput_list: raw output buffers, one per network output.
        fpgaOutputShape_list: shape of each buffer in ``fpgaOutput_list``.
        shapeArr: per-image original shape; element [1] is passed to the
            post-processing helpers ahead of element [0] (presumably width
            then height -- confirm against the pre-processing code).

    Returns:
        None.
    """
    cfg = self.args

    if self.numProcessed == 0:
        # First batch seen by this instance: initialise the lazily created state.
        self.startTime = timeit.default_timer()
        self.labels = xdnn_io.get_labels(cfg['labels'])
        self.zmqPub = None
        if cfg['zmqpub']:
            self.zmqPub = mp_classify.ZmqResultPublisher(cfg['deviceID'])
        self.goldenMap = None

    self.numProcessed += len(imgList)
    num_images = min(cfg['batch_sz'], len(shapeArr))

    # The model name is read from self.args like every other option used
    # here, instead of depending on a module-level ``args`` global.
    if cfg['yolo_model'] in ('standard_yolo_v3', 'tiny_yolo_v3'):
        fpgaOutput = []
        for idx, raw in enumerate(fpgaOutput_list):
            shape = tuple(fpgaOutputShape_list[idx])
            fpgaOutput.append(
                np.frombuffer(raw, dtype=np.float32).reshape(shape))

        bboxlist_for_images = det_postprocess(fpgaOutput, cfg, shapeArr)

        for i in range(num_images):
            # Single-argument print: same text under Python 2 and Python 3.
            print("image:  %s  has num boxes detected :  %s"
                  % (imgList[i], len(bboxlist_for_images[i])))
    else:
        # Only this path needs the network input shape, so the compiler JSON
        # is parsed here rather than on every call.
        firstInputShape = next(iter(
            xdnn.CompilerJsonParser(cfg['netcfg']).getInputs().values()))

        fpgaOutputShape = fpgaOutputShape_list[0]
        # flatten() returns a copy, so the in-place activation below never
        # writes into the caller's buffer.
        npout_view = np.frombuffer(fpgaOutput_list[0], dtype=np.float32) \
            .reshape(tuple(fpgaOutputShape)).flatten()
        fpgaoutsz = (fpgaOutputShape[1] * fpgaOutputShape[2]
                     * fpgaOutputShape[3])

        groups = cfg['groups']
        batchstride = cfg['batchstride']
        bboxlist_for_images = []
        for i in range(num_images):
            startidx = i * fpgaoutsz
            softmaxout = npout_view[startidx:startidx + fpgaoutsz]

            # first activate first two channels of each bbox subgroup (n)
            for b in range(cfg['bboxplanes']):
                base = batchstride * b
                for r in range(base, base + 2 * groups):
                    softmaxout[r] = sigmoid(softmaxout[r])
                conf_start = base + groups * cfg['coords']
                for r in range(conf_start, conf_start + groups):
                    softmaxout[r] = sigmoid(softmaxout[r])

            # Now softmax on all classification arrays in image
            for b in range(cfg['bboxplanes']):
                for g in range(groups):
                    softmax(
                        cfg['beginoffset'] + b * batchstride
                        + g * cfg['groupstride'],
                        softmaxout, softmaxout, cfg['outsz'], groups)

            # NMS
            bboxes = nms.do_baseline_nms(
                softmaxout, shapeArr[i][1], shapeArr[i][0],
                firstInputShape[2], firstInputShape[3],
                cfg['out_w'], cfg['out_h'], cfg['bboxplanes'],
                cfg['outsz'], cfg['scorethresh'], cfg['iouthresh'])
            bboxlist_for_images.append(bboxes)
            print("image:  %s  has num boxes detected :  %s"
                  % (imgList[i], len(bboxes)))

    if cfg['golden'] is None:
        return

    # Write one "<classid> <prob> <x> <y> <w> <h>" line per box and per image.
    for i in range(num_images):
        filename = imgList[i]
        stem = filename.split("/")[-1].split(".")[0]
        out_file_txt = cfg['detection_labels'] + "/" + stem + ".txt"

        out_line_list = []
        for box in bboxlist_for_images[i]:
            x, y, w, h = darknet_style_xywh(
                shapeArr[i][1], shapeArr[i][0],
                box["ll"]["x"], box["ll"]["y"],
                box['ur']['x'], box['ur']['y'])
            fields = [str(box["classid"]), str(round(box['prob'], 3)),
                      str(x), str(y), str(w), str(h)]
            out_line_list.append(" ".join(fields) + "\n")

        log.info("writing this into prediction file at %s" % (out_file_txt))
        with open(out_file_txt, "w") as the_file:
            the_file.writelines(out_line_list)
def bbox_stage(config, q_bbox):
    """Consume FPGA outputs from ``q_bbox`` and turn them into detections.

    Each queue item is a ``(job, fpgaOutput)`` pair; a ``None`` item ends the
    loop.  For every image of the batch the output slice is activated in
    place (sigmoid + softmax), filtered with ``nms.do_baseline_nms``, logged
    and appended to a result list.  When the job asks for it the boxes are
    also drawn.  After the loop ends all results are dumped to
    ``results.json`` in the current directory.

    Args:
        config: dict of network/post-processing settings ('batch_sz',
            'outsize', 'bboxplanes', 'groups', 'names', 'cats', ...).
        q_bbox: queue-like object whose ``get()`` yields the payloads.

    Returns:
        None.
    """
    results = []
    groups = config['groups']
    batchstride = config['batchstride']

    while True:
        payload = q_bbox.get()
        # Identity test: ``== None`` would be routed through the payload's
        # own __eq__, which is not what a sentinel check wants.
        if payload is None:
            break

        (job, fpgaOutput) = payload
        images = job['images']
        display = job['display']
        coco = job['coco']

        for i in range(config['batch_sz']):
            log.info("Results for image %d: %s" % (i, images[i]))
            startidx = i * config['outsize']
            softmaxout = fpgaOutput[startidx:startidx + config['outsize']]

            # first activate first two channels of each bbox subgroup (n)
            for b in range(config['bboxplanes']):
                base = batchstride * b
                for r in range(base, base + 2 * groups):
                    softmaxout[r] = sigmoid(softmaxout[r])
                conf_start = base + groups * config['coords']
                for r in range(conf_start, conf_start + groups):
                    softmaxout[r] = sigmoid(softmaxout[r])

            # Now softmax on all classification arrays in image
            for b in range(config['bboxplanes']):
                for g in range(groups):
                    softmax(
                        config['beginoffset'] + b * batchstride
                        + g * config['groupstride'],
                        softmaxout, softmaxout, config['classes'], groups)

            # NMS
            bboxes = nms.do_baseline_nms(
                softmaxout, job['shapes'][i][1], job['shapes'][i][0],
                config['net_w'], config['net_h'],
                config['out_w'], config['out_h'],
                config['bboxplanes'], config['classes'],
                config['scorethresh'], config['iouthresh'])

            # REPORT BOXES
            log.info("Found %d boxes" % (len(bboxes)))
            for j, box in enumerate(bboxes):
                log.info("Obj %d: %s" % (j, config['names'][box['classid']]))
                log.info("\t score = %f" % (box['prob']))
                log.info("\t (xlo,ylo) = (%d,%d)"
                         % (box['ll']['x'], box['ll']['y']))
                log.info("\t (xhi,yhi) = (%d,%d)"
                         % (box['ur']['x'], box['ur']['y']))

                filename = images[i]
                basename = filename.split("/")[-1]
                if coco:
                    # The id is the number after the last '_' in the file
                    # name, e.g. "..._000123.jpg" -> 123.
                    image_id = int(basename.split("_")[-1].split(".")[0])
                else:
                    image_id = basename

                x, y, w, h = cornersToxywh(
                    box["ll"]["x"], box["ll"]["y"],
                    box['ur']['x'], box['ur']['y'])
                results.append({
                    "image_id": image_id,
                    "category_id": config['cats'][box["classid"]],
                    "bbox": [x, y, w, h],
                    "score": round(box['prob'], 3),
                })

            # DRAW BOXES w/ LABELS
            if display:
                draw_boxes(images[i], bboxes, config['names'],
                           config['colors'])

    log.info("Saving results as results.json")
    with open("results.json", "w") as fp:
        fp.write(json.dumps(results, sort_keys=True, indent=4))
def main(argv=None):
    """Run YOLO on one image, crop its first detection and classify the crop.

    The steps, each timed and reported on stdout, are:

    1. create the FPGA handle and load the weights of every network listed
       in the 'jsoncfg' command-line entry;
    2. run the network configured under the name 'yolo' on the FPGA and
       post-process its output on the CPU (sigmoid, softmax, NMS);
    3. crop the first detected box (plus a margin) out of the input image
       and save it as 'cropped_yolo_output.jpg';
    4. run the network configured under the name 'googlenet' on the crop,
       finish with the FC layers and softmax on the CPU and print the
       classification.

    Args:
        argv: forwarded to ``xdnn_io.processCommandLine``.

    Exits the process with status 1 when the FPGA handle cannot be created
    or when YOLO detects no object.
    """

    def report(stage, started):
        # Print the wall-clock time elapsed since ``started`` in milliseconds.
        elapsed = timeit.default_timer() - started
        print("\nTime to %s (%f ms):" % (stage, elapsed * 1000))

    def run_on_fpga(net, exec_name, output_name):
        # Upload the inputs, launch the network and fetch its result,
        # reporting the time taken by each of the three calls.
        started = timeit.default_timer()
        (net['fpgaInputs'], net['batch_sz'], net['shapes']) = \
            xdnn_io.prepareInput(net['args'], net['args']['PE'])
        report("transfer input image to FPGA", started)

        started = timeit.default_timer()
        xdnn.exec_async(net['args']['netcfg'], net['weightsBlobs'],
                        net['fpgaInputs'], net['fpgaOutputs'],
                        net['batch_sz'], net['args']['quantizecfg'],
                        net['args']['scaleB'], net['args']['PE'],
                        net['streamId'])
        report("execute %s on FPGA" % exec_name, started)

        started = timeit.default_timer()
        xdnn.get_result(net['args']['PE'], net['streamId'])
        report("retrieve %s outputs from FPGA" % output_name, started)

    args = xdnn_io.processCommandLine(argv)

    startTime = timeit.default_timer()
    ret = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", args['xlnxlib'])
    if ret != 0:
        sys.exit(1)
    report("createHandle", startTime)

    # we do not need other args keys except 'jsoncfg'
    args = args['jsoncfg']

    netCfgs = defaultdict(dict)
    startTime = timeit.default_timer()
    for streamId, netCfg_args in enumerate(args):
        netCfg_args['netcfg'] = './data/{}_{}.cmd'.format(
            netCfg_args['net'], netCfg_args['dsp'])
        entry = netCfgs[str(netCfg_args['name'])]
        entry['streamId'] = streamId
        entry['args'] = netCfg_args
        (entry['weightsBlobs'], entry['fcWeights'], entry['fcBiases']) = \
            xdnn_io.loadWeights(netCfg_args)
        entry['batch_sz'] = 1
        entry['fpgaOutputs'] = xdnn_io.prepareOutput(
            netCfg_args["fpgaoutsz"], entry['batch_sz'])
    report("init", startTime)

    ## run YOLO
    netCfg = netCfgs['yolo']
    run_on_fpga(netCfg, "Yolo", "yolo")

    startTime = timeit.default_timer()
    # Floor division keeps the grid size an integer under Python 3 as well;
    # it is used as a reshape dimension below.
    out_h = out_w = netCfg['args']['in_shape'][1] // 32
    anchor_boxes = 5
    objectness = 1
    coordinates = 4
    classes = 80
    out_c = objectness + coordinates + classes

    # Reshape the fpgaOutputs into a 4D volume
    yolo_outputs = netCfg['fpgaOutputs'].reshape(anchor_boxes, out_c,
                                                 out_h, out_w)

    # Apply sigmoid to channels 0, 1 and 4 of every anchor box
    yolo_outputs[:, 0:2, :, :] = sigmoid(
        yolo_outputs[:, 0:2, :, :])  # (X,Y) Predictions
    yolo_outputs[:, 4, :, :] = sigmoid(
        yolo_outputs[:, 4, :, :])  # Objectness / Box Confidence

    # Apply softmax on the class scores of each anchor box
    for box in range(anchor_boxes):
        yolo_outputs[box, 5:, :, :] = softmax(yolo_outputs[box, 5:, :, :])

    # Perform Non-Max Suppression
    # Non-Max Suppression filters out detections with a score lesser than 0.24
    # Additionally if there are two predictions with an overlap > 30%, the
    # prediction with the lower score will be filtered
    scorethresh = 0.24
    iouthresh = 0.3
    bboxes = nms.do_baseline_nms(
        yolo_outputs.flat,
        netCfg['shapes'][0][1], netCfg['shapes'][0][0],
        netCfg['args']['in_shape'][2], netCfg['args']['in_shape'][1],
        out_w, out_h, anchor_boxes, classes, scorethresh, iouthresh)

    with open(netCfg['args']['labels']) as f:
        names = [line.strip() for line in f]

    # Lets print the detections our model made
    for j, det in enumerate(bboxes):
        print("Obj %d: %s" % (j, names[det['classid']]))
        print("\t score = %f" % (det['prob']))
        print("\t (xlo,ylo) = (%d,%d)" % (det['ll']['x'], det['ll']['y']))
        print("\t (xhi,yhi) = (%d,%d)" % (det['ur']['x'], det['ur']['y']))
    report("execute on CPU", startTime)

    # Without a detection there is nothing to crop; stop with a clear
    # message instead of an IndexError on bboxes[obj_idx] below.
    if len(bboxes) == 0:
        print("No object detected by YOLO, nothing to pass to googlenet")
        xdnn.closeHandle()
        sys.exit(1)

    startTime = timeit.default_timer()
    # NOTE: the image is used as loaded by cv2.imread; the original code
    # carried a disabled BGR->RGB conversion here with the remark "YOLO was
    # trained with RGB, not BGR like Caffe".
    img = cv2.imread(netCfg['args']['images'][0])

    # choose one of the bounding boxes
    obj_idx = 0
    chosen = bboxes[obj_idx]
    # specify a margin added to the selected bounding box
    margin = 10
    # Rows run from the 'ur' y to the 'll' y, columns from the 'll' x to the
    # 'ur' x, both clamped to the image bounds.
    H_slice = slice(max(0, chosen['ur']['y'] - margin),
                    min(img.shape[0], chosen['ll']['y'] + margin))
    W_slice = slice(max(0, chosen['ll']['x'] - margin),
                    min(img.shape[1], chosen['ur']['x'] + margin))
    img = img[H_slice, W_slice, :]
    print('pass obj {}: {} with size {} to googlenet'.format(
        obj_idx, names[chosen['classid']], img.shape))
    cv2.imwrite('cropped_yolo_output.jpg', img)

    ## run GOOGLENET
    netCfg = netCfgs['googlenet']
    # The cropped array itself is handed over as the single input image.
    netCfg['args']['images'] = [img]
    report("prepare googlenet image on CPU", startTime)

    run_on_fpga(netCfg, "googlenet", "googlenet")

    startTime = timeit.default_timer()
    fcOut = np.empty((netCfg['batch_sz'] * netCfg['args']['outsz']),
                     dtype=np.float32, order='C')
    xdnn.computeFC(netCfg['fcWeights'], netCfg['fcBiases'],
                   netCfg['fpgaOutputs'], netCfg['batch_sz'],
                   netCfg['args']['outsz'], netCfg['args']['fpgaoutsz'],
                   fcOut)
    report("run FC layers on CPU", startTime)

    startTime = timeit.default_timer()
    softmaxOut = xdnn.computeSoftmax(fcOut, netCfg['batch_sz'])
    report("run Softmax on CPU", startTime)

    xdnn_io.printClassification(softmaxOut, netCfg['args'])

    print("\nSuccess!\n")
    xdnn.closeHandle()
def bbox_stage(config, q_bbox, maxNumIters=-1):
    """Consume FPGA outputs from ``q_bbox`` and write per-image detections.

    Each queue item is a ``(job, fpgaOutput)`` pair; a ``None`` item ends the
    loop.  Two post-processing paths exist:

    * YOLOv3 models ('standard_yolo_v3', 'tiny_yolo_v3', 'spp_yolo_v3'):
      the layer outputs go through ``process_all_yolo_layers``, are merged
      into one proposal array per image, corrected with
      ``correct_region_boxes`` and filtered with ``apply_nms``.
    * any other model: the flattened output is activated in place
      (sigmoid + softmax) and filtered with ``nms.do_baseline_nms``; these
      detections are also collected for ``results.json``.

    Both paths write one label file per image under
    ``config['out_labels_path']`` and draw the boxes when the job asks for
    it.  After the loop ends the collected results are dumped to
    ``results.json`` in the current directory.

    Args:
        config: dict of network/post-processing settings.
        q_bbox: queue-like object whose ``get()`` yields the payloads.
        maxNumIters: when positive, stop after this many queue reads.

    Returns:
        None.
    """
    v3_models = ('standard_yolo_v3', 'tiny_yolo_v3', 'spp_yolo_v3')

    def label_line(classid, prob, x, y, w, h):
        # One "<classid> <prob> <x> <y> <w> <h>" line of a label file.
        fields = [str(classid), str(round(prob, 3)),
                  str(x), str(y), str(w), str(h)]
        return " ".join(fields) + "\n"

    def write_label_file(image_path, label_lines):
        # Store the lines in "<out_labels_path>/<image name up to first '.'>.txt".
        stem = image_path.split("/")[-1].split(".")[0]
        out_file_txt = config['out_labels_path'] + "/" + stem + ".txt"
        log.info("writing this into prediction file at %s" % (out_file_txt))
        with open(out_file_txt, "w") as the_file:
            the_file.writelines(label_lines)

    results = []
    numIters = 0
    while True:
        numIters += 1
        if maxNumIters > 0 and numIters > maxNumIters:
            break

        payload = q_bbox.get()
        # Identity test: ``== None`` would be routed through the payload's
        # own __eq__, which is not what a sentinel check wants.
        if payload is None:
            break

        (job, fpgaOutput) = payload
        images = job['images']
        display = job['display']
        coco = job['coco']

        if config['yolo_model'] in v3_models:
            anchorCnt = config['anchorCnt']
            classes = config['classes']
            if config['yolo_model'] == 'tiny_yolo_v3':
                # The tiny model is always decoded with 80 classes.
                classes = 80

            out_yolo_layers = process_all_yolo_layers(
                fpgaOutput, classes, anchorCnt,
                config['net_w'], config['net_h'])

            # Rearrange every layer to (batch, proposals, 5 + classes) and
            # record the running proposal count, which gives each layer's
            # slice in the merged array.
            num_proposals_layer = [0]
            total_proposals = 0
            for layr_idx in range(len(out_yolo_layers)):
                shape = out_yolo_layers[layr_idx].shape
                cells = shape[2] * shape[3]
                layer = out_yolo_layers[layr_idx].reshape(
                    shape[0], anchorCnt, (5 + classes), cells)
                layer = layer.transpose(0, 3, 1, 2)
                out_yolo_layers[layr_idx] = layer.reshape(
                    shape[0], cells * anchorCnt, (5 + classes))
                total_proposals += cells * anchorCnt
                num_proposals_layer.append(total_proposals)

            boxes_array = np.empty(
                [config['batch_sz'], total_proposals, (5 + classes)])
            for layr_idx in range(len(out_yolo_layers)):
                proposal_st = num_proposals_layer[layr_idx]
                proposal_ed = num_proposals_layer[layr_idx + 1]
                boxes_array[:, proposal_st:proposal_ed, :] = \
                    out_yolo_layers[layr_idx][...]

            for i in range(config['batch_sz']):
                # x values are scaled by element [1] of the image shape and
                # y values by element [0].
                img_w = job['shapes'][i][1]
                img_h = job['shapes'][i][0]

                boxes_array[i, :, :] = correct_region_boxes(
                    boxes_array[i, :, :], 0, 1, 2, 3,
                    float(img_w), float(img_h),
                    float(config['net_w']), float(config['net_h']))
                detected_boxes = apply_nms(
                    boxes_array[i, :, :], classes,
                    config['scorethresh'], config['iouthresh'])

                bboxes = []
                out_line_list = []
                for det_idx, det in enumerate(detected_boxes):
                    # det = (x, y, w, h, classid, prob); x/y are used as the
                    # box centre when the corner pixels are derived below.
                    cx, cy, bw, bh = det[0], det[1], det[2], det[3]
                    box = {
                        'classid': det[4],
                        'prob': det[5],
                        'll': {'x': int((cx - 0.5 * bw) * img_w),
                               'y': int((cy + 0.5 * bh) * img_h)},
                        'ur': {'x': int((cx + 0.5 * bw) * img_w),
                               'y': int((cy - 0.5 * bh) * img_h)},
                    }
                    bboxes.append(box)
                    log.info("Obj %d: %s"
                             % (det_idx, config['names'][box['classid']]))
                    log.info("\t score = %f" % (box['prob']))
                    log.info("\t (xlo,ylo) = (%d,%d)"
                             % (box['ll']['x'], box['ll']['y']))
                    log.info("\t (xhi,yhi) = (%d,%d)"
                             % (box['ur']['x'], box['ur']['y']))
                    # This path stores the x, y, w, h values exactly as
                    # returned by apply_nms.
                    out_line_list.append(label_line(
                        box['classid'], box['prob'], cx, cy, bw, bh))

                if display:
                    draw_boxes(images[i], bboxes, config['names'],
                               config['colors'])

                write_label_file(images[i], out_line_list)
            continue

        fpgaOutput = fpgaOutput.flatten()
        groups = config['groups']
        batchstride = config['batchstride']
        for i in range(config['batch_sz']):
            log.info("Results for image %d: %s" % (i, images[i]))
            startidx = i * config['outsize']
            softmaxout = fpgaOutput[startidx:startidx + config['outsize']]

            # first activate first two channels of each bbox subgroup (n)
            for b in range(config['bboxplanes']):
                base = batchstride * b
                for r in range(base, base + 2 * groups):
                    softmaxout[r] = sigmoid(softmaxout[r])
                conf_start = base + groups * config['coords']
                for r in range(conf_start, conf_start + groups):
                    softmaxout[r] = sigmoid(softmaxout[r])

            # Now softmax on all classification arrays in image
            for b in range(config['bboxplanes']):
                for g in range(groups):
                    softmax(
                        config['beginoffset'] + b * batchstride
                        + g * config['groupstride'],
                        softmaxout, softmaxout, config['classes'], groups)

            # NMS
            bboxes = nms.do_baseline_nms(
                softmaxout, job['shapes'][i][1], job['shapes'][i][0],
                config['net_w'], config['net_h'],
                config['out_w'], config['out_h'],
                config['bboxplanes'], config['classes'],
                config['scorethresh'], config['iouthresh'])

            # REPORT BOXES
            log.info("Found %d boxes" % (len(bboxes)))
            out_line_list = []
            for j, box in enumerate(bboxes):
                log.info("Obj %d: %s" % (j, config['names'][box['classid']]))
                log.info("\t score = %f" % (box['prob']))
                log.info("\t (xlo,ylo) = (%d,%d)"
                         % (box['ll']['x'], box['ll']['y']))
                log.info("\t (xhi,yhi) = (%d,%d)"
                         % (box['ur']['x'], box['ur']['y']))

                filename = images[i]
                basename = filename.split("/")[-1]
                if coco:
                    # The id is the number after the last '_' in the file
                    # name, e.g. "..._000123.jpg" -> 123.
                    image_id = int(basename.split("_")[-1].split(".")[0])
                else:
                    image_id = basename

                x, y, w, h = cornersToxywh(
                    box["ll"]["x"], box["ll"]["y"],
                    box['ur']['x'], box['ur']['y'])
                results.append({
                    "image_id": image_id,
                    "category_id": config['cats'][box["classid"]],
                    "bbox": [x, y, w, h],
                    "score": round(box['prob'], 3),
                })

                x, y, w, h = darknet_style_xywh(
                    job['shapes'][i][1], job['shapes'][i][0],
                    box["ll"]["x"], box["ll"]["y"],
                    box['ur']['x'], box['ur']['y'])
                out_line_list.append(label_line(
                    box["classid"], box['prob'], x, y, w, h))

            # DRAW BOXES w/ LABELS
            if display:
                draw_boxes(images[i], bboxes, config['names'],
                           config['colors'])

            write_label_file(images[i], out_line_list)

    log.info("Saving results as results.json")
    with open("results.json", "w") as fp:
        fp.write(json.dumps(results, sort_keys=True, indent=4))
def obj_detect(msg):
    """Detect objects in a stretch of a video file and return them as rows.

    The request row set carries, in its first three columns, the video file
    name, the start time and the duration.  Every ``g_skip``-th frame of
    that stretch is sent through the FPGA queues; the rest of the YOLO
    layer (sigmoid, softmax, NMS) runs here on the CPU.  Boxes with an
    unknown class, boxes covering more than half of the frame in either
    direction and boxes not larger than 30x30 are dropped.

    Args:
        msg: request message; ``msg.rowset.columns`` holds the inputs.

    Returns:
        None when the request is empty, otherwise an ``xdrive_pb2.XMsg``
        whose row set has eight columns per detection: frame counter, class
        name, probability, llx, lly, urx, ury and the base64-encoded JPEG
        crop of the object.
    """
    global g_qIn, g_qOut

    rs = msg.rowset
    if len(rs.columns) == 0 or rs.columns[0].nrow == 0:
        print("Obj deection req size is 0.\n")
        return None

    # Input, will be a video file, start time, for how long.
    fname = rs.columns[0].sdata[0]
    start = rs.columns[1].f32data[0]
    duration = rs.columns[2].f32data[0]
    ret = []

    # The arguments are now actually substituted into the message; the old
    # call passed them as extra print items and never formatted the text.
    print("Obj dectect on file {0}: start {1}, length {2}.\n".format(
        fname, start, duration))

    # use opencv to get frames
    vc = cv2.VideoCapture(fname)
    try:
        # property 5: fps.
        fps = vc.get(5)
        if start > 1.0:
            # property 0: position in milliseconds
            # (property 1 would be the position as a frame number).
            vc.set(0, start * 1000)

        i = 0
        while i <= duration * fps:
            i += 1
            ok, frame = vc.read()
            if not ok:
                break
            if (i - 1) % g_skip != 0:
                continue

            # got a frame, do some transformation, then send it to FPGA.
            inputs = np.zeros((g_batchSize, g_imgc * g_imgh * g_imgw),
                              dtype=np.float32)
            inputs[0] = load_yoloimg(frame)

            # Hold the lock across the put/get pair and always release it,
            # so a failing queue operation cannot leave it locked.
            fpga_lock.acquire()
            try:
                g_qIn.put(inputs)
                outputs = g_qOut.get()
            finally:
                fpga_lock.release()

            # running the rest of yolo layer in CPU.
            outputs = outputs.reshape(g_anchor_boxes, g_outc, g_outh, g_outw)
            # sigmoid
            outputs[:, 0:2, :, :] = sigmoid(outputs[:, 0:2, :, :])
            outputs[:, 4, :, :] = sigmoid(outputs[:, 4, :, :])
            for box in range(g_anchor_boxes):
                outputs[box, 5:, :, :] = softmax(outputs[box, 5:, :, :])

            bboxes = nms.do_baseline_nms(
                outputs.flat, frame.shape[1], frame.shape[0],
                g_imgw, g_imgh, g_outw, g_outh,
                g_anchor_boxes, g_classes, g_scorethresh, g_iouthresh)

            for det in bboxes:
                cls = coconames(det['classid'])
                if cls is None:
                    continue

                llx = det['ll']['x']
                lly = det['ll']['y']
                urx = det['ur']['x']
                ury = det['ur']['y']
                box_w = urx - llx
                box_h = lly - ury

                # very tall/wide objects, we don't want to covering bbox
                if (box_w > frame.shape[1] * 0.5
                        or box_h > frame.shape[0] * 0.5):
                    continue

                # and avoid objects less than 30x30.
                if box_w > 30 and box_h > 30:
                    objimg = frame[ury:lly, llx:urx]
                    # tobytes() replaces the deprecated tostring() alias.
                    jpeg = cv2.imencode('.jpg', objimg)[1].tobytes()
                    objimg_str = base64.b64encode(jpeg)
                    ret.append((i, cls, det['prob'],
                                llx, lly, urx, ury, objimg_str))
    finally:
        # Release the capture even when a frame fails to process.
        vc.release()

    # return results: one column per tuple element, one row per detection.
    retmsg = xdrive_pb2.XMsg()
    rs = retmsg.rowset
    data_fields = ('i32data', 'sdata', 'f32data', 'f32data',
                   'f32data', 'f32data', 'f32data', 'sdata')
    columns = [rs.columns.add() for _ in data_fields]
    for col in columns:
        col.nrow = len(ret)

    for row in ret:
        for col, field, value in zip(columns, data_fields, row):
            col.nullmap.append(False)
            getattr(col, field).append(value)

    return retmsg
def yolo_gpu_inference(backend_path, class_names_file, image_dir, deploy_model, weights, out_labels, IOU_threshold, scorethresh, dims, mean_value, pxscale, transpose, channel_swap, yolo_model, num_classes, class_names):
    """Run a Caffe YOLO model over a directory and write one label file per image.

    Every file in ``image_dir`` (in sorted order) is pre-processed with
    ``prep_image``, forwarded through the network and post-processed into
    bounding boxes.  For 'standard_yolo_v3' the blobs 'layer81-conv',
    'layer93-conv' and 'layer105-conv' are decoded with
    ``process_all_yolo_layers`` / ``apply_nms``; for any other model the
    last layer's blob is activated (sigmoid + softmax) and filtered with
    ``nms.do_baseline_nms``.  The boxes are printed and stored, one
    "<classid> <prob> <x> <y> <w> <h>" line per box, in
    ``<out_labels>/<image name up to first '.'>.txt``.

    Caffe is put into CPU mode after the net has been created.

    Args:
        backend_path, class_names_file, mean_value: accepted but not used.
        image_dir: directory holding the input images.
        deploy_model: path of the deploy prototxt.
        weights: path of the caffemodel.
        out_labels: directory receiving the label files.
        IOU_threshold: IoU threshold handed to the NMS step.
        scorethresh: score threshold handed to ``nms.do_baseline_nms``.
        dims: network input dimensions; ``dims[1]`` is used as net_w and
            ``dims[2]`` as net_h.
        pxscale, transpose, channel_swap: forwarded to ``prep_image``.
        yolo_model: model name selecting the post-processing path.
        num_classes: number of object classes.
        class_names: names indexed by class id, used for printing.

    Returns:
        The number of images processed.
    """
    import math

    import caffe

    net = caffe.Net(deploy_model, weights, caffe.TEST)
    caffe.set_mode_cpu()
    last_layer_name = next(reversed(net.layer_dict))

    classes = num_classes
    bboxplanes = 5
    net_w = dims[1]
    net_h = dims[2]

    # The baseline path works on a grid of ceil(size / 32) cells.
    out_w = int(math.ceil(net_w / 32.0))
    out_h = int(math.ceil(net_h / 32.0))
    groups = out_w * out_h
    coords = 4
    groupstride = 1
    batchstride = groups * (classes + coords + 1)
    beginoffset = (coords + 1) * (out_w * out_h)
    iouthresh = IOU_threshold

    images = sorted([os.path.join(image_dir, name)
                     for name in os.listdir(image_dir)])

    for img in images:
        raw_img, s = prep_image(img, net_w, net_h, pxscale, 0.5,
                                transpose, channel_swap)
        net.blobs['data'].data[...] = raw_img
        net.forward()

        if yolo_model == 'standard_yolo_v3':
            fpgaOutput = [
                net.blobs['layer81-conv'].data[...],
                net.blobs['layer93-conv'].data[...],
                net.blobs['layer105-conv'].data[...],
            ]
            anchorCnt = 3
            print("classes fpgaOutput len %s %s" % (classes, len(fpgaOutput)))

            out_yolo_layers = process_all_yolo_layers(
                fpgaOutput, classes, anchorCnt, net_w, net_h)

            # Rearrange every layer to (batch, proposals, 5 + classes) and
            # record the running proposal count, which gives each layer's
            # slice in the merged array.
            num_proposals_layer = [0]
            total_proposals = 0
            for layr_idx in range(len(out_yolo_layers)):
                yolo_layer_shape = out_yolo_layers[layr_idx].shape
                print("layr_idx , yolo_layer_shape %s %s"
                      % (layr_idx, yolo_layer_shape))
                cells = yolo_layer_shape[2] * yolo_layer_shape[3]
                layer = out_yolo_layers[layr_idx].reshape(
                    yolo_layer_shape[0], anchorCnt, (5 + classes), cells)
                layer = layer.transpose(0, 3, 1, 2)
                out_yolo_layers[layr_idx] = layer.reshape(
                    yolo_layer_shape[0], cells * anchorCnt, (5 + classes))
                print("layr_idx, final in layer sape, outlayer shape %s %s %s"
                      % (layr_idx, yolo_layer_shape,
                         out_yolo_layers[layr_idx].shape))
                total_proposals += cells * anchorCnt
                num_proposals_layer.append(total_proposals)

            # One image per forward pass, hence a leading dimension of 1.
            boxes_array = np.empty([1, total_proposals, (5 + classes)])
            for layr_idx in range(len(out_yolo_layers)):
                proposal_st = num_proposals_layer[layr_idx]
                proposal_ed = num_proposals_layer[layr_idx + 1]
                print("proposal_st proposal_ed %s %s"
                      % (proposal_st, proposal_ed))
                boxes_array[:, proposal_st:proposal_ed, :] = \
                    out_yolo_layers[layr_idx][...]

            boxes_array[0, :, :] = correct_region_boxes(
                boxes_array[0, :, :], 0, 1, 2, 3,
                float(s[1]), float(s[0]), float(net_w), float(net_h))

            # The array holds a single image, so it is always read at index
            # 0; indexing it with the image counter fails from the second
            # image on.
            # NOTE(review): apply_nms is called here without a score
            # threshold argument -- confirm this matches the signature of
            # the apply_nms used by this file.
            detected_boxes = apply_nms(boxes_array[0, :, :], classes,
                                       iouthresh)

            bboxes = []
            for det in detected_boxes:
                print("%s %s %s %s %s %s"
                      % (det[0], det[1], det[2], det[3],
                         class_names[det[4]], det[5]))
                # Scale by the shape returned by prep_image for this image;
                # x uses s[1] and y uses s[0], like the calls around this
                # one.  (The ``job`` name used before does not exist here.)
                bboxes.append({
                    'classid': det[4],
                    'prob': det[5],
                    'll': {'x': int((det[0] - 0.5 * det[2]) * s[1]),
                           'y': int((det[1] + 0.5 * det[3]) * s[0])},
                    'ur': {'x': int((det[0] + 0.5 * det[2]) * s[1]),
                           'y': int((det[1] - 0.5 * det[3]) * s[0])},
                })
        else:
            # Work on a flattened copy so the blob itself stays untouched.
            softmaxout = np.copy(
                net.blobs[last_layer_name].data[...]).flatten()

            # first activate first two channels of each bbox subgroup (n)
            for b in range(bboxplanes):
                base = batchstride * b
                for r in range(base, base + 2 * groups):
                    softmaxout[r] = sigmoid(softmaxout[r])
                conf_start = base + groups * coords
                for r in range(conf_start, conf_start + groups):
                    softmaxout[r] = sigmoid(softmaxout[r])

            # Now softmax on all classification arrays in image
            for b in range(bboxplanes):
                for g in range(groups):
                    softmax(beginoffset + b * batchstride + g * groupstride,
                            softmaxout, softmaxout, classes, groups)

            # NMS
            bboxes = nms.do_baseline_nms(
                softmaxout, s[1], s[0], net_w, net_h, out_w, out_h,
                bboxplanes, classes, scorethresh, iouthresh)

        stem = img.split("/")[-1].split(".")[0]
        out_file_txt = out_labels + "/" + stem + ".txt"

        out_line_list = []
        for j, box in enumerate(bboxes):
            print("Obj %d: %s" % (j, class_names[box['classid']]))
            print("\t score = %f" % (box['prob']))
            print("\t (xlo,ylo) = (%d,%d)" % (box['ll']['x'], box['ll']['y']))
            print("\t (xhi,yhi) = (%d,%d)" % (box['ur']['x'], box['ur']['y']))
            x, y, w, h = darknet_style_xywh(
                s[1], s[0],
                box["ll"]["x"], box["ll"]["y"],
                box['ur']['x'], box['ur']['y'])
            fields = [str(box["classid"]), str(round(box['prob'], 3)),
                      str(x), str(y), str(w), str(h)]
            out_line_list.append(" ".join(fields) + "\n")

        # Formatted into one string so the path is printed as text rather
        # than as a tuple under Python 2.
        print("loogging into file : %s" % out_file_txt)
        with open(out_file_txt, "w") as the_file:
            the_file.writelines(out_line_list)

    return len(images)