def _draw_box(im, box_list, label_list, color=(128, 0, 128), cdict=None,
              form='center', scale=1):
    """Draw labelled bounding boxes onto ``im`` in place with OpenCV.

    Args:
        im: image handed directly to ``cv2.rectangle`` / ``cv2.putText``.
        box_list: one box per label. With ``form='center'`` every box is
            first converted with ``bbox_transform``; with ``'diagonal'`` it
            is unpacked as (xmin, ymin, xmax, ymax).
        label_list: label strings; the text before the first ':' is the key
            looked up in ``cdict``.
        color: fallback colour used when ``cdict`` has no entry for a label.
        cdict: optional mapping from class name to colour.
        form: ``'center'`` or ``'diagonal'``.
        scale: factor applied to every (already truncated) coordinate.

    Raises:
        AssertionError: if ``form`` is neither ``'center'`` nor ``'diagonal'``.
    """
    assert form == 'center' or form == 'diagonal', \
        'bounding box format not accepted: {}.'.format(form)

    font = cv2.FONT_HERSHEY_SIMPLEX
    for bbox, label in zip(box_list, label_list):
        if form == 'center':
            bbox = bbox_transform(bbox)

        # Truncate first (as before), scale, then coerce back to int so a
        # fractional ``scale`` still produces integer pixel coordinates for
        # the OpenCV drawing calls. Integer scales give unchanged results.
        xmin, ymin, xmax, ymax = [int(int(b) * scale) for b in bbox]

        l = label.split(':')[0]
        if cdict and l in cdict:
            c = cdict[l]
        else:
            c = color

        cv2.rectangle(im, (xmin, ymin), (xmax, ymax), c, 1)
        # Label sits just below the box, nudged left but kept inside the image.
        cv2.putText(im, label, (max(1, xmin - 10), ymax + 10), font, 0.3, c, 1)
def _draw_box(im, box_list, label_list, color=(0, 255, 0), cdict=None,
              form='center'):
    """Outline every box on ``im`` using ``ImageDraw``.

    ``im`` is passed to ``ImageDraw.Draw`` (presumably a PIL image -- confirm
    against the caller). Boxes given in ``'center'`` form are converted with
    ``bbox_transform`` before drawing.

    NOTE(review): the per-label colour is resolved from ``cdict`` / ``color``
    but the rectangle outline is fixed at 128, so it has no visible effect.
    """
    assert form == 'center' or form == 'diagonal', \
        'bounding box format not accepted: {}.'.format(form)

    for box, text in zip(box_list, label_list):
        corners = bbox_transform(box) if form == 'center' else box
        xmin, ymin, xmax, ymax = [int(v) for v in corners]

        # Class name is the text before "CLASS: (PROB)".
        cls_name = text.split(':')[0]
        c = cdict[cls_name] if (cdict and cls_name in cdict) else color

        # draw box
        pen = ImageDraw.Draw(im)
        pen.rectangle([xmin, ymin, xmax, ymax], fill=None, outline=128)
def _draw_box(im, box_list, label_list, color=(0, 255, 0), cdict=None,
              form='center'):
    """Draw each box and its label onto ``im`` in place with OpenCV.

    Boxes in ``'center'`` form are converted with ``bbox_transform``; the
    label text is anchored at the box's (xmin, ymax) corner. The colour comes
    from ``cdict`` (keyed by the label text before ':') or falls back to
    ``color``.
    """
    assert form == 'center' or form == 'diagonal', \
        'bounding box format not accepted: {}.'.format(form)

    for box, text in zip(box_list, label_list):
        corners = bbox_transform(box) if form == 'center' else box
        left, top, right, bottom = [int(v) for v in corners]

        # Class name is the text before "CLASS: (PROB)".
        cls_name = text.split(':')[0]
        paint = cdict[cls_name] if (cdict and cls_name in cdict) else color

        # box outline, then label
        cv2.rectangle(im, (left, top), (right, bottom), paint, 1)
        cv2.putText(im, text, (left, bottom), cv2.FONT_HERSHEY_SIMPLEX,
                    0.3, paint, 1)
def _draw_box(im, box_list_pre, label_list, color=None, cdict=None,
              form='center', draw_masks=False, fill=False, fps_text='NA'):
    """Draw boxes or polygon masks, their labels and an FPS banner on ``im``.

    The image is modified in place. ``box_list_pre`` is deep-copied first, so
    the in-place box conversion below never touches the caller's data.

    Args:
        im: image array; its shape must unpack as (height, width, channels).
        box_list_pre: boxes, one per label. Only the first four entries of
            each box are used as rectangle coordinates.
        label_list: label strings; the text before the first ':' is the key
            looked up in ``cdict``.
        color: fallback colour. When ``None`` a random colour is drawn for
            every box that has no ``cdict`` entry.
        cdict: optional mapping from class name to colour.
        form: ``'center'`` or ``'diagonal'``.
        draw_masks: draw the polygon returned by ``decode_parameterization``
            instead of the rectangle.
        fill: with ``draw_masks``, additionally blend the polygon interior
            50/50 with the box colour.
        fps_text: ``'NA'`` to disable the banner, otherwise a string of the
            form ``"<fps text>/<per-frame text>"`` (split on '/').

    Raises:
        AssertionError: if ``form`` is neither ``'center'`` nor ``'diagonal'``.
    """
    assert form == 'center' or form == 'diagonal', \
        'bounding box format not accepted: {}.'.format(form)

    bkp_im = copy.deepcopy(im)  # pristine pixels, used when blending fills
    box_list = copy.deepcopy(box_list_pre)
    ht, wd, _ = np.shape(im)

    for bbox, label in zip(box_list, label_list):
        if form == 'center':
            if draw_masks:
                raw_bounding_box = bbox
                bbox = bbox_transform2(bbox)
            else:
                bbox[0:4] = bbox_transform(bbox[0:4])
        elif draw_masks:
            raw_bounding_box = bbox_transform_inv2(bbox)

        xmin, ymin, xmax, ymax = [int(v) for v in bbox[:4]]

        if draw_masks:
            points = decode_parameterization(raw_bounding_box)
            points = np.round(points)  # Ensure rounding
            points = np.array(points, 'int32')

        l = label.split(':')[0]  # text before "CLASS: (PROB)"
        if cdict and l in cdict:
            c = cdict[l]  # colour dict wins when it knows the class
        elif color is None:
            # ``is None`` rather than ``== None``: an array-valued colour
            # would otherwise be compared element-wise.
            c = (np.random.choice(256),
                 np.random.choice(256),
                 np.random.choice(256))
        else:
            c = color

        # FPS counter (redrawn for every box, so it stays on top)
        if fps_text != 'NA':
            fps_counter, per_frame_time = fps_text.split('/')
            cv2.rectangle(im, (10, 10), (1014, 40), (0, 0, 0), cv2.FILLED)
            cv2.putText(im, fps_counter, (12, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.putText(im, per_frame_time, (750, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Filled background strip for the label, kept inside the image top.
        labelSize, baseLine = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        label_ymin = max(ymin, labelSize[1] + 10)
        cv2.rectangle(im,
                      (xmin, label_ymin - labelSize[1] - 10),
                      (xmin + labelSize[0], label_ymin + baseLine - 10),
                      c, cv2.FILLED)

        if not draw_masks:
            cv2.rectangle(im, (xmin, ymin), (xmax, ymax), c, 2)

        if draw_masks:
            if fill:
                color_mask = np.zeros((ht, wd, 3), np.uint8)
                cv2.fillConvexPoly(color_mask, points, c)
                # Restore the original pixels first so overlapping polygons
                # do not compound, then blend half image / half colour.
                im[color_mask > 0] = bkp_im[color_mask > 0]
                im[color_mask > 0] = (0.5 * im[color_mask > 0]
                                      + 0.5 * color_mask[color_mask > 0])
            # Draw label text
            cv2.putText(im, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
            # Polygon outline: connect consecutive points, closing the loop.
            for p in range(len(points)):
                cv2.line(im, tuple(points[p]),
                         tuple(points[(p + 1) % len(points)]), c, 2)
        else:
            # Draw label text
            cv2.putText(im, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
def eval_once(saver, ckpt_path, imdb, model, step, restore_checkpoint):
    """Run detection over the whole ``imdb`` once and pickle the raw results.

    Four pickle files are written to ``FLAGS.eval_dir + "/" + step``:
    ``all_boxes.p``, ``_t.p`` (the timers), ``num_detection.p`` and
    ``process_t.p`` (per-batch detection time measured with
    ``process_time``).

    Args:
        saver: saver used to restore ``ckpt_path`` when requested.
        ckpt_path: checkpoint path handed to ``saver.restore``.
        imdb: dataset providing ``image_idx``, ``num_classes`` and
            ``read_image_batch``.
        model: model exposing ``det_boxes``, ``det_probs``, ``det_class``,
            ``image_input`` and ``filter_prediction``.
        step: sub-directory name; concatenated with strings, so it must be
            a ``str``.
        restore_checkpoint: restore the checkpoint before evaluating.
    """
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if restore_checkpoint:
            saver.restore(sess, ckpt_path)

        # Initialise only the variables the (optional) restore left untouched:
        # reading an uninitialised variable raises FailedPreconditionError.
        uninitialized_vars = []
        for var in tf.all_variables():
            try:
                sess.run(var)
            except tf.errors.FailedPreconditionError:
                uninitialized_vars.append(var)
        init_new_vars_op = tf.initialize_variables(uninitialized_vars)
        sess.run(init_new_vars_op)

        num_images = len(imdb.image_idx)
        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

        # Detection sequence, looping through all images
        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}
        num_detection = 0.0
        process_times = []  # collected in a list; converted to an array once

        for i in xrange(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            t_start = process_time()  # process time measures detection cost
            det_boxes, det_probs, det_class = sess.run(
                [model.det_boxes, model.det_probs, model.det_class],
                feed_dict={model.image_input: images})
            t_stop = process_time()
            process_times.append(t_stop - t_start)
            _t['im_detect'].toc()

            _t['misc'].tic()
            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Separate names for the filtered results: rebinding
                # ``det_class`` here would make the next batch item index
                # into the previous item's filtered classes.
                kept_boxes, kept_scores, kept_classes = \
                    model.filter_prediction(
                        det_boxes[j], det_probs[j], det_class[j])

                num_detection += len(kept_boxes)
                for c, b, s in zip(kept_classes, kept_boxes, kept_scores):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

        out_dir = FLAGS.eval_dir + "/" + step
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        # Save all evaluation data; ``with`` closes every file handle.
        process_t = np.array(process_times)
        payloads = (
            ("/all_boxes.p", all_boxes),
            ("/_t.p", _t),
            ("/num_detection.p", num_detection),
            ("/process_t.p", process_t),
        )
        for suffix, payload in payloads:
            with open(out_dir + suffix, "wb") as out_file:
                pickle.dump(payload, out_file)
def decode_box(self, prior_bboxes, prior_variances):
    """Decode ``self.mbox_loc`` against prior boxes into pixel-space corners.

    The location tensor is reshaped to [BATCH_SIZE, -1, 4]; its four
    components are applied, scaled by the matching prior variance, as a
    centre offset (first two) and an exponential size factor (last two) on
    each prior. The centre/size boxes go through ``util.bbox_transform`` and
    are multiplied by the configured image width/height. The result is
    stored in ``self.decode_boxes`` and registered through ``self._add_act``.

    Args:
        prior_bboxes: reshaped to [-1, 4]; columns are read as
            (xmin, ymin, xmax, ymax).
        prior_variances: reshaped to [-1, 4]; one factor per component.
    """
    mc = self.mc
    print('self.mbox_loc:', self.mbox_loc)

    loc = tf.reshape(self.mbox_loc, [mc.BATCH_SIZE, -1, 4])
    d_cx, d_cy, d_w, d_h = tf.unstack(loc, axis=2)

    priors = tf.reshape(prior_bboxes, [-1, 4])
    variances = tf.reshape(prior_variances, [-1, 4])

    # Prior geometry in centre/size form.
    prior_w = priors[:, 2] - priors[:, 0]
    prior_h = priors[:, 3] - priors[:, 1]
    prior_cx = (priors[:, 0] + priors[:, 2]) / 2.
    prior_cy = (priors[:, 1] + priors[:, 3]) / 2.

    center_x = tf.identity(variances[:, 0] * d_cx * prior_w + prior_cx)
    center_y = tf.identity(variances[:, 1] * d_cy * prior_h + prior_cy)
    width = tf.identity(
        util.safe_exp(variances[:, 2] * d_w, mc.EXP_THRESH) * prior_w)
    height = tf.identity(
        util.safe_exp(variances[:, 3] * d_h, mc.EXP_THRESH) * prior_h)

    xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
        [center_x, center_y, width, height])

    # Clipping to [0, 1] is intentionally disabled:
    #   xmins = tf.minimum(tf.maximum(0.0, xmins), 1., name='bbox_xmin')
    #   ymins = tf.minimum(tf.maximum(0.0, ymins), 1., name='bbox_ymin')
    #   xmaxs = tf.maximum(tf.minimum(1., xmaxs), 0.0, name='bbox_xmax')
    #   ymaxs = tf.maximum(tf.minimum(1., ymaxs), 0.0, name='bbox_ymax')

    xmins = xmins * mc.IMAGE_WIDTH
    xmaxs = xmaxs * mc.IMAGE_WIDTH
    ymins = ymins * mc.IMAGE_HEIGHT
    ymaxs = ymaxs * mc.IMAGE_HEIGHT

    self.decode_boxes = tf.stack([xmins, ymins, xmaxs, ymaxs], axis=2)
    self._add_act(self.decode_boxes, 'decode_boxes')
def drift(self, image, gt_boxes):
    """Randomly shift ``image`` and its boxes (data augmentation).

    With probability governed by ``mc.DRIFT_PROB`` the inputs are returned
    untouched. Otherwise the image content is shifted by a random (dx, dy)
    bounded by ``mc.DRIFT_X`` / ``mc.DRIFT_Y`` into a zero-filled float32
    canvas of size (height - dy, width - dx, 3), and the boxes are moved by
    the same offset.

    Args:
        image: array indexed as [row, col, channel].
        gt_boxes: 2-D array of boxes that is scaled by the image width and
            height and then passed to ``bbox_transform_inv`` (presumably
            normalised corner boxes -- confirm against the caller). It is
            not modified.

    Returns:
        ``(image, boxes)``: the drifted image and the boxes re-normalised by
        the drifted image's width and height.

    Raises:
        AssertionError: if a box already extends past the left/top border.
    """
    mc = self.mc

    if np.random.rand() > mc.DRIFT_PROB:
        return image, gt_boxes

    ori_height, ori_width, _ = [int(v) for v in image.shape]

    # Work on a private copy: the scaling below is in place and must not
    # leak back into the caller's array.
    boxes = np.array(gt_boxes)
    boxes[:, 0::2] *= ori_width
    boxes[:, 1::2] *= ori_height
    boxes = np.array([bbox_transform_inv(box) for box in boxes])

    # Ensures that no ground-truth box is cut out of the image.
    max_drift_x = min(boxes[:, 0] - boxes[:, 2] / 2.0 + 1)
    max_drift_y = min(boxes[:, 1] - boxes[:, 3] / 2.0 + 1)
    assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

    # dy is sampled before dx to keep the random stream order unchanged.
    dy = np.random.randint(-mc.DRIFT_Y, min(mc.DRIFT_Y + 1, max_drift_y))
    dx = np.random.randint(-mc.DRIFT_X, min(mc.DRIFT_X + 1, max_drift_x))

    # shift bbox centres
    boxes[:, 0] = boxes[:, 0] - dx
    boxes[:, 1] = boxes[:, 1] - dy

    # distort image: positive offsets crop the source, negative ones pad
    new_h = ori_height - dy
    new_w = ori_width - dx
    src_x, dst_x = max(dx, 0), max(-dx, 0)
    src_y, dst_y = max(dy, 0), max(-dy, 0)

    distorted_im = np.zeros((int(new_h), int(new_w), 3)).astype(np.float32)
    distorted_im[dst_y:, dst_x:, :] = image[src_y:, src_x:, :]

    boxes = np.array([bbox_transform(box) for box in boxes])
    height, width, _ = [int(v) for v in distorted_im.shape]
    boxes[:, 0::2] /= width
    boxes[:, 1::2] /= height

    return distorted_im, boxes
def draw_box(im, box_list, label_list, pose_list, age_list,
             color=(0, 255, 0), cdict=None, form='center'):
    """Draw boxes annotated with label, pose and age onto ``im`` in place.

    Box coordinates are truncated to int and then stretched by the fixed
    factors 1280/480 (x) and 720/240 (y) before drawing. The caption is
    ``label + "; " + pose + "; " + age`` placed at the box's (xmin, ymax)
    corner. Boxes in ``'center'`` form are converted with ``bbox_transform``.
    """
    assert form == 'center' or form == 'diagonal', \
        'bounding box format not accepted: {}.'.format(form)

    rescale_x = 1.0 * 1280 / 480
    rescale_y = 1.0 * 720 / 240

    for box, text, pose, age in zip(box_list, label_list, pose_list, age_list):
        corners = bbox_transform(box) if form == 'center' else box
        x0, y0, x1, y1 = [int(v) for v in corners]

        top_left = (int(x0 * rescale_x), int(y0 * rescale_y))
        bottom_right = (int(x1 * rescale_x), int(y1 * rescale_y))

        # Class name is the text before "CLASS: (PROB)".
        cls_name = text.split(':')[0]
        paint = cdict[cls_name] if (cdict and cls_name in cdict) else color

        # draw box
        cv2.rectangle(im, top_left, bottom_right, paint, 1)

        # draw label
        caption = text + "; " + pose + "; " + age
        cv2.putText(im, caption, (top_left[0], bottom_right[1]),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, paint, 1)
def draw_box(im, box_list, label_list, color=(0, 255, 0), cdict={}, form='center'): assert form in ['center', 'diagonal'], 'bounding box format not accepted: %s.' % form for bbox, label in zip(box_list, label_list): if form == 'center': bbox = bbox_transform(bbox) xmin, ymin, xmax, ymax = [int(b) for b in bbox] l = label.split(':')[0] # text before "CLASS: PROB" c = cdict.get(l, color) # draw box cv2.rectangle(im, (xmin, ymin), (xmax, ymax), c, 1) # draw label font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(im, label, (xmin, ymax), font, 0.3, c, 1)
def _add_interpretation_graph(self):
    """Interpret NN output.

    Slices ``self.preds`` along its last axis into class scores, confidence
    scores and box deltas; decodes the deltas against ``mc.ANCHOR_BOX``;
    clips the resulting boxes to the image; assigns the IOU between decoded
    and input boxes to ``self.ious``; and derives the per-anchor detection
    score and class. Sets ``pred_class_probs``, ``pred_conf``,
    ``pred_box_delta``, ``num_objects``, ``det_boxes``, ``ious``,
    ``det_probs`` and ``det_class`` on ``self``.
    """
    mc = self.mc

    with tf.variable_scope('interpret_output'):
        preds = self.preds

        # probability: softmax over classes, one row per anchor
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        class_scores = tf.reshape(
            preds[:, :, :, :num_class_probs], [-1, mc.CLASSES])
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(class_scores),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # confidence
        num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
        conf_scores = tf.reshape(
            preds[:, :, :, num_class_probs:num_confidence_scores],
            [mc.BATCH_SIZE, mc.ANCHORS])
        self.pred_conf = tf.sigmoid(conf_scores, name='pred_confidence_score')

        # bbox_delta: everything after the confidence channels
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, num_confidence_scores:],
            [mc.BATCH_SIZE, mc.ANCHORS, 4],
            name='bbox_delta')

        # number of object. Used to normalize bbox and classification loss
        self.num_objects = tf.reduce_sum(self.input_mask, name='num_objects')

    with tf.variable_scope('bbox'):
        with tf.variable_scope('stretching'):
            delta_x, delta_y, delta_w, delta_h = tf.unstack(
                self.pred_box_delta, axis=2)

            anchors = mc.ANCHOR_BOX
            anchor_x = anchors[:, 0]
            anchor_y = anchors[:, 1]
            anchor_w = anchors[:, 2]
            anchor_h = anchors[:, 3]

            box_center_x = tf.identity(
                anchor_x + delta_x * anchor_w, name='bbox_cx')
            box_center_y = tf.identity(
                anchor_y + delta_y * anchor_h, name='bbox_cy')
            box_width = tf.identity(
                anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                name='bbox_width')
            box_height = tf.identity(
                anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                name='bbox_height')

            summaries = (
                (delta_x, 'delta_x'),
                (delta_y, 'delta_y'),
                (delta_w, 'delta_w'),
                (delta_h, 'delta_h'),
                (box_center_x, 'bbox_cx'),
                (box_center_y, 'bbox_cy'),
                (box_width, 'bbox_width'),
                (box_height, 'bbox_height'),
            )
            for tensor, tag in summaries:
                self._activation_summary(tensor, tag)

        with tf.variable_scope('trimming'):
            xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                [box_center_x, box_center_y, box_width, box_height])

            # The max x position is mc.IMAGE_WIDTH - 1 since we use
            # zero-based pixels. Same for y.
            x_limit = mc.IMAGE_WIDTH - 1.0
            y_limit = mc.IMAGE_HEIGHT - 1.0

            xmins = tf.minimum(
                tf.maximum(0.0, xmins), x_limit, name='bbox_xmin')
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(
                tf.maximum(0.0, ymins), y_limit, name='bbox_ymin')
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(
                tf.minimum(x_limit, xmaxs), 0.0, name='bbox_xmax')
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(
                tf.minimum(y_limit, ymaxs), 0.0, name='bbox_ymax')
            self._activation_summary(ymaxs, 'box_ymax')

            clipped = util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])
            self.det_boxes = tf.transpose(
                tf.stack(clipped), (1, 2, 0), name='bbox')

    with tf.variable_scope('IOU'):
        def _tensor_iou(box1, box2):
            """IOU of two corner-form box lists, zeroed where input_mask is 0."""
            with tf.variable_scope('intersection'):
                xmin = tf.maximum(box1[0], box2[0], name='xmin')
                ymin = tf.maximum(box1[1], box2[1], name='ymin')
                xmax = tf.minimum(box1[2], box2[2], name='xmax')
                ymax = tf.minimum(box1[3], box2[3], name='ymax')

                w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                intersection = tf.multiply(w, h, name='intersection')

            with tf.variable_scope('union'):
                w1 = tf.subtract(box1[2], box1[0], name='w1')
                h1 = tf.subtract(box1[3], box1[1], name='h1')
                w2 = tf.subtract(box2[2], box2[0], name='w2')
                h2 = tf.subtract(box2[3], box2[1], name='h2')

                union = w1 * h1 + w2 * h2 - intersection

            raw_iou = intersection / (union + mc.EPSILON)
            mask = tf.reshape(self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])
            return raw_iou * mask

        predicted = util.bbox_transform(tf.unstack(self.det_boxes, axis=2))
        target = util.bbox_transform(tf.unstack(self.box_input, axis=2))
        self.ious = self.ious.assign(_tensor_iou(predicted, target))
        self._activation_summary(self.ious, 'conf_score')

    with tf.variable_scope('probability'):
        self._activation_summary(self.pred_class_probs, 'class_probs')

        conf = tf.reshape(self.pred_conf, [mc.BATCH_SIZE, mc.ANCHORS, 1])
        probs = tf.multiply(
            self.pred_class_probs, conf, name='final_class_prob')
        self._activation_summary(probs, 'final_class_prob')

        self.det_probs = tf.reduce_max(probs, 2, name='score')
        self.det_class = tf.argmax(probs, 2, name='class_idx')
def eval_once(saver, ckpt_path, summary_writer, eval_summary_ops,
              eval_summary_phs, imdb, model):
    """Evaluate one checkpoint over ``imdb`` and write summaries.

    For every batch the detections are collected into ``all_boxes`` and a
    set of intermediate feature maps is dumped through ``save_dirct`` into
    ``<cwd>/data0119/`` (``ratio.txt`` there is rewritten on every batch and
    holds one returned value per feature map). Afterwards the detections are
    scored with ``imdb.evaluate_detections`` and the APs, timings and
    detections-per-image are fed into the evaluation summary placeholders.

    Args:
        saver: saver used to restore ``ckpt_path``.
        ckpt_path: checkpoint path; the text after the last '-' of its file
            name is used as the global step.
        summary_writer: receives every evaluated summary string.
        eval_summary_ops: summary ops run with the collected feed dict.
        eval_summary_phs: mapping from summary name to placeholder.
        imdb: dataset under evaluation.
        model: model exposing the fetched tensors and
            ``dac_filter_prediction``.
    """
    # Feature maps dumped per batch, in the order written to ratio.txt.
    feature_names = ['conv1', 'pool1', 'left_a', 'left_b',
                     'right_a', 'right_b', 'right_c']

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restores from checkpoint
        saver.restore(sess, ckpt_path)
        # Assuming model_checkpoint_path looks something like:
        #   /ckpt_dir/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt_path.split('/')[-1].split('-')[-1]

        num_images = len(imdb.image_idx)
        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}
        num_detection = 0.0

        for i in xrange(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            det_boxes, det_probs, det_class, det_conf = sess.run(
                [model.det_boxes, model.det_probs, model.det_class,
                 model.pred_conf],
                feed_dict={model.image_input: images})

            # Second pass fetching the feature maps to dump (still counted
            # in the 'im_detect' timer, as before).
            feature_maps = sess.run(
                [model.conv1, model.pool1, model.left_a, model.left_b,
                 model.right_a, model.right_b, model.right_c],
                feed_dict={model.image_input: images})
            _t['im_detect'].toc()

            save_path = os.getcwd() + '/data0119/'
            if not os.path.exists(save_path):
                os.mkdir(save_path)

            ratio_list = [
                save_dirct(np.ravel(fmap), save_path, name)
                for name, fmap in zip(feature_names, feature_maps)
            ]
            # A dedicated loop variable and a context manager: the previous
            # index loop reused ``i`` (corrupting the image index used
            # below) and never closed the file.
            with open(save_path + 'ratio.txt', 'w+') as ratio_file:
                for ratio in ratio_list:
                    ratio_file.write(str(ratio) + '\n')

            _t['misc'].tic()
            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Separate names so the batch-level ``det_class`` array is
                # not overwritten for the following batch items.
                kept_boxes, kept_scores, kept_classes = \
                    model.dac_filter_prediction(
                        det_boxes[j], det_probs[j], det_class[j], det_conf[j])

                num_detection += len(kept_boxes)
                for c, b, s in zip(kept_classes, kept_boxes, kept_scores):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s'.format(
                      i + 1, num_images, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(
            FLAGS.eval_dir, global_step, all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: {}:'.format(
            num_detection / num_images))
        print(' Timing:')
        print(' im_read: {:.3f}s detect: {:.3f}s misc: {:.3f}s'.format(
            _t['im_read'].average_time, _t['im_detect'].average_time,
            _t['misc'].average_time))
        print(' Average precisions:')

        feed_dict = {}
        for cls, ap in zip(ap_names, aps):
            feed_dict[eval_summary_phs['APs/' + cls]] = ap
            print(' {}: {:.3f}'.format(cls, ap))

        print(' Mean average precision: {:.3f}'.format(np.mean(aps)))
        feed_dict[eval_summary_phs['APs/mAP']] = np.mean(aps)
        feed_dict[eval_summary_phs['timing/im_detect']] = \
            _t['im_detect'].average_time
        feed_dict[eval_summary_phs['timing/im_read']] = \
            _t['im_read'].average_time
        feed_dict[eval_summary_phs['timing/post_proc']] = \
            _t['misc'].average_time
        feed_dict[eval_summary_phs['num_det_per_image']] = \
            num_detection / num_images

        print('Analyzing detections...')
        # Return value is not used here; the call is kept for its effects.
        imdb.do_detection_analysis_in_eval(FLAGS.eval_dir, global_step)

        eval_summary_str = sess.run(eval_summary_ops, feed_dict=feed_dict)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)
def image_demo():
    """Detect image.

    Restores ``FLAGS.checkpoint`` into a SqueezeDet model and, for every
    file in ``FLAGS.image_dir`` (sorted): runs detection, keeps detections
    above ``mc.PLOT_PROB_THRESH``, draws the ground-truth boxes read from the
    matching ``.txt`` file in ``FLAGS.label_dir`` together with the
    detections, writes the result to ``FLAGS.out_dir`` as ``out_<name>``,
    and prints recognized / expected / false-positive counts per class.
    A detection is a false positive when no remaining ground-truth box
    reaches ``FLAGS.iou_threshold``.
    """
    # TODO(bichen): move this color dict to configuration file
    cls2clr = {
        'car': (255, 191, 0),
        'cyclist': (0, 191, 255),
        'pedestrian': (255, 0, 191)
    }

    with tf.Graph().as_default():
        # Load model
        mc = kitti_squeezeDet_config()
        mc.BATCH_SIZE = 1
        # model parameters will be restored from checkpoint
        mc.LOAD_PRETRAINED_MODEL = False
        model = SqueezeDet(mc, FLAGS.gpu)

        saver = tf.train.Saver(model.model_params)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            saver.restore(sess, FLAGS.checkpoint)

            d = FLAGS.image_dir
            image_list = sorted([
                os.path.join(d, f) for f in os.listdir(d)
                if os.path.isfile(os.path.join(d, f))
            ])

            for f in image_list:
                im = cv2.imread(f)
                im = im.astype(np.float32, copy=False)
                im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
                input_image = im - mc.BGR_MEANS

                # time.time(): time.clock() no longer exists on Python 3.8+.
                start_clock = time.time()

                # Detect
                det_boxes, det_probs, det_class = sess.run(
                    [model.det_boxes, model.det_probs, model.det_class],
                    feed_dict={
                        model.image_input: [input_image],
                        model.keep_prob: 1.0
                    })

                # Filter
                final_boxes, final_probs, final_class = \
                    model.filter_prediction(
                        det_boxes[0], det_probs[0], det_class[0])

                duration = time.time() - start_clock

                keep_idx = [idx for idx in range(len(final_probs))
                            if final_probs[idx] > mc.PLOT_PROB_THRESH]
                final_boxes = [final_boxes[idx] for idx in keep_idx]
                final_probs = [final_probs[idx] for idx in keep_idx]
                final_class = [final_class[idx] for idx in keep_idx]

                file_name = os.path.split(f)[1]

                # Ground truth: class in column 0, corner box in columns 4-7.
                expected_classes = []
                expected_boxes = []
                expected_class_count = dict((k, 0) for k in mc.CLASS_NAMES)
                label_file_name = os.path.join(FLAGS.label_dir, file_name)
                label_file_name = os.path.splitext(label_file_name)[0] + '.txt'
                with open(label_file_name) as lf:
                    label_lines = [x.strip() for x in lf.readlines()]
                for l in label_lines:
                    parts = l.strip().lower().split(' ')
                    klass = parts[0]
                    if klass in expected_class_count:
                        expected_class_count[klass] += 1
                        bbox = [float(parts[i]) for i in [4, 5, 6, 7]]
                        expected_boxes.append(bbox)
                        expected_classes.append(klass)

                # Draw original boxes
                my_draw_box(im, expected_boxes,
                            [k + ': (TRUE)' for k in expected_classes],
                            form='diagonal', label_placement='top',
                            color=(200, 200, 200))

                # Draw recognized boxes
                my_draw_box(
                    im, final_boxes,
                    [mc.CLASS_NAMES[idx] + ': (%.2f)' % prob
                     for idx, prob in zip(final_class, final_probs)],
                    cdict=cls2clr,
                )

                out_file_name = os.path.join(FLAGS.out_dir, 'out_' + file_name)
                cv2.imwrite(out_file_name, im)
                print('File: {}'.format(out_file_name))
                print('Duration: {} sec'.format(duration))

                class_count = dict((k.lower(), 0) for k in mc.CLASS_NAMES)
                for k in final_class:
                    class_count[mc.CLASS_NAMES[k].lower()] += 1

                for k, v in class_count.items():
                    print('Recognized {}: {}'.format(k, v))

                # Kept in its own dict: reusing ``class_count`` made this
                # section print the recognized counts a second time.
                for k, v in expected_class_count.items():
                    print('Expected {}: {}'.format(k, v))

                false_positives_count = dict((k, 0) for k in mc.CLASS_NAMES)
                threshold = FLAGS.iou_threshold
                for klass, final_box in zip(final_class, final_boxes):
                    remove_index = -1
                    transformed = bbox_transform(final_box)
                    for i, expected_box in enumerate(expected_boxes):
                        iou = bb_intersection_over_union(
                            transformed, expected_box)
                        if iou >= threshold:
                            remove_index = i
                            break
                    if -1 == remove_index:
                        false_positives_count[mc.CLASS_NAMES[klass]] += 1
                    else:
                        # remove found box to not pick it up in the future
                        del expected_boxes[remove_index]

                for k, v in false_positives_count.items():
                    print('False positive {}: {}'.format(k, v))

                print('')
                sys.stdout.flush()
def _define_bbox(pred_bbox_delta, ANCHOR_BOX):
    """Decode box deltas against anchors and clip the boxes to the image.

    NOTE(review): ``mc`` and ``self`` are not parameters; they are taken from
    an enclosing scope that is not visible here -- confirm where this
    function is defined.

    Args:
        pred_bbox_delta: tensor unstacked along axis 2 into
            (delta_x, delta_y, delta_w, delta_h).
        ANCHOR_BOX: anchors whose columns are read as (x, y, w, h).

    Returns:
        The clipped boxes after ``util.bbox_transform_inv``, stacked and
        transposed with permutation (1, 2, 0).
    """
    delta_x, delta_y, delta_w, delta_h = tf.unstack(pred_bbox_delta, axis=2)

    # set_anchors(mc, scale)
    anchor_x = ANCHOR_BOX[:, 0]
    anchor_y = ANCHOR_BOX[:, 1]
    anchor_w = ANCHOR_BOX[:, 2]
    anchor_h = ANCHOR_BOX[:, 3]

    box_center_x = tf.identity(anchor_x + delta_x * anchor_w, name='bbox_cx')
    box_center_y = tf.identity(anchor_y + delta_y * anchor_h, name='bbox_cy')
    box_width = tf.identity(
        anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH), name='bbox_width')
    box_height = tf.identity(
        anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH), name='bbox_height')

    summaries = (
        (delta_x, 'delta_x'),
        (delta_y, 'delta_y'),
        (delta_w, 'delta_w'),
        (delta_h, 'delta_h'),
        (box_center_x, 'bbox_cx'),
        (box_center_y, 'bbox_cy'),
        (box_width, 'bbox_width'),
        (box_height, 'bbox_height'),
    )
    for tensor, tag in summaries:
        self._activation_summary(tensor, tag)

    with tf.variable_scope('trimming'):
        xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
            [box_center_x, box_center_y, box_width, box_height])

        # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
        # pixels. Same for y.
        x_limit = mc.IMAGE_WIDTH - 1.0
        y_limit = mc.IMAGE_HEIGHT - 1.0

        xmins = tf.minimum(tf.maximum(0.0, xmins), x_limit, name='bbox_xmin')
        self._activation_summary(xmins, 'box_xmin')

        ymins = tf.minimum(tf.maximum(0.0, ymins), y_limit, name='bbox_ymin')
        self._activation_summary(ymins, 'box_ymin')

        xmaxs = tf.maximum(tf.minimum(x_limit, xmaxs), 0.0, name='bbox_xmax')
        self._activation_summary(xmaxs, 'box_xmax')

        ymaxs = tf.maximum(tf.minimum(y_limit, ymaxs), 0.0, name='bbox_ymax')
        self._activation_summary(ymaxs, 'box_ymax')

        clipped = util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])
        det_boxes = tf.transpose(tf.stack(clipped), (1, 2, 0), name='bbox')

    return det_boxes
def detect_image(mc, sess, model, class_names, avg_precision, orig_im,
                 file_name, original_file_path):
    """Detect objects in one image, save annotated output and print metrics.

    The image is resized to the model input size, run through the model and
    filtered by ``mc.PLOT_PROB_THRESH``. Ground-truth and "dontcare" boxes
    are read from ``FLAGS.label_dir`` when a matching ``.txt`` file exists.
    The untouched image is written to ``FLAGS.out_dir``; a ``boxed_`` copy
    with all boxes drawn is written unless the ``CODEREEF`` environment
    variable equals ``'YES'``, in which case a JSON file with the detections
    is written instead (if absent) and ``original_file_path`` is removed.
    Per-class true/false positives, precision, recall and rolling AP are
    printed and accumulated into ``avg_precision``.

    Args:
        mc: model configuration (image size, BGR means, plot threshold).
        sess: session used to run the model.
        model: model exposing ``det_boxes``, ``det_probs``, ``det_class``,
            ``image_input`` and ``filter_prediction``.
        class_names: class names indexed by predicted class id.
        avg_precision: mapping class name -> per-difficulty accumulators
            (each with ``addIf`` and ``avg``).
        orig_im: the image as loaded by the caller.
        file_name: base file name used for labels and outputs.
        original_file_path: printed when non-empty; deleted in CODEREEF mode.
    """
    global box_counter
    box_counter = 0

    codereef = os.environ.get('CODEREEF', '') == 'YES'

    if codereef:
        # Best effort in CODEREEF mode: an unusable image is skipped
        # silently. ``Exception`` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            boxed_img = orig_im.copy()
        except Exception:
            return
    else:
        boxed_img = orig_im.copy()

    im = orig_im.astype(np.float32, copy=True)
    im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
    input_image = im - mc.BGR_MEANS

    start_clock = time.time()

    # Detect
    det_boxes, det_probs, det_class = sess.run(
        [model.det_boxes, model.det_probs, model.det_class],
        feed_dict={model.image_input: [input_image]})

    # Filter
    final_boxes, final_probs, final_class = model.filter_prediction(
        det_boxes[0], det_probs[0], det_class[0])

    duration = time.time() - start_clock

    keep_idx = [idx for idx in range(len(final_probs))
                if final_probs[idx] > mc.PLOT_PROB_THRESH]
    final_boxes = [final_boxes[idx] for idx in keep_idx]
    final_probs = [final_probs[idx] for idx in keep_idx]
    final_class = [final_class[idx] for idx in keep_idx]

    recognized = [
        Box(class_names[k], bbox_transform(bbox), prob=p)
        for k, bbox, p in zip(final_class, final_boxes, final_probs)
    ]

    # TODO(bichen): move this color dict to configuration file
    cls2clr = {
        'car': (255, 191, 0),
        'cyclist': (0, 191, 255),
        'pedestrian': (255, 0, 191)
    }

    # Ground truth: class in column 0, truncation/occlusion in columns 1-2,
    # corner box in columns 4-7.
    expected = []
    dontcare = []
    class_count = dict((k, 0) for k in class_names)
    if FLAGS.label_dir:
        label_file_name = os.path.join(FLAGS.label_dir, file_name)
        label_file_name = os.path.splitext(label_file_name)[0] + '.txt'
        if os.path.isfile(label_file_name):
            with open(label_file_name) as lf:
                label_lines = [x.strip() for x in lf.readlines()]
            for l in label_lines:
                parts = l.strip().lower().split(' ')
                klass = parts[0]
                bbox = [float(parts[i]) for i in [4, 5, 6, 7]]
                if klass in class_count:
                    class_count[klass] += 1
                    b = Box(klass, bbox,
                            truncation=float(parts[1]),
                            occlusion=float(parts[2]))
                    expected.append(b)
                elif klass == 'dontcare':
                    dontcare.append(Box(klass, bbox))
    expected_class_count = class_count

    rescaled_recognized = rescale_boxes(recognized, im.shape, orig_im.shape)

    # Draw dontcare boxes
    my_draw_box(boxed_img, [b.bbox for b in dontcare],
                ['dontcare' for b in dontcare],
                label_placement='top', color=(255, 255, 255))

    # Draw original boxes
    my_draw_box(boxed_img, [b.bbox for b in expected],
                [box.klass + ': (TRUE)' for box in expected],
                label_placement='top', color=(200, 200, 200))

    # Draw recognized boxes
    my_draw_box(
        boxed_img, [b.bbox for b in rescaled_recognized],
        [b.klass + ': (%.2f)' % b.prob for b in rescaled_recognized],
        cdict=cls2clr,
    )

    out_file_name = os.path.join(FLAGS.out_dir, file_name)
    cv2.imwrite(out_file_name, orig_im)
    if not codereef:
        boxed_out_file_name = os.path.join(FLAGS.out_dir, 'boxed_' + file_name)
        cv2.imwrite(boxed_out_file_name, boxed_img)

    results = {'objects': []}

    print('File: {}'.format(out_file_name))
    if '' != original_file_path:
        print('Original file: {}'.format(original_file_path))
    print('Duration: {} sec'.format(duration))

    class_count = dict((k, 0) for k in class_names)
    for k in final_class:
        class_count[class_names[k]] += 1

    for k, v in class_count.items():
        print('Recognized {}: {}'.format(k, v))

    for k, v in expected_class_count.items():
        print('Expected {}: {}'.format(k, v))

    for box in rescaled_recognized:
        b = box.bbox
        print('Detection {}: {:.3f} {:.3f} {:.3f} {:.3f} {:.3f}'.format(
            box.klass, b[0], b[1], b[2], b[3], box.prob))
        if codereef:
            obj = {
                'name': box.klass,
                'x': int(b[0]),
                'y': int(b[1]),
                'w': int(b[2]) - int(b[0]),
                'h': int(b[3]) - int(b[1]),
                'probability': float(box.prob)
            }
            results['objects'].append(obj)

    for box in expected:
        b = box.bbox
        print('Ground truth {}: {:.3f} {:.3f} {:.3f} {:.3f} 1'.format(
            box.klass, b[0], b[1], b[2], b[3]))

    # Record JSON
    if codereef:
        if os.path.isfile(original_file_path):
            os.remove(original_file_path)
        codereef_out_file_name = os.path.join(
            FLAGS.out_dir, os.path.splitext(file_name)[0] + '.json')
        if not os.path.isfile(codereef_out_file_name):
            import json
            with open(codereef_out_file_name, 'w') as of:
                of.write(json.dumps(results, indent=2, sort_keys=True))

    # NOTE(review): evaluation below uses ``recognized``, not
    # ``rescaled_recognized`` -- confirm whether ``rescale_boxes`` rescales
    # in place, otherwise these boxes are still in model-input coordinates.
    expected = [b for b in expected if care(b, dontcare)]
    recognized = [b for b in recognized if care(b, dontcare)]

    for k in class_names:
        all_rec = len([b for b in recognized if b.klass == k])
        all_gt = len([b for b in expected if b.klass == k])
        # don't report not found and actually unexpected labels, but still
        # count them for mAP
        report = 0 != all_rec or 0 != all_gt

        eval_boxes(expected, recognized, k, UNKNOWN)
        tp_easy, all_gt_easy = eval_boxes(expected, recognized, k, EASY)
        tp_mod, all_gt_mod = eval_boxes(expected, recognized, k, MODERATE)
        tp_hard, all_gt_hard = eval_boxes(expected, recognized, k, HARD)
        tp = tp_easy + tp_mod + tp_hard
        fp = all_rec - tp
        if report:
            print('True positive {}: {} easy, {} moderate, {} hard'.format(
                k, tp_easy, tp_mod, tp_hard))
            print('False positive {}: {}'.format(k, fp))

        # NOTE(review): argument order follows the existing calls; verify
        # against ``safe_div`` that this yields tp / (tp + fp).
        precision = [
            safe_div(tp_easy + fp, tp_easy),
            safe_div(tp_mod + fp, tp_mod),
            safe_div(tp_hard + fp, tp_hard)
        ]

        if 0 == all_gt:
            # Nothing to find: perfect recall only if nothing was reported.
            recall = 1.0 if 0 == all_rec else 0.0
        else:
            recall = float(tp) / float(all_gt)

        if report:
            print('Precision {}: {:.2f} easy, {:.2f} moderate, {:.2f} hard'.
                  format(k, precision[EASY], precision[MODERATE],
                         precision[HARD]))
            print('Recall {}: {:.2f}'.format(k, recall))

        ap = avg_precision[k]
        ap[EASY].addIf(precision[EASY], 0 < all_gt_easy)
        ap[MODERATE].addIf(precision[MODERATE], 0 < all_gt_mod)
        ap[HARD].addIf(precision[HARD], 0 < all_gt_hard)

        if report:
            print('Rolling AP {}: {:.2f} easy, {:.2f} moderate, {:.2f} hard'.
                  format(k, ap[EASY].avg, ap[MODERATE].avg, ap[HARD].avg))

    print('Rolling mAP: {:.4f}'.format(calc_mAP(avg_precision)))
    print('')
    sys.stdout.flush()
def _draw_box(im, box_list, label_list, color=(0, 255, 0), cdict=None, form='center'): assert form == 'center' or form == 'diagonal', \ 'bounding box format not accepted: {}.'.format(form) basket_box = [] ball_box = [] event = False for bbox, label in zip(box_list, label_list): l = label.split(':')[0] # text before "CLASS: (PROB)" if l == '02basket': basket_box = [bbox] else: ball_box.append(bbox) if form == 'center': bbox = bbox_transform(bbox) xmin, ymin, xmax, ymax = [int(b) for b in bbox] if cdict and l in cdict: c = cdict[l] else: c = color # draw box # cv2.rectangle(im, (xmin, ymin), (xmax, ymax), c, 1) # # draw label # font = cv2.FONT_HERSHEY_SIMPLEX # cv2.putText(im, label, (xmin, ymax), font, 0.3, c, 1) # distance_centers = sqrt((centerX_cha-centerX_def)*(centerX_cha-centerX_def) + # (centerY_cha-centerY_def)*(centerY_cha-centerY_def)) proposal_goal = False print('basket_box:', basket_box) for ballbox in ball_box: print('ball_box:', ballbox) if len (ballbox) > 0 : bbox = bbox_transform(ballbox) xmin, ymin, xmax, ymax = [int(b) for b in bbox] cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(im, 'ball', (xmin, ymax), font, 0.3, (0, 0, 255), 1) if len(basket_box) > 0: distance = math.sqrt((basket_box[0][0] - ballbox[0]) * (basket_box[0][0] - ballbox[0]) + (basket_box[0][1] - ballbox[1]) * (basket_box[0][1] - ballbox[1])) bbox = bbox_transform(basket_box[0]) xmin, ymin, xmax, ymax = [int(b) for b in bbox] cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(im, 'basket', (xmin, ymax), font, 0.3, (0, 255, 0), 1) # if proposal_goal: # if distance <= 100: # font = cv2.FONT_HERSHEY_SIMPLEX # # cv2.putText(im, 'GOALLL!!!!', (int(basket_box[0][0]) - 50, int(basket_box[0][1]) - 50), font, 1, c, 1) # # draw box # # bbox = bbox_transform(basket_box[0]) # xmin, ymin, xmax, ymax = [int(b) for b in bbox] # cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (0, 255, 0), 
1) # font = cv2.FONT_HERSHEY_SIMPLEX # cv2.putText(im, 'basket', (xmin, ymax), font, 0.3, (0, 255, 0), 1) # # bbox = bbox_transform(ballbox) # xmin, ymin, xmax, ymax = [int(b) for b in bbox] # cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (255, 0, 0), 1) # font = cv2.FONT_HERSHEY_SIMPLEX # cv2.putText(im, 'ball', (xmin, ymax), font, 0.3, (255, 0, 0), 1) # event = True # else: ballbox1 = bbox_transform(ballbox) if distance <= 100 and (ballbox[1] - basket_box[0][1]) <= 100 and (ballbox[1] - basket_box[0][1]) >= -10 and abs(ballbox[0] - basket_box[0][0]) <= 20: font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(im, 'GOALLL!!!!', (int(basket_box[0][0]) - 10, int(basket_box[0][1]) - 10), font, 1, c, 1) # draw box event = True # print('goallll') # if ( ((ballbox[1] - basket_box[0][3]) <= 50 and (ballbox[1] - basket_box[0][3]) >= 0) # and ( (basket_box[0][2] - ballbox[0]) <=10 and (basket_box[0][2] - ballbox[0]) >= -10)): # cv2.putText(im, 'GOALLL!!!!', (basket_box[0][0], basket_box[0][1]-50), font, 1, c, 1) # print('goallll') # asddsfsd return event
def Get_feed_data(self):
    """Read one shuffled ground-truth batch, augment it and densify the labels.

    For every image in the batch: ``mc.BGR_MEANS`` is subtracted in place, the
    image and its boxes go through ``self.Preprocess``, the labels are
    re-ordered with the index list that ``Preprocess`` returns, the image is
    mirrored left-right with probability 1/2 (box centres are flipped to
    match) and finally resized to ``(mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT)``.

    Returns:
      Tuple ``(input_images, gt_boxes_dense, gt_labels_dense, input_mask,
      all_match_overlaps)`` where the last four come from
      ``self._sparse_to_dense`` and ``self._math_bbox``.
    """
    mc = self.mc
    boxes_batch, labels_batch, images_batch = self.read_batch_gt_data(
        shuffle=True)
    boxes_batch = np.array(boxes_batch)
    labels_batch = np.array(labels_batch)
    images_batch = np.array(images_batch)

    input_images = []
    gt_data = []
    for i in range(len(boxes_batch)):
        im = images_batch[i]
        im -= mc.BGR_MEANS
        gt_bbox = np.array(boxes_batch[i])
        gt_label = np.array(labels_batch[i])

        im, gt_bbox, kept_idx = self.Preprocess(im, gt_bbox)
        assert len(kept_idx) == len(gt_bbox)
        # Keep only the labels of the boxes that survived preprocessing.
        gt_label = np.array([gt_label[idx] for idx in kept_idx])

        orig_h, orig_w, _ = map(float, im.shape)

        # Mirror augmentation.  Even columns are scaled by the width and odd
        # columns by the height before flipping, then scaled back afterwards
        # (presumably the boxes are normalised corner coordinates -- confirm
        # against Preprocess).
        gt_bbox[:, 0::2] *= orig_w
        gt_bbox[:, 1::2] *= orig_h
        gt_bbox_center = np.array(
            [bbox_transform_inv(box) for box in gt_bbox])
        if np.random.randint(2) == 1:
            im = im[:, ::-1, :]
            gt_bbox_center[:, 0] = orig_w - 1 - gt_bbox_center[:, 0]
        gt_bbox = np.array([bbox_transform(box) for box in gt_bbox_center])
        gt_bbox[:, 0::2] /= orig_w
        gt_bbox[:, 1::2] /= orig_h

        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        input_images.append(im)

        # One row per box: [batch index, label, 0, box[0..3]].
        for j in range(len(gt_bbox)):
            box = gt_bbox[j]
            gt_data.append(
                [i, gt_label[j], 0, box[0], box[1], box[2], box[3]])

    gt_boxes, gt_labels = self.parse_gt_data(gt_data)
    all_match_indices, all_match_overlaps = self._math_bbox(
        mc.ANCHOR_BOX, gt_boxes)
    gt_boxes_dense, gt_labels_dense, input_mask = self._sparse_to_dense(
        gt_boxes, gt_labels, all_match_indices)

    return (input_images, gt_boxes_dense, gt_labels_dense, input_mask,
            all_match_overlaps)
def eval_once(saver, ckpt_path, summary_writer, eval_summary_ops,
              eval_summary_phs, imdb, model):
    """Evaluate one checkpoint, optionally after simulated quantization.

    Restores ``ckpt_path`` into a fresh session.  When
    ``FLAGS.use_quantization`` is set, every trainable variable is rounded in
    place to a quantization grid derived from the observed parameter ranges.
    The model is then run over ``imdb``, detections are scored with
    ``imdb.evaluate_detections`` and the results are written through
    ``summary_writer``.

    Args:
      saver: object whose ``restore(sess, path)`` loads the checkpoint.
      ckpt_path: checkpoint path such as ``/ckpt_dir/model.ckpt-0``; the text
        after the last ``-`` is used as the global step.
      summary_writer: receives one ``add_summary`` call per summary string.
      eval_summary_ops: summary ops run with the collected metrics.
      eval_summary_phs: mapping from metric name to the placeholder fed with it.
      imdb: dataset providing ``image_idx``, ``num_classes``,
        ``read_image_batch``, ``evaluate_detections`` and
        ``do_detection_analysis_in_eval``.
      model: network exposing ``image_input``, ``det_boxes``, ``det_probs``,
        ``det_class`` and ``filter_prediction``.
    """
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restores from checkpoint
        saver.restore(sess, ckpt_path)

        # If we are applying simulated quantization
        if FLAGS.use_quantization:
            # Assertions for validity of quantization arguments
            assert FLAGS.rounding_method != 'none', \
                "Must specify rounding method (nearest_neighbor or stochastic)"
            assert FLAGS.model_bits != 0, \
                "Must specify non-zero number of model bits"

            # Extract parameter references for quantization
            all_vars = ops.get_collection_ref(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            ranges = _collect_param_ranges(sess, all_vars)
            _print_param_ranges(ranges)
            _quantize_trainable_vars(sess, all_vars, ranges)

        # Assuming model_checkpoint_path looks something like:
        #   /ckpt_dir/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt_path.split('/')[-1].split('-')[-1]

        num_images = len(imdb.image_idx)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}
        num_detection = 0.0

        for i in range(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            det_boxes, det_probs, det_class = sess.run(
                [model.det_boxes, model.det_probs, model.det_class],
                feed_dict={model.image_input: images})
            _t['im_detect'].toc()

            _t['misc'].tic()
            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Use separate names for the filtered results: rebinding
                # det_class here would make det_class[j] index the filtered
                # list of the previous image for every j > 0.
                kept_boxes, kept_scores, kept_classes = \
                    model.filter_prediction(
                        det_boxes[j], det_probs[j], det_class[j])

                num_detection += len(kept_boxes)
                # NOTE: every element of the batch is filed under image slot
                # i, so this bookkeeping is only exact for a batch size of 1.
                for c, b, s in zip(kept_classes, kept_boxes, kept_scores):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s'.format(
                      i + 1, num_images, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(FLAGS.eval_dir, global_step,
                                                 all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: {}:'.format(
            num_detection / num_images))
        print(' Timing:')
        print(' im_read: {:.3f}s detect: {:.3f}s misc: {:.3f}s'.format(
            _t['im_read'].average_time, _t['im_detect'].average_time,
            _t['misc'].average_time))
        print(' Average precisions:')

        feed_dict = {}
        for cls, ap in zip(ap_names, aps):
            feed_dict[eval_summary_phs['APs/' + cls]] = ap
            print(' {}: {:.3f}'.format(cls, ap))

        print(' Mean average precision: {:.3f}'.format(np.mean(aps)))
        feed_dict[eval_summary_phs['APs/mAP']] = np.mean(aps)
        feed_dict[eval_summary_phs['timing/im_detect']] = \
            _t['im_detect'].average_time
        feed_dict[eval_summary_phs['timing/im_read']] = \
            _t['im_read'].average_time
        feed_dict[eval_summary_phs['timing/post_proc']] = \
            _t['misc'].average_time
        feed_dict[eval_summary_phs['num_det_per_image']] = \
            num_detection / num_images

        print('Analyzing detections...')
        # Called for its side effects; the returned values are not used here.
        imdb.do_detection_analysis_in_eval(FLAGS.eval_dir, global_step)

        eval_summary_str = sess.run(eval_summary_ops, feed_dict=feed_dict)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)


def _collect_param_ranges(sess, all_vars):
    """Return ``{(layer_tag, kind): [min, max]}`` over ``all_vars``.

    ``layer_tag`` is ``''`` (all layers), ``'1x1'``, ``'3x3'`` or ``'conv'``;
    ``kind`` is ``''`` (weights and biases), ``'weight'`` (name contains
    ``'kernels'``) or ``'bias'`` (name contains ``'biases'``).  A variable
    contributes to every group whose tags occur in its name.  Minima start at
    +inf and maxima at 0.0, so a group's reported maximum is never negative
    and a group that matched no variable keeps ``[inf, 0.0]``.
    """
    layer_tags = ('1x1', '3x3', 'conv')
    ranges = {}
    for layer in ('',) + layer_tags:
        for kind in ('', 'weight', 'bias'):
            ranges[(layer, kind)] = [float('inf'), 0.0]

    for var in all_vars:
        print(var.name)
        tensor = sess.run(var)
        tensor_min = np.amin(tensor)
        tensor_max = np.amax(tensor)

        kinds = ['']
        if 'kernels' in var.name:
            kinds.append('weight')
        if 'biases' in var.name:
            kinds.append('bias')
        layers = [''] + [tag for tag in layer_tags if tag in var.name]

        for kind in kinds:
            for layer in layers:
                bounds = ranges[(layer, kind)]
                if tensor_min < bounds[0]:
                    bounds[0] = tensor_min
                if tensor_max > bounds[1]:
                    bounds[1] = tensor_max
    return ranges


def _print_param_ranges(ranges):
    """Print the (max, min) spread of every group in ``ranges``."""
    for layer in ('', '1x1', '3x3', 'conv'):
        print('---')
        for kind in ('', 'weight', 'bias'):
            parts = [p for p in ('global', layer, kind, 'spread:') if p]
            print(' '.join(parts))
            lowest, highest = ranges[(layer, kind)]
            print(highest, lowest)


def _quantize_trainable_vars(sess, all_vars, ranges):
    """Round every bias/kernel value to the quantization grid and store it."""
    for var in all_vars:
        print(var.name)
        # Load the data into a numpy array for easy manipulation
        tensor = sess.run(var)

        # If conv and fire layers are to be scaled separately
        if FLAGS.separate_layer_scales:
            if 'conv' in var.name:
                min_quant_val, max_quant_val = ranges[('conv', '')]
            elif 'fire' in var.name:
                min_quant_val = min(ranges[('1x1', '')][0],
                                    ranges[('3x3', '')][0])
                max_quant_val = max(ranges[('1x1', '')][1],
                                    ranges[('3x3', '')][1])
            else:
                print("Error: Only conv, 3x3, and 1x1 currently supported")
                exit()
        else:
            min_quant_val, max_quant_val = ranges[('', '')]

        # Get the set of values for quantization
        quant_val_arr = get_quant_val_array_from_minmax(
            min_quant_val, max_quant_val, FLAGS.model_bits,
            FLAGS.reserve_zero_val)

        if 'biases' in var.name:
            for idx0 in range(tensor.shape[0]):
                tensor[idx0] = round_to_quant_val(
                    quant_val_arr, tensor[idx0], FLAGS.rounding_method)
        if 'kernels' in var.name:
            # Visits the first four axes in row-major order, i.e. the same
            # order as four nested index loops.
            for idx in np.ndindex(*tensor.shape[:4]):
                tensor[idx] = round_to_quant_val(
                    quant_val_arr, tensor[idx], FLAGS.rounding_method)

        # Store the data back into the tensorflow variable
        sess.run(tf.assign(var, tensor))
def eval_once(saver, ckpt_path, summary_writer, imdb, model):
    """Evaluate one checkpoint on ``imdb`` and write scalar summaries.

    Restores ``ckpt_path`` into a fresh session, runs the detector over the
    dataset with ``is_training=False`` and ``keep_prob=1.0``, scores the
    detections with ``imdb.evaluate_detections`` and emits one scalar summary
    per metric through ``summary_writer``.

    Args:
      saver: object whose ``restore(sess, path)`` loads the checkpoint.
      ckpt_path: checkpoint path such as ``/ckpt_dir/model.ckpt-0``; the text
        after the last ``-`` is used as the global step.
      summary_writer: receives one ``add_summary`` call per summary string.
      imdb: dataset providing ``image_idx``, ``num_classes``,
        ``read_image_batch``, ``evaluate_detections`` and
        ``do_detection_analysis_in_eval``.
      model: network exposing ``image_input``, ``is_training``, ``keep_prob``,
        ``det_boxes``, ``det_probs``, ``det_class`` and ``filter_prediction``.
    """
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restores from checkpoint
        saver.restore(sess, ckpt_path)
        # Assuming model_checkpoint_path looks something like:
        #   /ckpt_dir/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt_path.split('/')[-1].split('-')[-1]

        num_images = len(imdb.image_idx)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}
        num_detection = 0.0

        for i in range(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            det_boxes, det_probs, det_class = sess.run(
                [model.det_boxes, model.det_probs, model.det_class],
                feed_dict={model.image_input: images,
                           model.is_training: False,
                           model.keep_prob: 1.0})
            _t['im_detect'].toc()

            _t['misc'].tic()
            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Use separate names for the filtered results: rebinding
                # det_class here would make det_class[j] index the filtered
                # list of the previous image for every j > 0.
                kept_boxes, kept_scores, kept_classes = \
                    model.filter_prediction(
                        det_boxes[j], det_probs[j], det_class[j])

                num_detection += len(kept_boxes)
                # NOTE: every element of the batch is filed under image slot
                # i, so this bookkeeping is only exact for a batch size of 1.
                for c, b, s in zip(kept_classes, kept_boxes, kept_scores):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s'.format(
                      i + 1, num_images, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(
            FLAGS.eval_dir, global_step, all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: {}:'.format(
            num_detection / num_images))
        print(' Timing:')
        print(' im_read: {:.3f}s detect: {:.3f}s misc: {:.3f}s'.format(
            _t['im_read'].average_time, _t['im_detect'].average_time,
            _t['misc'].average_time))
        print(' Average precisions:')

        # NOTE: new summary ops are added to the graph on every call.
        eval_summary_ops = []
        for cls, ap in zip(ap_names, aps):
            eval_summary_ops.append(tf.scalar_summary('APs/' + cls, ap))
            print(' {}: {:.3f}'.format(cls, ap))
        print(' Mean average precision: {:.3f}'.format(np.mean(aps)))
        eval_summary_ops.append(tf.scalar_summary('APs/mAP', np.mean(aps)))
        eval_summary_ops.append(
            tf.scalar_summary('timing/image_detect',
                              _t['im_detect'].average_time))
        eval_summary_ops.append(
            tf.scalar_summary('timing/image_read',
                              _t['im_read'].average_time))
        eval_summary_ops.append(
            tf.scalar_summary('timing/post_process',
                              _t['misc'].average_time))
        eval_summary_ops.append(
            tf.scalar_summary('num_detections_per_image',
                              num_detection / num_images))

        print('Analyzing detections...')
        stats, _ = imdb.do_detection_analysis_in_eval(
            FLAGS.eval_dir, global_step)
        # items() rather than iteritems() so the loop also runs on Python 3.
        for k, v in stats.items():
            eval_summary_ops.append(
                tf.scalar_summary('Detection Analysis/' + k, v))

        eval_summary_str = sess.run(eval_summary_ops)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)
def eval_once(saver, ckpt_path, summary_writer, eval_summary_ops,
              eval_summary_phs, imdb, model):
    """Evaluate one checkpoint on ``imdb`` unless it was already evaluated.

    Returns immediately when ``FLAGS.eval_dir`` already contains a
    ``detection_files_<step>`` entry for this checkpoint.  Otherwise restores
    the checkpoint into a session limited to 20% of GPU memory, runs the
    detector over the dataset, scores the detections with
    ``imdb.evaluate_detections`` and writes the metrics through
    ``summary_writer``.

    Args:
      saver: object whose ``restore(sess, path)`` loads the checkpoint.
      ckpt_path: checkpoint path such as ``/ckpt_dir/model.ckpt-0``; the text
        after the last ``-`` is used as the global step.
      summary_writer: receives one ``add_summary`` call per summary string.
      eval_summary_ops: summary ops run with the collected metrics.
      eval_summary_phs: mapping from metric name to the placeholder fed with it.
      imdb: dataset providing ``image_idx``, ``num_classes``,
        ``read_image_batch``, ``evaluate_detections`` and
        ``do_detection_analysis_in_eval``.
      model: network exposing ``image_input``, ``det_boxes``, ``det_probs``,
        ``det_class`` and ``filter_prediction``.
    """
    # Assuming model_checkpoint_path looks something like:
    #   /ckpt_dir/model.ckpt-0,
    # extract global_step from it.
    global_step = ckpt_path.split('/')[-1].split('-')[-1]

    # Decide whether there is anything to do before paying for a session.
    if os.path.exists(os.path.join(FLAGS.eval_dir,
                                   'detection_files_' + str(global_step))):
        return

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        # Restores from checkpoint
        saver.restore(sess, ckpt_path)

        num_images = len(imdb.image_idx)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}
        num_detection = 0.0

        for i in range(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            det_boxes, det_probs, det_class = sess.run(
                [model.det_boxes, model.det_probs, model.det_class],
                feed_dict={model.image_input: images})
            _t['im_detect'].toc()

            _t['misc'].tic()
            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Use separate names for the filtered results: rebinding
                # det_class here would make det_class[j] index the filtered
                # list of the previous image for every j > 0.
                kept_boxes, kept_scores, kept_classes = \
                    model.filter_prediction(
                        det_boxes[j], det_probs[j], det_class[j])

                num_detection += len(kept_boxes)
                # NOTE: every element of the batch is filed under image slot
                # i, so this bookkeeping is only exact for a batch size of 1.
                for c, b, s in zip(kept_classes, kept_boxes, kept_scores):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s'.format(
                      i + 1, num_images, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(
            FLAGS.eval_dir, global_step, all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: {}:'.format(
            num_detection / num_images))
        print(' Timing:')
        print(' im_read: {:.3f}s detect: {:.3f}s misc: {:.3f}s'.format(
            _t['im_read'].average_time, _t['im_detect'].average_time,
            _t['misc'].average_time))
        print(' Average precisions:')

        feed_dict = {}
        for cls, ap in zip(ap_names, aps):
            feed_dict[eval_summary_phs['APs/' + cls]] = ap
            print(' {}: {:.3f}'.format(cls, ap))

        print(' Mean average precision: {:.3f}'.format(np.mean(aps)))
        feed_dict[eval_summary_phs['APs/mAP']] = np.mean(aps)
        feed_dict[eval_summary_phs['timing/im_detect']] = \
            _t['im_detect'].average_time
        feed_dict[eval_summary_phs['timing/im_read']] = \
            _t['im_read'].average_time
        feed_dict[eval_summary_phs['timing/post_proc']] = \
            _t['misc'].average_time
        feed_dict[eval_summary_phs['num_det_per_image']] = \
            num_detection / num_images

        print('Analyzing detections...')
        # Called for its side effects; the returned values are not used here.
        imdb.do_detection_analysis_in_eval(FLAGS.eval_dir, global_step)

        eval_summary_str = sess.run(eval_summary_ops, feed_dict=feed_dict)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)
def _add_interpretation_graph(self):
    """Turn the raw network output into class probabilities and boxes.

    Reads ``self.preds`` and sets ``self.pred_class_probs``,
    ``self.pred_box_delta``, ``self.det_boxes``, ``self.det_probs`` and
    ``self.det_class``.  Class scores go through a sigmoid when there is a
    single class and through a softmax otherwise.
    """
    mc = self.mc

    with tf.variable_scope('interpret_output'):
        preds = self.preds

        # The first ANCHOR_PER_GRID * CLASSES channels hold the class scores.
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        class_scores = tf.reshape(
            preds[:, :, :, :num_class_probs], [-1, mc.CLASSES])
        if mc.CLASSES == 1:
            class_probs = tf.sigmoid(class_scores)
        else:
            class_probs = tf.nn.softmax(class_scores)
        self.pred_class_probs = tf.reshape(
            class_probs,
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # The remaining channels hold four box deltas per anchor.
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, num_class_probs:],
            [mc.BATCH_SIZE, mc.ANCHORS, 4],
            name='bbox_delta')

    with tf.variable_scope('bbox'):
        with tf.variable_scope('stretching'):
            delta_x, delta_y, delta_w, delta_h = tf.unstack(
                self.pred_box_delta, axis=2)

            anchors = mc.ANCHOR_BOX
            anchor_x = anchors[:, 0]
            anchor_y = anchors[:, 1]
            anchor_w = anchors[:, 2]
            anchor_h = anchors[:, 3]

            # Centres shift by a fraction of the anchor size; sizes scale by
            # the (thresholded) exponential of the delta.
            box_center_x = tf.identity(
                anchor_x + delta_x * anchor_w, name='bbox_cx')
            box_center_y = tf.identity(
                anchor_y + delta_y * anchor_h, name='bbox_cy')
            box_width = tf.identity(
                anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                name='bbox_width')
            box_height = tf.identity(
                anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                name='bbox_height')

            summaries = (
                (delta_x, 'delta_x'),
                (delta_y, 'delta_y'),
                (delta_w, 'delta_w'),
                (delta_h, 'delta_h'),
                (box_center_x, 'bbox_cx'),
                (box_center_y, 'bbox_cy'),
                (box_width, 'bbox_width'),
                (box_height, 'bbox_height'),
            )
            for tensor, tag in summaries:
                self._activation_summary(tensor, tag)

        with tf.variable_scope('trimming'):
            xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                [box_center_x, box_center_y, box_width, box_height])

            # The max x position is mc.IMAGE_WIDTH - 1 since we use
            # zero-based pixels. Same for y.
            last_x = mc.IMAGE_WIDTH - 1.0
            last_y = mc.IMAGE_HEIGHT - 1.0

            xmins = tf.minimum(
                tf.maximum(0.0, xmins), last_x, name='bbox_xmin')
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(
                tf.maximum(0.0, ymins), last_y, name='bbox_ymin')
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(
                tf.minimum(last_x, xmaxs), 0.0, name='bbox_xmax')
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(
                tf.minimum(last_y, ymaxs), 0.0, name='bbox_ymax')
            self._activation_summary(ymaxs, 'box_ymax')

            stacked = tf.stack(
                util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs]))
            self.det_boxes = tf.transpose(stacked, (1, 2, 0), name='bbox')

    with tf.variable_scope('probability'):
        self._activation_summary(self.pred_class_probs, 'class_probs')

        probs = self.pred_class_probs
        # Best class per anchor and its probability.
        self.det_probs = tf.reduce_max(probs, 2, name='score')
        self.det_class = tf.argmax(probs, 2, name='class_idx')
def _add_interpretation_graph(self):
    """Interpret NN output.

    Splits ``self.preds`` along its last axis into class scores, confidence
    scores and box deltas, decodes the deltas against ``mc.ANCHOR_BOX`` into
    clipped corner coordinates, computes the IOU between the anchors selected
    by ``self.paired_aidx_values`` and the ground-truth boxes in
    ``self.box_input``, and derives a detection score and class per anchor.

    Sets ``self.pred_class_probs``, ``self.pred_conf``, ``self.pred_box_delta``,
    ``self.num_objects``, ``self.det_boxes``, ``self._ious``, ``self.det_probs``,
    ``self.det_class``, ``self.prediction_boxes``, ``self.score``,
    ``self.cls_idx_per_img`` and ``self.filter_summaries``.

    NOTE(review): the scope nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original file before
    relying on op names.
    """
    mc = self.mc

    with tf.variable_scope('interpret_output') as scope:
        preds = self.preds

        # probability
        # The first ANCHOR_PER_GRID * CLASSES channels are class scores,
        # normalised with a softmax over the class axis.
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(
                tf.reshape(preds[:, :, :, :num_class_probs],
                           [-1, mc.CLASSES])),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # confidence
        # The next ANCHOR_PER_GRID channels are one confidence score per
        # anchor, squashed with a sigmoid.
        num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
        self.pred_conf = tf.sigmoid(tf.reshape(
            preds[:, :, :, num_class_probs:num_confidence_scores],
            [mc.BATCH_SIZE, mc.ANCHORS]),
            name='pred_confidence_score')

        # bbox_delta
        # All remaining channels are reshaped to four deltas per anchor.
        self.pred_box_delta = tf.reshape(preds[:, :, :,
                                               num_confidence_scores:],
                                         [mc.BATCH_SIZE, mc.ANCHORS, 4],
                                         name='bbox_delta')

        # number of objects. Used to normalize bbox and classification loss
        # self.num_objects = tf.reduce_sum(self.input_mask, name='num_objects')
        # Counted here as the number of ground-truth xmin values in the input.
        self.num_objects = tf.cast(
            tf.size(self.box_input["image/object/bbox/xmin"].values),
            tf.float32)

    with tf.variable_scope('bbox') as scope:
        with tf.variable_scope('stretching'):
            delta_x, delta_y, delta_w, delta_h = tf.unstack(
                self.pred_box_delta, axis=2)

            anchor_x = mc.ANCHOR_BOX[:, 0]
            anchor_y = mc.ANCHOR_BOX[:, 1]
            anchor_w = mc.ANCHOR_BOX[:, 2]
            anchor_h = mc.ANCHOR_BOX[:, 3]

            # Centres shift by a fraction of the anchor size; sizes scale by
            # util.safe_exp of the delta (thresholded by mc.EXP_THRESH).
            box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                       name='bbox_cx')
            box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                       name='bbox_cy')
            box_width = tf.identity(anchor_w *
                                    util.safe_exp(delta_w, mc.EXP_THRESH),
                                    name='bbox_width')
            box_height = tf.identity(anchor_h *
                                     util.safe_exp(delta_h, mc.EXP_THRESH),
                                     name='bbox_height')

            self._activation_summary(delta_x, 'delta_x')
            self._activation_summary(delta_y, 'delta_y')
            self._activation_summary(delta_w, 'delta_w')
            self._activation_summary(delta_h, 'delta_h')

            self._activation_summary(box_center_x, 'bbox_cx')
            self._activation_summary(box_center_y, 'bbox_cy')
            self._activation_summary(box_width, 'bbox_width')
            self._activation_summary(box_height, 'bbox_height')

        with tf.variable_scope('trimming'):
            xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                [box_center_x, box_center_y, box_width, box_height])

            # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
            # pixels. Same for y.
            xmins = tf.minimum(
                tf.maximum(0.0, xmins), mc.IMAGE_WIDTH - 1.0,
                name='bbox_xmin')  # shape = [mc.BATCH_SIZE, mc.ANCHORS]
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(
                tf.maximum(0.0, ymins), mc.IMAGE_HEIGHT - 1.0,
                name='bbox_ymin')  # shape = [mc.BATCH_SIZE, mc.ANCHORS]
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(
                tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs), 0.0,
                name='bbox_xmax')  # shape = [mc.BATCH_SIZE, mc.ANCHORS]
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(
                tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs), 0.0,
                name='bbox_ymax')  # shape = [mc.BATCH_SIZE, mc.ANCHORS]
            self._activation_summary(ymaxs, 'box_ymax')

            # Detected boxes are kept as a dict of corner tensors.
            self.det_boxes = {
                "xmins": xmins,
                "ymins": ymins,
                "xmaxs": xmaxs,
                "ymaxs": ymaxs
            }

    with tf.name_scope('IOU'):
        def _tensor_iou(box1, box2):
            # Element-wise IOU of two dicts of corner tensors keyed by
            # "xmin"/"ymin"/"xmax"/"ymax".
            with tf.name_scope('intersection'):
                xmin = tf.maximum(box1["xmin"], box2["xmin"], name='xmin')
                ymin = tf.maximum(box1["ymin"], box2["ymin"], name='ymin')
                xmax = tf.minimum(box1["xmax"], box2["xmax"], name='xmax')
                ymax = tf.minimum(box1["ymax"], box2["ymax"], name='ymax')

                # Non-overlapping boxes give a zero width or height.
                w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                intersection = tf.multiply(w, h, name='intersection')

            with tf.name_scope('union'):
                w1 = tf.subtract(box1["xmax"], box1["xmin"], name='w1')
                h1 = tf.subtract(box1["ymax"], box1["ymin"], name='h1')
                w2 = tf.subtract(box2["xmax"], box2["xmin"], name='w2')
                h2 = tf.subtract(box2["ymax"], box2["ymin"], name='h2')

                union = tf.cast(w1 * h1 + w2 * h2 - intersection,
                                dtype=tf.float32)

            # mc.EPSILON keeps the division defined when the union is zero.
            return tf.truediv(
                tf.cast(intersection, dtype=tf.float32),
                union + tf.constant(mc.EPSILON, dtype=tf.float32))

        mini_ious_values = _tensor_iou(
            {
                "xmin": tf.cast(tf.gather_nd(xmins, self.paired_aidx_values),
                                tf.float32),
                "ymin": tf.cast(tf.gather_nd(ymins, self.paired_aidx_values),
                                tf.float32),
                "xmax": tf.cast(tf.gather_nd(xmaxs, self.paired_aidx_values),
                                tf.float32),
                "ymax": tf.cast(tf.gather_nd(ymaxs, self.paired_aidx_values),
                                tf.float32)
            },  # predicted boxes
            {
                "xmin": tf.cast(
                    self.box_input["image/object/bbox/xmin"].values,
                    tf.float32),
                "ymin": tf.cast(
                    self.box_input["image/object/bbox/ymin"].values,
                    tf.float32),
                "xmax": tf.cast(
                    self.box_input["image/object/bbox/xmax"].values,
                    tf.float32),
                "ymax": tf.cast(
                    self.box_input["image/object/bbox/ymax"].values,
                    tf.float32)
            })  # input boxes

        # after computing the ious of the responsible boxes,
        # put the values to a large plane containing all anchors which are responsible and those which are not
        self._ious = tf.scatter_nd(self.paired_aidx_values, mini_ious_values,
                                   [mc.BATCH_SIZE, mc.ANCHORS])
        self._activation_summary(self._ious, 'conf_score')

    with tf.variable_scope('probability') as scope:
        self._activation_summary(self.pred_class_probs, 'class_probs')

        # Final per-class score = class probability * anchor confidence.
        probs = tf.multiply(self.pred_class_probs,
                            tf.reshape(self.pred_conf,
                                       [mc.BATCH_SIZE, mc.ANCHORS, 1]),
                            name='final_class_prob')

        self._activation_summary(probs, 'final_class_prob')

        # Best class per anchor and its score.
        self.det_probs = tf.reduce_max(probs, axis=2, name='score')
        self.det_class = tf.argmax(probs, axis=2, name='class_idx')
        self._activation_summary(
            tf.gather_nd(self.det_class, self.paired_aidx_values),
            'detected_classes')

    # get prediction boxes
    self.prediction_boxes, self.score,\
        self.cls_idx_per_img, self.filter_summaries = self.filter_prediction()
def eval_once(saver, ckpt_path, summary_writer, imdb, model):
    """Evaluate one checkpoint, as a mask filter or as a detector.

    Restores ``ckpt_path`` into a fresh session and walks over ``imdb``.
    When ``FLAGS.net == 'resnet50_filter'`` the predicted mask of the first
    image of each batch is compared with its ground-truth mask and accuracy,
    missed and culled fractions are accumulated together with a histogram of
    the missed percentage.  Otherwise detections are collected per class.
    Results are printed; ``summary_writer`` is accepted but not used here.

    Args:
      saver: object whose ``restore(sess, path)`` loads the checkpoint.
      ckpt_path: path of the checkpoint to restore.
      summary_writer: unused in this function.
      imdb: dataset providing ``image_idx``, ``num_classes`` and a
        ``read_image_batch`` that returns ``(images, scales, gt_masks)``.
      model: network exposing ``image_input``, ``keep_prob``, ``preds``,
        ``det_boxes``, ``det_probs``, ``det_class`` and ``filter_prediction``.
    """
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restores from checkpoint
        saver.restore(sess, ckpt_path)

        num_images = len(imdb.image_idx)
        total_acc, total_missed, total_culled = 0.0, 0.0, 0.0
        error_hist_bucket_width = 5
        error_hist = [0 for _ in range(int(100 / error_hist_bucket_width))]

        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}
        num_detection = 0.0
        acc, missed, culled = 0.0, 0.0, 0.0

        # Mask metrics are normalised by a fixed 12 x 39 cell count
        # (presumably the mask resolution -- TODO confirm).  Kept as a float
        # so the ratios are never truncated by integer division.
        mask_cells = float(12 * 39)

        for i in range(num_images):
            _t['im_read'].tic()
            images, scales, gt_masks = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            if FLAGS.net == 'resnet50_filter':
                _t['im_detect'].tic()
                pred_masks = sess.run([model.preds],
                                      feed_dict={
                                          model.image_input: images,
                                          model.keep_prob: 1.0
                                      })
                _t['im_detect'].toc()

                _t['misc'].tic()
                _t['misc'].toc()

                # Only the first image of the batch is scored.
                gt_mask = gt_masks[0]
                pred_mask = pred_masks[0][0]
                assert gt_mask.shape == pred_mask.shape, \
                    'Ground truth mask and predicted mask have different dimensions'

                # Metrics
                acc = (abs(gt_mask - pred_mask) < 0.5).sum() / mask_cells
                missed = (gt_mask - pred_mask > 0.9).sum() / mask_cells
                culled = (pred_mask < 0.1).sum() / mask_cells

                # Update totals
                total_acc = total_acc + acc
                total_missed = total_missed + missed
                total_culled = total_culled + culled

                # Update histogram.  A missed rate of 100% would index one
                # past the last bucket, so it is counted in the last bucket.
                int_missed = int(
                    np.floor(missed * 100.0 / float(error_hist_bucket_width)))
                int_missed = min(int_missed, len(error_hist) - 1)
                error_hist[int_missed] = error_hist[int_missed] + 1
            else:
                _t['im_detect'].tic()
                det_boxes, det_probs, det_class = sess.run(
                    [model.det_boxes, model.det_probs, model.det_class],
                    feed_dict={
                        model.image_input: images,
                        model.keep_prob: 1.0
                    })
                _t['im_detect'].toc()

                _t['misc'].tic()
                for j in range(len(det_boxes)):  # batch
                    # rescale
                    det_boxes[j, :, 0::2] /= scales[j][0]
                    det_boxes[j, :, 1::2] /= scales[j][1]

                    # Use separate names for the filtered results: rebinding
                    # det_class here would make det_class[j] index the
                    # filtered list of the previous image for every j > 0.
                    kept_boxes, kept_scores, kept_classes = \
                        model.filter_prediction(
                            det_boxes[j], det_probs[j], det_class[j])

                    num_detection += len(kept_boxes)
                    # NOTE: every element of the batch is filed under image
                    # slot i, so this is only exact for a batch size of 1.
                    for c, b, s in zip(kept_classes, kept_boxes, kept_scores):
                        all_boxes[c][i].append(bbox_transform(b) + [s])
                _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s, '
                  'accuracy: {:.2f}%, missed: {:.2f}%, culled: {:.2f}%'.format(
                      i + 1, num_images, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time,
                      acc * 100.0, missed * 100.0, culled * 100.0))

        total_acc = total_acc / float(num_images)
        total_missed = total_missed / float(num_images)
        total_culled = total_culled / float(num_images)
        print('Total # images: {:d}, Avg accuracy: {:.2f}%, '
              'Avg error: {:.2f}%, Avg culling: {:.2f}%'.format(
                  num_images, total_acc * 100.0, total_missed * 100.0,
                  total_culled * 100.0))
        print('Error histogram: {0}'.format(error_hist))
def eval_once(saver, ckpt_path, summary_writer, eval_summary_ops,
              eval_summary_phs, imdb, model):
    """Evaluate one checkpoint and write the results as summaries.

    Restores ``ckpt_path``, runs the detector over every entry of
    ``imdb.image_idx``, filters the raw outputs with
    ``model.dac_filter_prediction`` and hands the collected boxes to
    ``imdb.evaluate_detections``.  Average precisions, timings and the
    number of detections per image are fed through ``eval_summary_phs`` into
    ``eval_summary_ops`` and written with ``summary_writer``.

    Args:
      saver: object whose ``restore(sess, path)`` loads the checkpoint.
      ckpt_path: checkpoint path; the text after the last ``-`` of its file
        name is used as the global step.
      summary_writer: receives the evaluated summary strings.
      eval_summary_ops: summary ops to run once evaluation is finished.
      eval_summary_phs: dict mapping summary names (``'APs/<cls>'``,
        ``'APs/mAP'``, ``'timing/...'``, ``'num_det_per_image'``) to the
        placeholders feeding ``eval_summary_ops``.
      imdb: dataset to evaluate on.
      model: detection network.
    """
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restores from checkpoint
        saver.restore(sess, ckpt_path)
        # Assuming model_checkpoint_path looks something like:
        #   /ckpt_dir/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt_path.split('/')[-1].split('-')[-1]

        # Author's experiment notes (the triples look like the leading
        # arguments of float2pow2_offline below; the trailing number is not
        # documented in this file):
        #   (7,-9,-1)0.000  (7,-15,-2)0.743  (7,-16,-2)0.800
        #   (7,-16,-1)0.891 (7,-15,-1)0.788  (7,-14,-1)0.450
        #   (7,-15,0)0.774
        #   new: (7,-7,0)0.457  (7,-9,-1)0.881  (7,-8,-1)0.669
        #        (7,-9,-2)0.778 (7,-10,-3)0.010
        # fix_ops = f2p.float2pow2_offline(7, -9, -1, "./data/weights", sess,
        #                                  resave=True, convert=False, trt=True)
        # sess.run(fix_ops)

        num_images = len(imdb.image_idx)

        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}

        num_detection = 0.0
        for i in xrange(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            det_boxes, det_probs, det_class, det_conf = sess.run(
                [
                    model.det_boxes, model.det_probs, model.det_class,
                    model.pred_conf
                ],
                feed_dict={model.image_input: images})
            _t['im_detect'].toc()

            _t['misc'].tic()

            # take the clip and save
            # NOTE(review): ``cfg``, ``layer_list``, ``process_fm``,
            # ``checkpoint_path`` and ``parameter_save`` are not defined in
            # this function and must come from module scope -- confirm that
            # ``checkpoint_path`` is not meant to be ``ckpt_path``.
            LocalPath = os.getcwd()
            save_path = LocalPath + cfg.SAVE_PATH + '/activations'
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            # ``with`` guarantees both files are closed even when process_fm
            # raises; they are (re)created empty exactly as before.
            with open(save_path + '/ratio_key.txt', 'w+') as ratio_key, \
                    open(save_path + '/ratio_value.txt', 'w+') as ratio_value:
                for x in layer_list:
                    process_fm(checkpoint_path, x, parameter_save)
            ################# over ##################

            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Use separate names for the filtered results so that the
                # batched ``det_class`` array is not overwritten while the
                # batch is still being iterated.
                kept_bbox, kept_score, kept_class = \
                    model.dac_filter_prediction(
                        det_boxes[j], det_probs[j], det_class[j], det_conf[j])

                num_detection += len(kept_bbox)
                for c, b, s in zip(kept_class, kept_bbox, kept_score):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s'.format(
                      i + 1, num_images, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(FLAGS.eval_dir, global_step,
                                                 all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: {}:'.format(
            num_detection / num_images))
        print(' Timing:')
        print(' im_read: {:.3f}s detect: {:.3f}s misc: {:.3f}s'.format(
            _t['im_read'].average_time, _t['im_detect'].average_time,
            _t['misc'].average_time))
        print(' Average precisions:')

        feed_dict = {}
        for cls, ap in zip(ap_names, aps):
            feed_dict[eval_summary_phs['APs/' + cls]] = ap
            print(' {}: {:.3f}'.format(cls, ap))

        print(' Mean average precision: {:.3f}'.format(np.mean(aps)))
        feed_dict[eval_summary_phs['APs/mAP']] = np.mean(aps)
        feed_dict[eval_summary_phs['timing/im_detect']] = \
            _t['im_detect'].average_time
        feed_dict[eval_summary_phs['timing/im_read']] = \
            _t['im_read'].average_time
        feed_dict[eval_summary_phs['timing/post_proc']] = \
            _t['misc'].average_time
        feed_dict[eval_summary_phs['num_det_per_image']] = \
            num_detection / num_images

        print('Analyzing detections...')
        # The return values are not used here; the call is kept because the
        # original performs it.
        stats, ims = imdb.do_detection_analysis_in_eval(
            FLAGS.eval_dir, global_step)

        eval_summary_str = sess.run(eval_summary_ops, feed_dict=feed_dict)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)
def eval_once(saver, summary_writer, imdb, model, mc):
    """Evaluate the YOLO-style model on ``imdb`` and write summaries.

    Variables are initialised with ``tf.global_variables_initializer`` (no
    checkpoint is restored, so ``saver`` is unused).  The dataset is read in
    ``int(n_imgs / mc.BATCH_SIZE) + 1`` batches; raw outputs are filtered
    with ``model.filter_yolo_predict`` and passed to
    ``imdb.evaluate_detections``.  Scalar summaries for the APs, timings,
    detection count and detection-analysis statistics are created, evaluated
    and written with ``summary_writer``.

    Args:
      saver: unused; kept for interface compatibility.
      summary_writer: receives the evaluated summary strings.
      imdb: dataset to evaluate on.
      model: detection network.
      mc: model configuration providing ``BATCH_SIZE``.
    """
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Initialize
        init = tf.global_variables_initializer()
        sess.run(init)

        #global_step = '0'
        global_step = None

        n_imgs = len(imdb.image_idx)
        n_iters = int(n_imgs / mc.BATCH_SIZE) + 1

        all_boxes = [[[] for _ in xrange(n_imgs)]
                     for _ in xrange(imdb.num_classes)]

        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}

        num_detection = 0.0
        for i in xrange(n_iters):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            # TODO(jeff): remove output other than det_boxes, det_probs, det_class
            det_boxes, det_probs, det_class, probs, confs, \
                conv13, reorg20, concat20 = sess.run(
                    [
                        model.det_boxes, model.det_probs, model.det_class,
                        model.probs, model.pred_conf, model.conv13,
                        model.reorg20, model.concat20
                    ],
                    feed_dict={
                        model.image_input: images,
                        model.is_training: False,
                        model.keep_prob: 1.0
                    })
            _t['im_detect'].toc()

            _t['misc'].tic()
            for j in range(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # The filtered classes get their own name: rebinding
                # ``det_class`` here would make ``det_class[j]`` index the
                # filtered list of the previous image for every j > 0.
                kept_bbox, kept_score, kept_class = model.filter_yolo_predict(
                    det_boxes[j], det_probs[j], det_class[j])

                num_detection += len(kept_bbox)
                # NOTE(review): detections are stored under the iteration
                # index ``i`` rather than a per-image index; with
                # BATCH_SIZE > 1 all images of a batch share one slot --
                # confirm this is what evaluate_detections expects.
                for c, b, s in zip(kept_class, kept_bbox, kept_score):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: {:d}/{:d} im_read: {:.3f}s '
                  'detect: {:.3f}s misc: {:.3f}s'.format(
                      i + 1, n_imgs, _t['im_read'].average_time,
                      _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(FLAGS.eval_dir, global_step,
                                                 all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: {}:'.format(
            num_detection / n_imgs))
        print(' Timing:')
        print(' im_read: {:.3f}s detect: {:.3f}s misc: {:.3f}s'.format(
            _t['im_read'].average_time, _t['im_detect'].average_time,
            _t['misc'].average_time))
        print(' Average precisions:')

        eval_summary_ops = []
        for cls, ap in zip(ap_names, aps):
            eval_summary_ops.append(tf.summary.scalar('APs/' + cls, ap))
            print(' {}: {:.3f}'.format(cls, ap))

        print(' Mean average precision: {:.3f}'.format(np.mean(aps)))
        eval_summary_ops.append(tf.summary.scalar('APs/mAP', np.mean(aps)))
        eval_summary_ops.append(
            tf.summary.scalar('timing/image_detect',
                              _t['im_detect'].average_time))
        eval_summary_ops.append(
            tf.summary.scalar('timing/image_read', _t['im_read'].average_time))
        eval_summary_ops.append(
            tf.summary.scalar('timing/post_process', _t['misc'].average_time))
        eval_summary_ops.append(
            tf.summary.scalar('num_detections_per_image',
                              num_detection / n_imgs))

        print('Analyzing detections...')
        stats, ims = imdb.do_detection_analysis_in_eval(
            FLAGS.eval_dir, global_step)
        # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
        for k, v in stats.items():
            eval_summary_ops.append(
                tf.summary.scalar('Detection Analysis/' + k, v))

        eval_summary_str = sess.run(eval_summary_ops)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)
def lj_caltech_test_demo():
    """Detect pedestrians on the Caltech test set, log and display them.

    For every image path in the Caltech ``test`` image set the image is
    resized to the network input size, run through ``VGG16ConvDet`` and
    filtered.  Boxes whose probability exceeds ``mc.PLOT_PROB_THRESH`` are

    * appended to a per-sequence text file as
      ``frame,left,top,width,height,score`` in original-image pixels, and
    * drawn on the resized image, which is shown in an OpenCV window.

    Per-image timings are printed.  Images that cannot be read are skipped.
    """
    with tf.Graph().as_default():
        # Load model
        mc = caltech_vgg16_config()
        mc.BATCH_SIZE = 1
        mc.PLOT_PROB_THRESH = 0.01  # added by LJ
        mc.NMS_THRESH = 0.4  # added by LJ
        mc.PROB_THRESH = 0.01  # added by LJ
        # model parameters will be restored from checkpoint
        mc.LOAD_PRETRAINED_MODEL = False
        model = VGG16ConvDet(mc, FLAGS.gpu)

        saver = tf.train.Saver(model.model_params)
        caltech_imdb = caltech(image_set='test', mc=mc)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            saver.restore(sess, FLAGS.checkpoint)

            times = {}
            for image_path in caltech_imdb.image_idx:
                t_start = time.time()
                im = cv2.imread(image_path)
                if im is None:
                    # cv2.imread returns None for unreadable files; skip
                    # instead of crashing on ``im.shape``.
                    print('Could not read image: {}'.format(image_path))
                    continue

                # Factors mapping network-input coordinates back to the
                # original image.  shape[1] is the width (x axis) and
                # shape[0] the height (y axis); float() keeps the ratio from
                # being truncated by Python 2 integer division.
                scale_x = float(im.shape[1]) / mc.IMAGE_WIDTH
                scale_y = float(im.shape[0]) / mc.IMAGE_HEIGHT

                im = im.astype(np.float32, copy=False)
                im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
                input_image = im - mc.BGR_MEANS
                t_reshape = time.time()
                times['reshape'] = t_reshape - t_start

                # Detect
                det_boxes, det_probs, det_class = sess.run(
                    [model.det_boxes, model.det_probs, model.det_class],
                    feed_dict={
                        model.image_input: [input_image],
                        model.keep_prob: 1.0
                    })
                t_detect = time.time()
                times['detect'] = t_detect - t_reshape

                # Filter
                final_boxes, final_probs, final_class = \
                    model.filter_prediction(
                        det_boxes[0], det_probs[0], det_class[0])

                keep_idx = [idx for idx in range(len(final_probs))
                            if final_probs[idx] > mc.PLOT_PROB_THRESH]
                final_boxes = [final_boxes[idx] for idx in keep_idx]
                final_probs = [final_probs[idx] for idx in keep_idx]
                final_class = [final_class[idx] for idx in keep_idx]

                # The result file name and frame id are sliced out of fixed
                # character positions of the image path.
                abs_path = '/home/bryant/MATLAB-tools/Matlab evaluation_labeling code3.2.1/data-USA/res/VGG+conv/'
                txt_path = image_path[-21:-11] + '.txt'
                txt_path = txt_path.replace('v', 'V')
                txt_path = abs_path + txt_path
                fram_id = int(image_path[-9:-4])

                # The ``with`` block closes the file; no explicit close().
                with open(txt_path, 'a') as f:
                    for bbox, prob in zip(final_boxes, final_probs):
                        xmin, ymin, xmax, ymax = bbox_transform(bbox)
                        box_res = '{:d},{:.4f},{:.4f},{:.4f},{:.4f},{:.4f}\n'.format(
                            fram_id, xmin * scale_x, ymin * scale_y,
                            (xmax - xmin + 1) * scale_x,
                            (ymax - ymin + 1) * scale_y, prob)
                        f.write(box_res)

                t_filter = time.time()
                times['nms'] = t_filter - t_detect

                # TODO(bichen): move this color dict to configuration file
                cls2clr = {'pedestrian': (255, 0, 191)}

                # Draw boxes, labelled with their probability only
                _draw_box(
                    im,
                    final_boxes,
                    ['%.2f' % prob for prob in final_probs],
                    cdict=cls2clr,
                )
                t_draw = time.time()
                times['draw'] = t_draw - t_filter

                im = im.astype(np.uint8, copy=False)
                cv2.imshow('img', im)  # added by LJ
                # waitKey lets the window refresh; the key code was never
                # acted upon in the original either.
                cv2.waitKey(1)

                times['total'] = time.time() - t_start

                time_str = 'Total time: {:.4f}, detection time: {:.4f}, filter time: '\
                           '{:.4f}'. \
                    format(times['total'], times['detect'], times['nms'])
                print(time_str)

    print('over')
def _add_yolo_interpret_graph(self):
    """Interpret yolo output.

    Reshapes ``self.preds`` to ``(N, H, W, B, 5 + C)`` and derives from it:

    * ``self.pred_conf``: sigmoid of the confidence channel,
      shape ``(N, H, W, B, 1)``.
    * ``self.bbox_x`` / ``self.bbox_y``: sigmoid offsets plus the grid cell
      column / row index.
    * ``self.bbox_w`` / ``self.bbox_h``: ``exp`` of the raw size multiplied
      by the anchor size from ``mc.ANCHOR_BOX``.
    * ``self.raw_boxes``: the four values above scaled from grid units to
      image pixels, shape ``(N, H * W * B, 4)``.
    * ``self.det_boxes``: ``raw_boxes`` passed through ``bbox_transform``,
      ``self._trim_bbox`` and ``bbox_transform_inv``.
    * ``self.probs`` / ``self.det_probs`` / ``self.det_class``: class scores
      (smoothed softmax times confidence) and their arg-max.
    """
    mc = self.mc

    with tf.variable_scope('interpret_output'):
        # TODO(jeff): add summary
        N = mc.BATCH_SIZE
        H, W, B = mc.NET_OUT_SHAPE
        C = mc.CLASSES
        preds = tf.reshape(self.preds, (N, H, W, B, 5 + C))

        # confidence
        # Channels 0-3 are consumed as the box and channels 5: as the C class
        # scores, so the confidence is channel 4.  The previous code read
        # channel 5, i.e. the first class score, and left channel 4 unused.
        self.pred_conf = tf.sigmoid(
            tf.reshape(preds[:, :, :, :, 4], (N, H, W, B, 1)), name='conf')

        # bbox scale
        col_idx = tf.reshape(tf.to_float(tf.range(0, W, 1)), (1, 1, W, 1))
        row_idx = tf.reshape(tf.to_float(tf.range(0, H, 1)), (1, H, 1, 1))
        self.bbox_x = tf.reshape(
            tf.add(tf.sigmoid(preds[:, :, :, :, 0]), col_idx),
            (N, H, W, B, 1), name='bbox_x_ratio')
        self.bbox_y = tf.reshape(
            tf.add(tf.sigmoid(preds[:, :, :, :, 1]), row_idx),
            (N, H, W, B, 1), name='bbox_y_ratio')
        self.bbox_w = tf.reshape(
            tf.multiply(tf.exp(preds[:, :, :, :, 2]),
                        mc.ANCHOR_BOX[:, :, :, 0]),
            (N, H, W, B, 1), name='bbox_w_ratio')
        self.bbox_h = tf.reshape(
            tf.multiply(tf.exp(preds[:, :, :, :, 3]),
                        mc.ANCHOR_BOX[:, :, :, 1]),
            (N, H, W, B, 1), name='bbox_h_ratio')
        self.bbox = tf.stack(
            [self.bbox_x, self.bbox_y, self.bbox_w, self.bbox_h],
            axis=4, name='bbox_ratio')

        # bbox prediction
        w_scale = float(mc.IMAGE_WIDTH) / W
        h_scale = float(mc.IMAGE_HEIGHT) / H
        self.raw_boxes = tf.reshape(
            tf.stack([
                self.bbox_x * w_scale, self.bbox_y * h_scale,
                self.bbox_w * w_scale, self.bbox_h * h_scale
            ], axis=4),
            (N, H * W * B, 4), name='raw_bbox')

        # trim bbox
        # The numpy helpers are handed to py_func directly; wrapping them in
        # a lambda added nothing.
        corner_boxes = tf.py_func(util.bbox_transform, [self.raw_boxes],
                                  tf.float32)
        self.det_boxes = tf.py_func(
            util.bbox_transform_inv, [self._trim_bbox(corner_boxes)],
            tf.float32, name='det_boxes')

        # prob
        self.probs = tf.multiply(
            self._smooth_softmax(preds[:, :, :, :, 5:]),
            self.pred_conf, name='probs')

        # class prediction
        self.det_probs = tf.reshape(
            self.probs, (N, H * W * B, C), name='score')
        self.det_class = tf.reshape(
            tf.argmax(self.probs, 4), (N, H * W * B), name='class_idx')
def eval_checkpoint(model, imdb, saver, summary_writer, test_dir,
                    checkpoint_path, eval_summary_phs, eval_summary_ops):
    """Evaluate ``checkpoint_path`` on ``imdb`` unless that was already done.

    The global step is the text after the last ``-`` of the checkpoint file
    name.  If ``<test_dir>/detection_files_<global_step>`` exists the
    function prints ``'Already evaluated'`` and returns.  Otherwise the
    checkpoint is restored, every image of ``imdb`` is run through the
    detector, detections are evaluated with ``imdb.evaluate_detections`` and
    the resulting APs, timings and detection count are written as summaries.

    Args:
      model: detection network.
      imdb: dataset to evaluate on.
      saver: object whose ``restore(sess, path)`` loads the checkpoint.
      summary_writer: receives the evaluated summary strings.
      test_dir: directory holding the detection/evaluation files.
      checkpoint_path: checkpoint to evaluate.
      eval_summary_phs: dict mapping summary names to the placeholders that
        feed ``eval_summary_ops``.
      eval_summary_ops: summary ops to run once evaluation is finished.
    """
    global_step = checkpoint_path.split('/')[-1].split('-')[-1]
    # Checked before the session is created so that an already evaluated
    # checkpoint does not allocate a session at all.
    if os.path.exists(
            os.path.join(test_dir, 'detection_files_' + str(global_step))):
        print('Already evaluated')
        return

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.05)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        saver.restore(sess, checkpoint_path)

        num_images = len(imdb.image_idx)

        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

        _t = {'im_detect': Timer(), 'im_read': Timer(), 'misc': Timer()}

        num_detection = 0.0
        for i in xrange(num_images):
            _t['im_read'].tic()
            images, scales = imdb.read_image_batch(shuffle=False)
            _t['im_read'].toc()

            _t['im_detect'].tic()
            det_boxes, det_probs, det_class = sess.run(
                [model.det_boxes, model.det_probs, model.det_class],
                feed_dict={model.image_input: images})
            _t['im_detect'].toc()

            _t['misc'].tic()
            for j in xrange(len(det_boxes)):  # batch
                # rescale
                det_boxes[j, :, 0::2] /= scales[j][0]
                det_boxes[j, :, 1::2] /= scales[j][1]

                # Use separate names for the filtered results so that the
                # batched ``det_class`` array is not overwritten while the
                # batch is still being iterated.
                kept_bbox, kept_score, kept_class = model.filter_prediction(
                    det_boxes[j], det_probs[j], det_class[j])

                num_detection += len(kept_bbox)
                for c, b, s in zip(kept_class, kept_bbox, kept_score):
                    all_boxes[c][i].append(bbox_transform(b) + [s])
            _t['misc'].toc()

            print('im_detect: %s/%s im_read: %.3fs detect: %.3fs misc: %.3fs' %
                  (i + 1, num_images, _t['im_read'].average_time,
                   _t['im_detect'].average_time, _t['misc'].average_time))

        print('Evaluating detections...')
        aps, ap_names = imdb.evaluate_detections(test_dir, global_step,
                                                 all_boxes)

        print('Evaluation summary:')
        print(' Average number of detections per image: %s:' %
              (num_detection / num_images))
        print(' Timing:')
        print(' im_read: %.3fs detect: %.3fs misc: %.3fs' %
              (_t['im_read'].average_time, _t['im_detect'].average_time,
               _t['misc'].average_time))
        print(' Average precisions:')

        feed_dict = {}
        for cls, ap in zip(ap_names, aps):
            feed_dict[eval_summary_phs['APs/' + cls]] = ap
            print(' %s: %.3f' % (cls, ap))

        print(' Mean average precision: %.3f' % np.mean(aps))
        feed_dict[eval_summary_phs['APs/mAP']] = np.mean(aps)
        feed_dict[eval_summary_phs['timing/im_detect']] = \
            _t['im_detect'].average_time
        feed_dict[eval_summary_phs['timing/im_read']] = \
            _t['im_read'].average_time
        feed_dict[eval_summary_phs['timing/post_proc']] = \
            _t['misc'].average_time
        feed_dict[eval_summary_phs['num_det_per_image']] = \
            num_detection / num_images

        print('Analyzing detections...')
        # The return values are not used here; the call is kept because the
        # original performs it.
        stats, ims = imdb.do_detection_analysis_in_eval(test_dir, global_step)

        eval_summary_str = sess.run(eval_summary_ops, feed_dict=feed_dict)
        for sum_str in eval_summary_str:
            summary_writer.add_summary(sum_str, global_step)
def Get_feed_data(self):
    """Build one training batch: images plus dense ground-truth tensors.

    Reads a shuffled batch with ``self.read_batch_gt_data``, then for every
    image: subtracts ``mc.BGR_MEANS``, runs ``self.Preprocess``, keeps the
    labels selected by the returned index list, mirrors the image (and its
    boxes) horizontally with probability 0.5 and resizes it to
    ``(mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT)``.  The boxes are multiplied by the
    image size before mirroring and divided by it afterwards.  All boxes of
    the batch are then matched against ``mc.ANCHOR_BOX`` and converted to
    dense form.

    Returns:
      input_images: list of resized images.
      gt_boxes_dense, gt_labels_dense, input_mask: outputs of
        ``self._sparse_to_dense``.
      all_match_overlaps: second output of ``self._match_bbox``.

    Raises:
      AssertionError: if an image ends up without ground-truth boxes, if
        ``Preprocess`` returns inconsistent lengths, or if no anchor matches
        were produced.
    """
    mc = self.mc
    batch_gt_boxes, batch_gt_labels, batch_image = self.read_batch_gt_data(
        shuffle=True)
    batch_gt_boxes = np.array(batch_gt_boxes)
    batch_gt_labels = np.array(batch_gt_labels)
    batch_image = np.array(batch_image)

    input_images = []
    gt_data = []
    for i in range(len(batch_gt_boxes)):
        im = batch_image[i]
        im -= mc.BGR_MEANS
        gt_bbox = np.array(batch_gt_boxes[i])
        gt_label = np.array(batch_gt_labels[i])
        if len(gt_bbox) == 0:
            # The original paused on stdin here (``raw_input``), which hangs
            # unattended runs and does not exist on Python 3.  Only the
            # message is kept; the asserts below still reject the batch.
            print('len(gt_bbox) == 0')

        im, gt_bbox, anno_box_filter_idx = self.Preprocess(im, gt_bbox)
        assert len(anno_box_filter_idx) == len(gt_bbox)
        assert len(gt_bbox) != 0

        # Keep only the labels of the boxes that survived preprocessing.
        gt_label = np.array([gt_label[idx] for idx in anno_box_filter_idx])

        orig_h, orig_w, _ = [float(v) for v in im.shape]

        # mirror
        gt_bbox[:, 0::2] *= orig_w
        gt_bbox[:, 1::2] *= orig_h
        gt_bbox_center = np.array(
            [bbox_transform_inv(box) for box in gt_bbox])
        if np.random.randint(2) > 0.5:
            im = im[:, ::-1, :]
            gt_bbox_center[:, 0] = orig_w - 1 - gt_bbox_center[:, 0]
        gt_bbox = np.array([bbox_transform(box) for box in gt_bbox_center])
        gt_bbox[:, 0::2] /= orig_w
        gt_bbox[:, 1::2] /= orig_h

        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        input_images.append(im)

        # One row per object:
        # [batch index, label, 0, box[0], box[1], box[2], box[3]]
        for j in range(len(gt_bbox)):
            gt_data.append([
                float(i),
                float(gt_label[j]),
                float(0),
                float(gt_bbox[j][0]),
                float(gt_bbox[j][1]),
                float(gt_bbox[j][2]),
                float(gt_bbox[j][3])
            ])

    gt_boxes, gt_labels = self.parse_gt_data(gt_data)
    all_match_indices, all_match_overlaps = self._match_bbox(
        mc.ANCHOR_BOX, gt_boxes)
    assert len(all_match_indices) != 0
    gt_boxes_dense, gt_labels_dense, input_mask = self._sparse_to_dense(
        gt_boxes, gt_labels, all_match_indices)
    # The former trailing ``assert len(gt_bbox) != 0`` only re-checked the
    # last image (already asserted inside the loop) and raised a NameError
    # for an empty batch, so it was dropped.
    return input_images, gt_boxes_dense, gt_labels_dense, input_mask, all_match_overlaps
def read_batch(self, shuffle=True, wrap_around=True):
    """Read a batch of image and instance annotations.

    Images that cannot be opened or decoded are reported and skipped, so the
    returned lists may hold fewer than ``mc.BATCH_SIZE`` entries.

    Args:
      shuffle: whether or not to shuffle the dataset
      wrap_around: cyclic data extraction (only consulted when ``shuffle`` is
        False); when False the read position restarts at 0 after the last
        batch.
    Returns:
      image_per_batch: images. Shape: batch_size x height x width x [b, g, r]
      label_per_batch: labels. Shape: batch_size x object_num
      delta_per_batch: bounding box or mask deltas. Shape: batch_size x
        object_num x [dx ,dy, dw, dh] or [dx, dy, dw, dh, dof1, dof2, dof3, dof4]
      aidx_per_batch: index of anchors that are responsible for prediction.
        Shape: batch_size x object_num
      bbox_per_batch: scaled bounding boxes or mask parameters. Shape:
        batch_size x object_num x [cx, cy, w, h] or
        [cx, cy, w, h, of1, of2, of3, of4]
      boundary_adhesions_per_batch: per image, the boundary-adhesion flags
        taken from ``self._boundary_adhesions`` (4 columns, or 8 when
        ``mc.EIGHT_POINT_REGRESSION`` is set), updated for drift and flip.
    """
    mc = self.mc

    if shuffle:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            self._shuffle_image_idx()
        batch_idx = self._perm_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE
    else:
        # Check for warp around only in non shuffle mode
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            batch_idx = self._image_idx[self._cur_idx:] \
                + self._image_idx[:self._cur_idx + mc.BATCH_SIZE
                                  - len(self._image_idx)]
            if wrap_around:
                self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
            else:
                # Restart the counter if no-wrap-around is enabled
                # This ensures all the validation examples are evaluated
                self._cur_idx = 0
        else:
            batch_idx = self._image_idx[self._cur_idx:
                                        self._cur_idx + mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE

    image_per_batch = []
    label_per_batch = []
    bbox_per_batch = []
    delta_per_batch = []
    aidx_per_batch = []
    boundary_adhesions_per_batch = []

    if mc.DEBUG_MODE:
        avg_ious = 0.
        num_objects = 0.
        max_iou = 0.0
        min_iou = 1.0
        num_zero_iou_obj = 0

    for img_ct, idx in enumerate(batch_idx):
        # load the image
        image_path = self._image_path_at(idx)
        try:
            # Seems to be the only way to detect invalid image files
            Image.open(image_path).tobytes()
        except IOError:
            print('Detect error img %s' % image_path)
            continue
        im = cv2.imread(image_path)
        # cv2.imread returns None on failure, so the check has to come
        # before any method is called on the result.
        if im is None:
            print("\n\nCorrupt image found: ", image_path)
            continue
        im = im.astype(np.float32, copy=False)
        im -= mc.BGR_MEANS
        orig_h, orig_w, _ = [float(v) for v in im.shape]

        # load annotations
        rois = self._rois[idx]
        labels = [b[4] for b in rois]
        label_per_batch.append(labels)
        gt_bbox_pre = np.array([[b[0], b[1], b[2], b[3]] for b in rois])

        if mc.EIGHT_POINT_REGRESSION:
            polygons = [b[2] for b in self._poly[idx][:]]
            boundary_adhesion_pre = np.array(
                [[b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]
                 for b in self._boundary_adhesions[idx][:]])
        else:
            boundary_adhesion_pre = np.array(
                [[b[0], b[1], b[2], b[3]]
                 for b in self._boundary_adhesions[idx][:]])

        is_drift_performed = False
        is_flip_performed = False

        # NOTE(review): the two conditions are joined with ``or``, so this
        # only fails when both are violated -- confirm that is intended.
        assert np.all((gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2]/2.0)) >= 0) or \
            np.all((gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2]/2.0)) < orig_w), \
            "Error in the bounding boxes before augmentation"

        if mc.DATA_AUGMENTATION:
            # Both drifts may be zero, as the message states; the previous
            # condition rejected DRIFT_Y == 0.
            assert mc.DRIFT_X >= 0 and mc.DRIFT_Y >= 0, \
                'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

            if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                # Ensures that gt bounding box is not cut out of the image
                max_drift_x = math.floor(
                    min(gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2] / 2.0) + 1))
                max_drift_y = math.floor(
                    min(gt_bbox_pre[:, 1] - (gt_bbox_pre[:, 3] / 2.0) + 1))
                assert max_drift_x >= 0 and max_drift_y >= 0, \
                    'bbox out of image'

                dy = np.random.randint(-mc.DRIFT_Y,
                                       min(mc.DRIFT_Y + 1, max_drift_y))
                dx = np.random.randint(-mc.DRIFT_X,
                                       min(mc.DRIFT_X + 1, max_drift_x))

                # shift bbox
                gt_bbox_pre[:, 0] = gt_bbox_pre[:, 0] - dx
                gt_bbox_pre[:, 1] = gt_bbox_pre[:, 1] - dy
                is_drift_performed = True

                # distort image
                orig_h -= dy
                orig_w -= dx
                orig_x, dist_x = max(dx, 0), max(-dx, 0)
                orig_y, dist_y = max(dy, 0), max(-dy, 0)

                distorted_im = np.zeros(
                    (int(orig_h), int(orig_w), 3)).astype(np.float32)
                distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                dist_h, dist_w, _ = [float(v) for v in distorted_im.shape]
                im = distorted_im

                # Re-check which boxes now touch an image boundary.  The
                # first column of every tuple is the side flag; with eight
                # point regression the two adjacent corner flags are set too.
                eight = mc.EIGHT_POINT_REGRESSION
                if dx < 0:
                    # Recheck right boundary
                    xmax_temp = gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2] / 2)
                    temp_ids = np.where(
                        xmax_temp >= dist_w - 1 - self.right_margin)[0]
                    if len(temp_ids) > 0:
                        # right, right top, right bottom
                        for col in ((2, 7, 6) if eight else (2,)):
                            boundary_adhesion_pre[temp_ids, col] = True
                if dy < 0:
                    # Recheck bottom boundary
                    ymax_temp = gt_bbox_pre[:, 1] + (gt_bbox_pre[:, 3] / 2)
                    temp_ids = np.where(
                        ymax_temp >= dist_h - 1 - self.bottom_margin)[0]
                    if len(temp_ids) > 0:
                        # bottom, bottom right, bottom left
                        for col in ((3, 6, 5) if eight else (3,)):
                            boundary_adhesion_pre[temp_ids, col] = True
                if dx > 0:
                    # Recheck left boundary
                    xmin_temp = gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2] / 2)
                    temp_ids = np.where(xmin_temp <= self.left_margin)[0]
                    if len(temp_ids) > 0:
                        # left, left top, left bottom
                        for col in ((0, 4, 5) if eight else (0,)):
                            boundary_adhesion_pre[temp_ids, col] = True
                if dy > 0:
                    # Recheck top boundary
                    ymin_temp = gt_bbox_pre[:, 1] - (gt_bbox_pre[:, 3] / 2)
                    temp_ids = np.where(ymin_temp <= self.top_margin)[0]
                    if len(temp_ids) > 0:
                        # top, top left, top right
                        for col in ((1, 4, 7) if eight else (1,)):
                            boundary_adhesion_pre[temp_ids, col] = True

            # Flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                is_flip_performed = True
                gt_bbox_pre[:, 0] = orig_w - 1 - gt_bbox_pre[:, 0]
                # Swap the left and right flags (and the corner flags in the
                # eight point layout).  Fancy indexing on the right-hand side
                # copies, so the swap is safe in place.
                if mc.EIGHT_POINT_REGRESSION:
                    boundary_adhesion_pre[:, [0, 4, 5, 2, 7, 6]] = \
                        boundary_adhesion_pre[:, [2, 7, 6, 0, 4, 5]]
                else:
                    boundary_adhesion_pre[:, [0, 2]] = \
                        boundary_adhesion_pre[:, [2, 0]]

        # scale image
        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        image_per_batch.append(im)

        # scale annotation
        x_scale = mc.IMAGE_WIDTH / orig_w
        y_scale = mc.IMAGE_HEIGHT / orig_h
        gt_bbox_pre[:, 0::2] = gt_bbox_pre[:, 0::2] * x_scale
        gt_bbox_pre[:, 1::2] = gt_bbox_pre[:, 1::2] * y_scale

        assert np.all((gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2]/2.0)) >= 0) or \
            np.all((gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2]/2.0)) < orig_w), \
            "Error in the bounding boxes after augmentation"

        if mc.EIGHT_POINT_REGRESSION:
            # Apply the same drift / flip / scale to the polygons.
            for p in range(len(polygons)):
                poly = np.array(polygons[p])
                if is_drift_performed:
                    poly[:, 0] = poly[:, 0] - dx
                    poly[:, 1] = poly[:, 1] - dy
                if is_flip_performed:
                    poly[:, 0] = orig_w - 1 - poly[:, 0]
                poly[:, 0] = poly[:, 0] * x_scale
                poly[:, 1] = poly[:, 1] * y_scale
                polygons[p] = poly

        # Use shifted bounding box if EIGHT_POINT_REGRESSION = False
        gt_bbox = gt_bbox_pre

        # Transform the bounding box to offset mode.
        # We extract the bounding box from the flipped and drifted masks to
        # ensure consistency.
        if mc.EIGHT_POINT_REGRESSION:
            gt_bbox = []
            dropped = set()
            for k, polygon in enumerate(polygons):
                mask_vector = self._get_8_point_mask(
                    polygon, mc.IMAGE_HEIGHT, mc.IMAGE_WIDTH)
                center_x, center_y, width, height, of1, of2, of3, of4 = \
                    mask_vector
                if width == 0 or height == 0:
                    print("Error in width or height so ignoring", width,
                          height, gt_bbox_pre[k][2], gt_bbox_pre[k][3],
                          center_x, center_y, gt_bbox_pre[k][0],
                          gt_bbox_pre[k][1], idx)
                    dropped.add(k)
                    continue
                assert not (of1 <= 0 or of2 <= 0 or of3 <= 0 or of4 <= 0), \
                    "Error Occured " + str(of1) + " " + str(of2) + " " + \
                    str(of3) + " " + str(of4)
                points = decode_parameterization(mask_vector)
                points = np.round(points)
                points = np.array(points, 'int32')
                assert not ((points[0][1] - points[1][1]) > 1 or
                            (points[2][0] - points[3][0]) > 1 or
                            (points[5][1] - points[4][1]) > 1 or
                            (points[7][0] - points[6][0]) > 1), \
                    "\n\n Error in extraction:"+str(points)+" "+str(idx)+" "+str(mask_vector)
                gt_bbox.append(mask_vector)
            if dropped:
                # Remove the labels of the ignored objects in one pass.  The
                # previous ``del label_per_batch[img_ct][k]`` used the wrong
                # list once an earlier image of the batch had been skipped,
                # and shifted the indices of later objects after the first
                # deletion.
                # NOTE(review): ``boundary_adhesion_pre`` keeps the rows of
                # ignored objects, as before -- confirm whether consumers
                # expect them to be removed as well.
                labels[:] = [lbl for k, lbl in enumerate(labels)
                             if k not in dropped]

        bbox_per_batch.append(gt_bbox)
        boundary_adhesions_per_batch.append(boundary_adhesion_pre)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()
        for i in range(len(gt_bbox)):
            overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])

            # len(mc.ANCHOR_BOX) serves as the "no anchor found yet" marker.
            aidx = len(mc.ANCHOR_BOX)
            for ov_idx in np.argsort(overlaps)[::-1]:
                if overlaps[ov_idx] <= 0:
                    if mc.DEBUG_MODE:
                        min_iou = min(overlaps[ov_idx], min_iou)
                        num_objects += 1
                        num_zero_iou_obj += 1
                    break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    if mc.DEBUG_MODE:
                        max_iou = max(overlaps[ov_idx], max_iou)
                        min_iou = min(overlaps[ov_idx], min_iou)
                        avg_ious += overlaps[ov_idx]
                        num_objects += 1
                    break

            if aidx == len(mc.ANCHOR_BOX):
                # even the largeset available overlap is 0, thus, choose one
                # with the smallest square distance.  Only as many leading
                # components as the anchors have columns are compared, so the
                # 8-element mask vectors broadcast against the anchors too.
                n_cols = np.shape(mc.ANCHOR_BOX)[1]
                dist = np.sum(
                    np.square(np.asarray(gt_bbox[i])[:n_cols] - mc.ANCHOR_BOX),
                    axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            if mc.EIGHT_POINT_REGRESSION:
                box_cx, box_cy, box_w, box_h, of1, of2, of3, of4 = gt_bbox[i]
                delta = [0] * 8
            else:
                box_cx, box_cy, box_w, box_h = gt_bbox[i]
                delta = [0] * 4

            anchor = mc.ANCHOR_BOX[aidx]
            if mc.ENCODING_TYPE == 'asymmetric_linear':
                # Use linear domain anchors
                xmin_t, ymin_t, xmax_t, ymax_t = bbox_transform(
                    [box_cx, box_cy, box_w, box_h])
                xmin_a, ymin_a, xmax_a, ymax_a = bbox_transform(anchor)
                delta[0] = (xmin_t - xmin_a) / anchor[2]
                delta[1] = (ymin_t - ymin_a) / anchor[3]
                delta[2] = (xmax_t - xmax_a) / anchor[2]
                delta[3] = (ymax_t - ymax_a) / anchor[3]
            elif mc.ENCODING_TYPE == 'asymmetric_log':
                # Use log domain anchors
                log_eps = 0.5
                xmin_t, ymin_t, xmax_t, ymax_t = bbox_transform(
                    [box_cx, box_cy, box_w, box_h])
                delta[0] = np.log(
                    max((anchor[0] - xmin_t) / anchor[2], 0) + log_eps)
                delta[1] = np.log(
                    max((anchor[1] - ymin_t) / anchor[3], 0) + log_eps)
                delta[2] = np.log(
                    max((xmax_t - anchor[0]) / anchor[2], 0) + log_eps)
                delta[3] = np.log(
                    max((ymax_t - anchor[1]) / anchor[3], 0) + log_eps)
            else:
                delta[0] = (box_cx - anchor[0]) / anchor[2]
                delta[1] = (box_cy - anchor[1]) / anchor[3]
                # if box_w or box_h = 0, the box is not included
                delta[2] = np.log(box_w / anchor[2])
                delta[3] = np.log(box_h / anchor[3])

            if mc.EIGHT_POINT_REGRESSION:
                offset_eps = 1e-8
                anchor_diagonal = (anchor[2]**2 + anchor[3]**2)**(0.5)
                delta[4] = np.log((of1 + offset_eps) / anchor_diagonal)
                delta[5] = np.log((of2 + offset_eps) / anchor_diagonal)
                delta[6] = np.log((of3 + offset_eps) / anchor_diagonal)
                delta[7] = np.log((of4 + offset_eps) / anchor_diagonal)

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        delta_per_batch.append(delta_per_image)
        aidx_per_batch.append(aidx_per_image)

    if mc.DEBUG_MODE:
        print('max iou: {}'.format(max_iou))
        print('min iou: {}'.format(min_iou))
        # Guard against a batch without any object.
        print('avg iou: {}'.format(
            avg_ious / num_objects if num_objects else 0.0))
        print('number of objects: {}'.format(num_objects))
        print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

    return image_per_batch, label_per_batch, delta_per_batch, \
        aidx_per_batch, bbox_per_batch, boundary_adhesions_per_batch
def image_demo():
    """Detect objects in every image matching ``FLAGS.input_path``.

    For each image the function crops a ``mc.IMAGE_WIDTH`` x
    ``mc.IMAGE_HEIGHT`` window (random x-offset when ``flag_random`` is set,
    otherwise the fixed offset (300, 400)), runs the restored network on it,
    keeps detections whose probability exceeds ``mc.PLOT_PROB_THRESH``, draws
    them with ``_draw_box`` and writes the first annotated crops to
    ``FLAGS.out_dir``.

    When ``flag_CSV`` is set, one row per detection (or one placeholder row
    for an image without detections) is appended to
    ``squeeze_th0.1_N512_v2.csv``; box coordinates are shifted by the crop
    offset so they refer to the uncropped image.

    ``FLAGS``, ``flag_CSV`` and ``flag_random`` are read from module scope
    (defined outside this block).

    Raises:
        AssertionError: if ``FLAGS.demo_net`` is not a supported architecture.
    """
    assert FLAGS.demo_net == 'squeezeDet' or FLAGS.demo_net == 'squeezeDet+', \
        'Selected nueral net architecture not supported: {}'.format(FLAGS.demo_net)

    # squeeze, checkpoint999, random, plot > 0.005
    csv_file = open("squeeze_th0.1_N512_v2.csv", "w") if flag_CSV else None

    def write_csv_row(fields):
        # Every row ends with a trailing comma before the newline; this is the
        # format the original writer produced, so it is kept byte-for-byte.
        csv_file.write(",".join(str(value) for value in fields) + ",\n")

    # TODO(bichen): move this color dict to configuration file
    cls2clr = {
        'car': (255, 191, 0),
        'cyclist': (0, 191, 255),
        'pedestrian': (255, 0, 191)
    }

    try:
        if csv_file is not None:
            csv_file.write(
                "xmin,ymin,xmax,ymax,Frame,Label,Preview URL,confidence,"
                "random,y_loc,win_sizeX,win_sizeY\n")

        with tf.Graph().as_default():
            # Load model
            if FLAGS.demo_net == 'squeezeDet':
                mc = kitti_squeezeDet_config()
                mc.BATCH_SIZE = 1
                # model parameters will be restored from checkpoint
                mc.LOAD_PRETRAINED_MODEL = False
                model = SqueezeDet(mc, FLAGS.gpu)
            elif FLAGS.demo_net == 'squeezeDet+':
                mc = kitti_squeezeDetPlus_config()
                mc.BATCH_SIZE = 1
                mc.LOAD_PRETRAINED_MODEL = False
                model = SqueezeDetPlus(mc, FLAGS.gpu)

            saver = tf.train.Saver(model.model_params)

            with tf.Session(
                    config=tf.ConfigProto(allow_soft_placement=True)) as sess:
                saver.restore(sess, FLAGS.checkpoint)

                cnt = 0
                fps_sum = 0
                for f in glob.iglob(FLAGS.input_path):
                    im = cv2.imread(f)
                    if im is None:
                        # cv2.imread signals an unreadable file by returning
                        # None instead of raising.
                        print('Could not read image, skipping: {}'.format(f))
                        continue

                    # Crop window offset. It is recorded in both branches so
                    # the CSV export can map boxes back to the full image
                    # (previously undefined for the center crop).
                    if flag_random:
                        crop_x = random.randint(
                            0, im.shape[1] - mc.IMAGE_WIDTH)
                        crop_y = 400
                    else:
                        crop_x, crop_y = 300, 400
                    im = im[crop_y:crop_y + mc.IMAGE_HEIGHT,
                            crop_x:crop_x + mc.IMAGE_WIDTH]

                    im = im.astype(np.float32, copy=False)
                    input_image = im - mc.BGR_MEANS

                    # Detect; the timer brackets only the inference call.
                    start = time.time()
                    det_boxes, det_probs, det_class = sess.run(
                        [model.det_boxes, model.det_probs, model.det_class],
                        feed_dict={model.image_input: [input_image]})
                    end = time.time()

                    print (mc.PLOT_PROB_THRESH)
                    cnt += 1
                    fps_sum += 1 / (end - start)
                    if cnt % 10 == 0:
                        print ("FPS(mean), detection: " + str(fps_sum / 10))
                        fps_sum = 0

                    # Filter
                    final_boxes, final_probs, final_class = \
                        model.filter_prediction(
                            det_boxes[0], det_probs[0], det_class[0])

                    keep_idx = [idx for idx, prob in enumerate(final_probs)
                                if prob > mc.PLOT_PROB_THRESH]
                    final_boxes = [final_boxes[idx] for idx in keep_idx]
                    final_probs = [final_probs[idx] for idx in keep_idx]
                    final_class = [final_class[idx] for idx in keep_idx]

                    # Draw boxes
                    _draw_box(
                        im, final_boxes,
                        [mc.CLASS_NAMES[idx] + ': (%.2f)' % prob
                         for idx, prob in zip(final_class, final_probs)],
                        cdict=cls2clr,
                    )
                    print (final_probs)

                    file_name = os.path.split(f)[1]

                    if csv_file is not None:
                        window = [crop_x, crop_y,
                                  mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT]
                        for box, cls_idx, prob in zip(
                                final_boxes, final_class, final_probs):
                            xmin, ymin, xmax, ymax = [
                                int(b) for b in bbox_transform(box)]
                            # Columns: box (full-image coords), file name,
                            # label, empty "Preview URL", confidence, window.
                            write_csv_row([
                                xmin + crop_x, ymin + crop_y,
                                xmax + crop_x, ymax + crop_y,
                                file_name, mc.CLASS_NAMES[cls_idx], '', prob,
                            ] + window)
                        if not final_boxes:
                            print ("No detection: " + file_name)
                            # Placeholder row: empty box, label, URL and
                            # confidence columns; file name and window kept.
                            write_csv_row(
                                ['', '', '', '', file_name, '', '', '']
                                + window)

                    out_file_name = os.path.join(
                        FLAGS.out_dir, 'out_' + file_name)
                    # Only the first annotated crops are written to disk.
                    if cnt < 20:
                        cv2.imwrite(out_file_name, im)
                        print('Image detection output saved to {}'.format(
                            out_file_name))
                    else:
                        print('(Skip)Image detection output saved to {}'.format(
                            out_file_name))
    finally:
        # Close the CSV even when detection fails part-way through.
        if csv_file is not None:
            csv_file.close()